Add Activity Recognizer for Python and own gitignore

This commit is contained in:
Lukas Koeping
2016-04-05 08:44:29 +02:00
parent bb9364e9a4
commit e9fc33c2dc
39 changed files with 757006 additions and 0 deletions

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,13 @@
1460;UNDEFINED
7609;ELEVATOR_UP
30264;UNDEFINED
42412;ELEVATOR_DOWN
63334;UNDEFINED
75247;ELEVATOR_UP
95222;UNDEFINED
108569;ELEVATOR_DOWN
129666;UNDEFINED
141531;ELEVATOR_UP
161218;UNDEFINED
174578;ELEVATOR_DOWN
195029;UNDEFINED

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1 @@
3584;STAND

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,15 @@
2348;WALK_NORMAL
8941;UNDEFINED
11954;WALK_NORMAL
21541;UNDEFINED
24198;WALK_NORMAL
33469;UNDEFINED
36233;WALK_NORMAL
47991;UNDEFINED
50356;WALK_NORMAL
63256;UNDEFINED
67202;WALK_NORMAL
75750;UNDEFINED
78141;WALK_NORMAL
88637;UNDEFINED
91053;WALK_NORMAL

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,61 @@
576;UNDEFINED
11757;STAIRS_UP
15990;UNDEFINED
19433;STAIRS_UP
23657;UNDEFINED
26566;STAIRS_UP
30932;UNDEFINED
34880;STAIRS_DOWN
39065;UNDEFINED
42106;STAIRS_DOWN
46247;UNDEFINED
49422;STAIRS_DOWN
53230;UNDEFINED
58138;STAIRS_UP
61920;UNDEFINED
64520;STAIRS_UP
68571;UNDEFINED
71272;STAIRS_UP
75604;UNDEFINED
79527;STAIRS_DOWN
83243;UNDEFINED
85661;STAIRS_DOWN
89751;UNDEFINED
92920;STAIRS_DOWN
96766;UNDEFINED
100924;STAIRS_UP
104880;UNDEFINED
107871;STAIRS_UP
111861;UNDEFINED
114536;STAIRS_UP
118601;UNDEFINED
122589;STAIRS_DOWN
126208;UNDEFINED
129174;STAIRS_DOWN
133249;UNDEFINED
136649;STAIRS_DOWN
140090;UNDEFINED
143964;STAIRS_UP
147780;UNDEFINED
150738;STAIRS_UP
154819;UNDEFINED
157560;STAIRS_UP
161867;UNDEFINED
165723;STAIRS_DOWN
169269;UNDEFINED
172363;STAIRS_DOWN
176313;UNDEFINED
179102;STAIRS_DOWN
182750;UNDEFINED
187214;STAIRS_UP
190970;UNDEFINED
194485;STAIRS_UP
198564;UNDEFINED
201720;STAIRS_UP
205693;UNDEFINED
209273;STAIRS_DOWN
212737;UNDEFINED
216170;STAIRS_DOWN
219842;UNDEFINED
223507;STAIRS_DOWN
227271;UNDEFINED

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -0,0 +1,63 @@
import numpy as np
import pandas as pd
# Directories that are read by load_all_data(), in this order. Each one is
# expected to hold Accelerometer.csv, Barometer.csv and Events.txt
# ("Fahrstuhl" is German for "elevator").
data_paths = [
    "Data/WalkingStairs",
    "Data/Standing",
    "Data/Walking",
    "Data/Fahrstuhl",
]
def read_data_files(path):
    """Load one recording directory into a single time-ordered DataFrame.

    Reads ``Accelerometer.csv``, ``Barometer.csv`` and ``Events.txt`` from
    *path* (all semicolon separated), outer-joins them on the timestamp
    column ``t`` and sorts the rows by ``t``.

    Parameters
    ----------
    path : str
        Directory that contains the three files.

    Returns
    -------
    pandas.DataFrame
        Columns ``t, ax, ay, az, bx, by, bz, event, magnitude``. Because of
        the outer join, sensor columns are NaN on rows whose timestamp only
        exists in another file. ``event`` carries the most recent label
        forward; rows before the first label stay NaN. ``magnitude`` is the
        Euclidean norm of ``(ax, ay, az)``.
    """
    # skiprows=1 drops the first line of the file and header=0 consumes the
    # next line as a header that `names` then replaces, so the first two
    # lines of each sensor file are never treated as samples.
    accelerometer = pd.read_csv(
        path + "/Accelerometer.csv", sep=";", skiprows=1, header=0,
        names=["t", "ax", "ay", "az"])
    barometer = pd.read_csv(
        path + "/Barometer.csv", sep=";", skiprows=1, header=0,
        names=["t", "bx", "by", "bz"])
    # Events.txt has no header: every line is "<timestamp>;<label>".
    events = pd.read_csv(
        path + "/Events.txt", sep=";", skiprows=0, header=None,
        names=["t", "event"])

    data = pd.merge(accelerometer, barometer, on="t", how="outer")
    data = pd.merge(data, events, on="t", how="outer")
    data = data.sort_values('t')

    # Fill all NaN values in event with the last known event.
    # Series.ffill() replaces fillna(method='pad'), which newer pandas
    # versions deprecate and then remove.
    data['event'] = data['event'].ffill()

    # Add additional magnitude column
    data['magnitude'] = np.sqrt(
        data['ax']**2 + data['ay']**2 + data['az']**2)
    return data
def increment_timestamps(data_frames):
    """Shift the ``t`` column of each frame so recordings follow one another.

    Every frame's timestamps are increased by the largest (already shifted)
    timestamp of the frames before it. The frames are modified in place.

    Parameters
    ----------
    data_frames : list
        DataFrames that each have a numeric ``t`` column.

    Returns
    -------
    list
        The same list object that was passed in.
    """
    max_t = 0
    for frame in data_frames:
        # An empty recording has no timestamps to shift and no maximum to
        # carry over; leave the running offset untouched.
        if frame.empty:
            continue
        frame['t'] += max_t
        # Use the real maximum rather than the last row so the offset is
        # also correct for frames that are not sorted by t.
        max_t = frame['t'].max()
    return data_frames
def merge_data_frames(data_frames):
    """Stack several DataFrames vertically into one with a fresh 0..n-1 index.

    Parameters
    ----------
    data_frames : list
        DataFrames to concatenate, in order.

    Returns
    -------
    pandas.DataFrame
        All rows of all frames; an empty DataFrame if the list is empty.
    """
    frames = list(data_frames)
    # pd.concat raises on an empty sequence; keep returning an empty frame.
    if not frames:
        return pd.DataFrame()
    # One concat replaces the repeated DataFrame.append calls (append was
    # removed in pandas 2.0 and re-copied all rows on every iteration).
    return pd.concat(frames, ignore_index=True)
def load_all_data():
    """Read every directory in ``data_paths`` and return one combined frame.

    Each directory is loaded with read_data_files(), the timestamps of the
    recordings are chained by increment_timestamps(), and the result is
    concatenated by merge_data_frames().
    """
    print("Loading data files...")
    recordings = [read_data_files(directory) for directory in data_paths]
    recordings = increment_timestamps(recordings)
    return merge_data_frames(recordings)

View File

@@ -0,0 +1,242 @@
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
np.set_printoptions(formatter={'float_kind': '{:8f}'.format})
from DataReader import load_all_data
"""
Data Preprocessing
"""
# Read all recordings through DataReader into one DataFrame.
data = load_all_data()

# pandas is used to structure the data for processing:
# one sub-frame per activity label.
standing_df = data.loc[data['event'] == 'STAND']
stairs_up_df = data.loc[data['event'] == 'STAIRS_UP']
stairs_down_df = data.loc[data['event'] == 'STAIRS_DOWN']
walk_normal_df = data.loc[data['event'] == 'WALK_NORMAL']
elevator_up_df = data.loc[data['event'] == 'ELEVATOR_UP']
elevator_down_df = data.loc[data['event'] == 'ELEVATOR_DOWN']

# Timestamp of the last row, used as the upper bound for the windows.
maxtime = data['t'].iloc[-1]

# Sliding windows of INTERVAL_SIZE timestamp units that advance by half a
# window, i.e. consecutive windows overlap by 50 %.
INTERVAL_SIZE = 1000

# Window boundaries begin at 1000 so the first second is dropped
# (bad data during recording).
start = np.arange(1000, maxtime - INTERVAL_SIZE, INTERVAL_SIZE / 2)
end = np.arange(1000 + INTERVAL_SIZE, maxtime, INTERVAL_SIZE / 2)

# One row per window: [start, end].
start_end = np.column_stack((start, end))
def extract_intervals(activity_df, start_end):
    """Cut ``activity_df`` into the windows listed in ``start_end``.

    Parameters
    ----------
    activity_df : pandas.DataFrame
        Frame with a ``t`` column.
    start_end : sequence of pairs
        Each entry holds a window start at index 0 and a window end at
        index 1.

    Returns
    -------
    list
        One DataFrame per window with the rows where ``start < t <= end``.
        Windows that select no rows are left out.
    """
    timestamps = activity_df['t']
    windows = (
        activity_df[(timestamps > bounds[0]) & (timestamps <= bounds[1])]
        for bounds in start_end
    )
    # Keep only the windows that actually contain values.
    return [window for window in windows if window.size > 0]
# Window every per-activity frame with the same start/end pairs.
(stand_intervals,
 stairs_up_intervals,
 stairs_down_intervals,
 walk_normal_intervals,
 elevator_up_intervals,
 elevator_down_intervals) = [
    extract_intervals(activity_df, start_end)
    for activity_df in (standing_df, stairs_up_df, stairs_down_df,
                        walk_normal_df, elevator_up_df, elevator_down_df)
]
# Calculate features
# from feature_calculator import calculate_features
def _without_nan(values):
    """Return ``values`` with all NaN entries removed."""
    return values[~np.isnan(values)]


def calculate_features(intervals):
    """Compute one row of summary statistics per interval.

    Parameters
    ----------
    intervals : list
        DataFrames with the columns ``ax``, ``ay``, ``az``, ``bx`` and
        ``magnitude``. NaN entries are ignored per column.

    Returns
    -------
    pandas.DataFrame
        One row per usable interval, indexed 0..n-1, with the columns
        ``ax_mean, ay_mean, az_mean, bx_mean, ax_var, ay_var, az_var,
        bx_var, bx_diff, mag_mean, mag_var, mag_max, mag_min, mag_diff``.
        The columns are present even when no interval is usable. An
        interval is skipped when any of its sensor columns has no non-NaN
        value, or when one of its statistics is NaN.
    """
    columns = ["ax_mean", "ay_mean", "az_mean", "bx_mean",
               "ax_var", "ay_var", "az_var", "bx_var", "bx_diff",
               "mag_mean", "mag_var", "mag_max", "mag_min", "mag_diff"]
    rows = []
    for interval in intervals:
        # Remove NaN from the data (rows that stem from another sensor).
        ax = _without_nan(interval['ax'].values)
        ay = _without_nan(interval['ay'].values)
        az = _without_nan(interval['az'].values)
        bx = _without_nan(interval['bx'].values)
        # Subtract gravity constant from magnitude.
        magnitude = _without_nan(interval['magnitude'].values - 9.81)

        # Without samples the statistics are undefined: np.max/np.min
        # raise on empty arrays and bx[0] would fail.
        if min(len(ax), len(ay), len(az), len(bx), len(magnitude)) == 0:
            continue

        mag_max = np.max(magnitude)
        mag_min = np.min(magnitude)
        row = {
            'ax_mean': np.mean(ax),
            'ay_mean': np.mean(ay),
            'az_mean': np.mean(az),
            # !!! bx mean is dependent on the location and the day.
            # Do not use it as a feature !!!
            'bx_mean': np.mean(bx),
            'ax_var': np.var(ax),
            'ay_var': np.var(ay),
            'az_var': np.var(az),
            'bx_var': np.var(bx),
            # Pressure change over the interval: first minus last sample.
            'bx_diff': bx[0] - bx[-1],
            'mag_mean': np.mean(magnitude),
            'mag_var': np.var(magnitude),
            'mag_max': mag_max,
            'mag_min': mag_min,
            'mag_diff': mag_max - mag_min,
        }
        # Skip the row if it contains NaN.
        if any(np.isnan(value) for value in row.values()):
            continue
        rows.append(row)

    # Build the frame once; DataFrame.append was removed in pandas 2.0.
    return pd.DataFrame(rows, columns=columns)
print("Calculating features...")
features_stand = calculate_features(stand_intervals)
features_stairs_up = calculate_features(stairs_up_intervals)
features_stairs_down = calculate_features(stairs_down_intervals)
features_walk_normal = calculate_features(walk_normal_intervals)
features_elevator_up = calculate_features(elevator_up_intervals)
features_elevator_down = calculate_features(elevator_down_intervals)

# Manually select some features.
# NOTE: `features` is first this list of column names and is rebound to the
# stacked feature matrix further down.
features = ["mag_var", "bx_diff"]
features_stand = features_stand[features]
features_stairs_up = features_stairs_up[features]
features_stairs_down = features_stairs_down[features]
features_walk_normal = features_walk_normal[features]
features_elevator_up = features_elevator_up[features]
features_elevator_down = features_elevator_down[features]

# Create labels: one integer class id per activity (1..6).
# The builtin `int` replaces `np.int`, an alias that NumPy 1.24 removed.
label_stand = np.full((len(features_stand), ), 1, dtype=int)
label_stairs_up = np.full((len(features_stairs_up), ), 2, dtype=int)
label_stairs_down = np.full((len(features_stairs_down), ), 3, dtype=int)
label_walk_normal = np.full((len(features_walk_normal), ), 4, dtype=int)
label_elevator_up = np.full((len(features_elevator_up), ), 5, dtype=int)
label_elevator_down = np.full((len(features_elevator_down), ), 6, dtype=int)

# Gather all feature vectors and labels in one array; the stacking order
# of the features must match the order of the labels below.
features = np.vstack((features_stand, features_stairs_up,
                      features_stairs_down, features_walk_normal,
                      features_elevator_up, features_elevator_down))

# Scale features
from sklearn import preprocessing
features_scaled = preprocessing.scale(features)

labels = np.hstack((label_stand, label_stairs_up,
                    label_stairs_down, label_walk_normal,
                    label_elevator_up, label_elevator_down))
# Use PCA for dimensionality reduction (if necessary)
from sklearn.decomposition import PCA
pca = PCA(n_components=2)
features_trans = pca.fit(features_scaled).transform(features_scaled)

# Do classification
# Split in training and testing dataset.
# train_test_split lives in sklearn.model_selection; the old
# sklearn.cross_validation module was removed in scikit-learn 0.20.
try:
    from sklearn.model_selection import train_test_split
except ImportError:  # very old scikit-learn without model_selection
    from sklearn.cross_validation import train_test_split
# NOTE(review): the split uses the unscaled `features`; `features_scaled`
# and `features_trans` are computed above but not fed to the classifier.
# Confirm this is intended.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.5)

# Train classifier (alternative classifiers are kept commented out)
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn import tree
# clf = KNeighborsClassifier(n_neighbors=3)
# clf.fit(X_train, y_train)
clf = GaussianNB()
clf.fit(X_train, y_train)
# clf = SVC()
# clf.fit(X_train, y_train)
# clf = tree.DecisionTreeClassifier()
# clf.fit(X_train, y_train)
predicts = clf.predict(X_test)

# Some metrics
from sklearn.metrics import accuracy_score, classification_report, f1_score, confusion_matrix
print("Accuracy score: ", accuracy_score(y_test, predicts))
print("F1 score: ", f1_score(y_test, predicts, average=None))
print("Classification report: \n", classification_report(y_test, predicts))
print("Confusion matrix: \n", confusion_matrix(y_test, predicts))
# Plot some data
# choose a feature
# for each class, plot a histogram:
#
# f = features_scaled[:, 2] # feature 0
# target_names = ["standing", "stairs_up", "stairs_down",
# "walk", "elevator_up", "elevator_down"]
# colors = ['b', 'g', 'r', 'c', 'm', 'y']
# for label, color, target in zip(range(1, 7), colors, target_names):
# #sns.kdeplot(f[labels == label])
# plt.scatter(features_scaled[labels == label, 1], features_scaled[labels == label, 2],
# label=target, c=color, s=30)
# plt.legend()
# fig, ax = plt.subplots(nrows=2, ncols=1)
# ax[0].scatter(features[labels == 1, 0], features[
# labels == 1, 1], s=40, c="green", label="standing")
# ax[0].scatter(features[labels == 2, 0], features[
# labels == 2, 1], s=40, c="blue", label="stairs_up")
# ax[0].scatter(features[labels == 3, 0], features[
# labels == 3, 1], s=40, c="black", label="stairs_down")
# ax[0].scatter(features[labels == 4, 0], features[
# labels == 4, 1], s=40, c="yellow", label="walk_normal")
# ax[0].legend()
# ax[0].set_xlabel("Mag Mean")
# ax[0].set_ylabel("Mag_Var")
# ax[1].scatter(features_trans[labels == 1, 0], features_trans[
# labels == 1, 1], s=40, c="green", label="standing")
# ax[1].scatter(features_trans[labels == 2, 0], features_trans[
# labels == 2, 1], s=40, c="blue", label="stairs_up")
# ax[1].scatter(features_trans[labels == 3, 0], features_trans[
# labels == 3, 1], s=40, c="black", label="stairs_down")
# ax[1].scatter(features_trans[labels == 4, 0], features_trans[
# labels == 4, 1], s=40, c="yellow", label="walk_normal")
# ax[1].legend()
# ax[1].set_xlabel("First principial component")
# ax[1].set_ylabel("Second principal component")