import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

# Print floats in fixed-point notation (minimum width 8, default 6 decimals).
np.set_printoptions(formatter={'float_kind': '{:8f}'.format})

from DataReader import load_all_data

# ---------------------------------------------------------------------------
# Data preprocessing
# ---------------------------------------------------------------------------

# Load the data using DataReader.
data = load_all_data()

# pandas is used to structure the data for processing.
# Split the data by activity.
standing_df = data[data['event'] == 'STAND']
stairs_up_df = data[data['event'] == 'STAIRS_UP']
stairs_down_df = data[data['event'] == 'STAIRS_DOWN']
walk_normal_df = data[data['event'] == 'WALK_NORMAL']
elevator_up_df = data[data['event'] == 'ELEVATOR_UP']
elevator_down_df = data[data['event'] == 'ELEVATOR_DOWN']

# Biggest timestamp. The column maximum is used instead of the last row so the
# result does not depend on the rows being sorted by 't'.
maxtime = data['t'].max()

# Split the data into sliding windows of INTERVAL_SIZE that advance by half a
# window, so consecutive windows overlap by 50 %.
INTERVAL_SIZE = 1000

# Start and end time of every window. The first second (t <= 1000) is removed
# due to bad data during recording.
start = np.arange(1000, maxtime - INTERVAL_SIZE, INTERVAL_SIZE / 2)
# Deriving the end times from the start times guarantees both arrays have the
# same length.
end = start + INTERVAL_SIZE

# Pairs of start and end times of every window, shape (n_windows, 2).
start_end = np.column_stack((start, end))


def extract_intervals(activity_df, start_end):
    """Cut the samples of one activity into the given time windows.

    Args:
        activity_df: DataFrame with a ``'t'`` timestamp column.
        start_end: Iterable of ``(start, end)`` pairs. A row belongs to a
            window when ``start < t <= end``.

    Returns:
        A list of DataFrames, one per window that contains at least one row.
        Windows without any row are left out.
    """
    timestamps = activity_df['t']
    intervals = []
    for window_start, window_end in start_end:
        in_window = (timestamps > window_start) & (timestamps <= window_end)
        window = activity_df[in_window]
        # Only include the window if it contains values.
        if not window.empty:
            intervals.append(window)
    return intervals


stand_intervals = extract_intervals(standing_df, start_end)
stairs_up_intervals = extract_intervals(stairs_up_df, start_end)
stairs_down_intervals = extract_intervals(stairs_down_df, start_end)
walk_normal_intervals = extract_intervals(walk_normal_df, start_end)
elevator_up_intervals = extract_intervals(elevator_up_df, start_end)
elevator_down_intervals = extract_intervals(elevator_down_df, start_end)

# ---------------------------------------------------------------------------
# Calculate features
# ---------------------------------------------------------------------------
# from feature_calculator import calculate_features

# Gravity constant subtracted from the magnitude signal.
_GRAVITY = 9.81

# Column layout of the frame returned by calculate_features. 'bx_mean' is
# never computed (it depends on the location and the day and must not be used
# as a feature); it is kept only so that the returned columns stay the same as
# before and is therefore always NaN.
_FEATURE_COLUMNS = [
    'ax_mean', 'ay_mean', 'az_mean', 'bx_mean',
    'ax_var', 'ay_var', 'az_var', 'bx_var', 'bx_diff',
    'mag_mean', 'mag_var', 'mag_max', 'mag_min', 'mag_diff',
]


def _without_nan(values):
    """Return the array *values* with all NaN entries removed."""
    return values[~np.isnan(values)]


def calculate_features(intervals):
    """Compute one row of summary features per window.

    Args:
        intervals: Iterable of DataFrames with the columns ``'ax'``, ``'ay'``,
            ``'az'``, ``'bx'`` and ``'magnitude'``.

    Returns:
        A float DataFrame with the columns listed in ``_FEATURE_COLUMNS`` and
        one row per usable window, indexed 0..n-1. A window is skipped when
        any of its signals has no non-NaN sample or when a computed feature is
        NaN. If no window is usable, the frame is empty but still has all
        columns, so column selection on the result keeps working.
    """
    rows = []
    for interval in intervals:
        # Remove NaN from the data.
        ax = _without_nan(interval['ax'].values)
        ay = _without_nan(interval['ay'].values)
        az = _without_nan(interval['az'].values)
        bx = _without_nan(interval['bx'].values)
        # Subtract the gravity constant from the magnitude.
        magnitude = _without_nan(interval['magnitude'].values - _GRAVITY)

        # bx[0] and the max/min reductions need at least one sample, and a
        # mean/variance of nothing is NaN anyway, so skip such windows.
        if min(len(ax), len(ay), len(az), len(bx), len(magnitude)) == 0:
            continue

        mag_max = np.max(magnitude)
        mag_min = np.min(magnitude)
        row = {
            'ax_mean': np.mean(ax),
            'ay_mean': np.mean(ay),
            'az_mean': np.mean(az),
            'ax_var': np.var(ax),
            'ay_var': np.var(ay),
            'az_var': np.var(az),
            'bx_var': np.var(bx),
            # First minus last valid sample of the window.
            'bx_diff': bx[0] - bx[-1],
            'mag_mean': np.mean(magnitude),
            'mag_var': np.var(magnitude),
            'mag_max': mag_max,
            'mag_min': mag_min,
            'mag_diff': mag_max - mag_min,
        }

        # Skip the window if any computed feature is NaN.
        if any(np.isnan(value) for value in row.values()):
            continue

        rows.append(row)

    # Build the frame once; DataFrame.append no longer exists in pandas 2.x
    # and copied the whole frame on every call.
    return pd.DataFrame(rows, columns=_FEATURE_COLUMNS).astype(float)


print("Calculating features...")
features_stand = calculate_features(stand_intervals)
features_stairs_up = calculate_features(stairs_up_intervals)
features_stairs_down = calculate_features(stairs_down_intervals)
features_walk_normal = calculate_features(walk_normal_intervals)
features_elevator_up = calculate_features(elevator_up_intervals)
features_elevator_down = calculate_features(elevator_down_intervals)

# Manually select some features.
features = ["mag_var", "bx_diff"]
features_stand = features_stand[features]
features_stairs_up = features_stairs_up[features]
features_stairs_down = features_stairs_down[features]
features_walk_normal = features_walk_normal[features]
features_elevator_up = features_elevator_up[features]
features_elevator_down = features_elevator_down[features]

# Create labels. The builtin int replaces the np.int alias, which newer NumPy
# releases no longer provide.
label_stand = np.full((len(features_stand), ), 1, dtype=int)
from sklearn import preprocessing
from sklearn.decomposition import PCA
try:
    from sklearn.model_selection import train_test_split
except ImportError:
    # Older scikit-learn releases only ship the cross_validation module.
    from sklearn.cross_validation import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC
from sklearn import tree
from sklearn.metrics import accuracy_score, classification_report, f1_score, confusion_matrix

# One integer class label per feature row. The builtin int replaces the np.int
# alias, which newer NumPy releases no longer provide.
label_stairs_up = np.full((len(features_stairs_up), ), 2, dtype=int)
label_stairs_down = np.full((len(features_stairs_down), ), 3, dtype=int)
label_walk_normal = np.full((len(features_walk_normal), ), 4, dtype=int)
label_elevator_up = np.full((len(features_elevator_up), ), 5, dtype=int)
label_elevator_down = np.full((len(features_elevator_down), ), 6, dtype=int)

# Gather all feature vectors and labels in one array each; both use the same
# activity order so row i of `features` belongs to labels[i].
features = np.vstack((features_stand,
                      features_stairs_up,
                      features_stairs_down,
                      features_walk_normal,
                      features_elevator_up,
                      features_elevator_down))
labels = np.hstack((label_stand,
                    label_stairs_up,
                    label_stairs_down,
                    label_walk_normal,
                    label_elevator_up,
                    label_elevator_down))

# Scale features.
features_scaled = preprocessing.scale(features)

# Use PCA for dimensionality reduction (if necessary).
pca = PCA(n_components=2)
features_trans = pca.fit(features_scaled).transform(features_scaled)

# ---------------------------------------------------------------------------
# Classification
# ---------------------------------------------------------------------------

# Split into training and testing dataset (50 % each, random split).
# NOTE(review): the split uses the unscaled `features`; `features_scaled` and
# `features_trans` are computed above but not used below - confirm that this
# is intended.
X_train, X_test, y_train, y_test = train_test_split(
    features, labels, test_size=0.5)

# Train classifier. The alternatives are kept for quick experiments.
# clf = KNeighborsClassifier(n_neighbors=3)
# clf.fit(X_train, y_train)
clf = GaussianNB()
clf.fit(X_train, y_train)
# clf = SVC()
# clf.fit(X_train, y_train)
# clf = tree.DecisionTreeClassifier()
# clf.fit(X_train, y_train)

predicts = clf.predict(X_test)

# Some metrics.
print("Accuracy score: ", accuracy_score(y_test, predicts))
print("F1 score: ", f1_score(y_test, predicts, average=None))
print("Classification report: \n", classification_report(y_test, predicts))
print("Confusion matrix: \n", confusion_matrix(y_test, predicts))

# Plot some data
# choose a feature
# for each class, plot a histogram:
#
# f = features_scaled[:, 2]  # feature 0
# target_names = ["standing", "stairs_up", "stairs_down",
#                 "walk", "elevator_up", "elevator_down"]
# colors = ['b', 'g', 'r', 'c', 'm', 'y']
# for label, color, target in zip(range(1, 7), colors, target_names):
#     #sns.kdeplot(f[labels == label])
#     plt.scatter(features_scaled[labels == label, 1], features_scaled[labels == label, 2],
#                 label=target, c=color, s=30)
# plt.legend()

# fig, ax = plt.subplots(nrows=2, ncols=1)
# ax[0].scatter(features[labels == 1, 0], features[
#               labels == 1, 1], s=40, c="green", label="standing")
# ax[0].scatter(features[labels == 2, 0], features[
#               labels == 2, 1], s=40, c="blue", label="stairs_up")
# ax[0].scatter(features[labels == 3, 0], features[
#               labels == 3, 1], s=40, c="black", label="stairs_down")
# ax[0].scatter(features[labels == 4, 0], features[
#               labels == 4, 1], s=40, c="yellow", label="walk_normal")
# ax[0].legend()
# ax[0].set_xlabel("Mag Mean")
# ax[0].set_ylabel("Mag_Var")

# ax[1].scatter(features_trans[labels == 1, 0], features_trans[
#               labels == 1, 1], s=40, c="green", label="standing")
# ax[1].scatter(features_trans[labels == 2, 0], features_trans[
#               labels == 2, 1], s=40, c="blue", label="stairs_up")
# ax[1].scatter(features_trans[labels == 3, 0], features_trans[
#               labels == 3, 1], s=40, c="black", label="stairs_down")
# ax[1].scatter(features_trans[labels == 4, 0], features_trans[
#               labels == 4, 1], s=40, c="yellow", label="walk_normal")
# ax[1].legend()
# ax[1].set_xlabel("First principal component")
# ax[1].set_ylabel("Second principal component")