#!/usr/bin/env python
# coding: utf-8
#
# MobiAct v2.0 fall-detection pipeline (Jupyter-notebook export):
#   1. feature()      — turn each annotated recording into one row of 58
#                       statistical features + a class label.
#   2. script cell    — write the feature rows for one activity folder to CSV.
#   3. get_all_data() — load every featured CSV into one shuffled DataFrame.
#   4. script cell    — train an SVM, report ROC-AUC / PR metrics, persist it.
#   5. script cell    — reload the persisted model and score it again.

# In[4]:

import sys
import math
import os
import csv
from statistics import median, stdev
from random import randint

import numpy as np
import pandas as pd
import tensorflow as tf              # NOTE(review): unused below — kept for notebook parity
from tensorflow import keras         # NOTE(review): unused below — kept for notebook parity
from scipy.stats import kurtosis, skew
from sklearn import svm, datasets
from sklearn.model_selection import train_test_split
from sklearn.metrics import (roc_auc_score, roc_curve, precision_recall_curve,
                             f1_score, auc)
from joblib import dump, load


# In[42]:

def _mean(values):
    """Arithmetic mean of a non-empty sequence."""
    return sum(values) / len(values)


def _mad(values):
    """Mean absolute deviation around the mean."""
    m = _mean(values)
    return sum(abs(v - m) for v in values) / len(values)


def _seven_stats(x, y, z):
    """mean/median/std/skew/kurtosis/min/max of three axes, interleaved X,Y,Z.

    The interleaving (avgX, avgY, avgZ, medianX, ...) must match the order of
    the CSV header written by the script cell below.
    """
    out = []
    for fn in (_mean, median, stdev, skew, kurtosis, min, max):
        out.extend(fn(axis) for axis in (x, y, z))
    return out


def _range_slope(x, y, z):
    """Euclidean distance between the per-axis maxima and per-axis minima."""
    return math.sqrt((max(x) - min(x)) ** 2
                     + (max(y) - min(y)) ** 2
                     + (max(z) - min(z)) ** 2)


def feature(FOLDER, label):
    """Extract one 59-element row (58 features + label) per recording file.

    Reads every CSV under <Annotated Data>/FOLDER, trims each file to start at
    the first row whose 'label' column equals FOLDER, caches the transformed
    file under OUTPUT_PATH, and computes statistics over (at most) the first
    FALL_SIZE samples.

    Parameters
    ----------
    FOLDER : str
        Activity code (e.g. 'WAL'); also the label value searched for in the
        'label' column and the input/output sub-folder name.
    label : int
        Numeric class label appended as the last element of every row.

    Returns
    -------
    list[list]
        One feature row per input file.
    """
    FALL_SIZE = 1200  # max number of samples used per recording
    PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/Annotated Data/'
    OUTPUT_PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/train_data_trainsform_lstm/'
    FILE_PATH = PATH + FOLDER
    OUTPUT_FOLDER_PATH = OUTPUT_PATH + FOLDER

    final = []
    for file in os.listdir(FILE_PATH):
        df = pd.read_csv(os.path.join(FILE_PATH, file))
        # Drop everything before the first row labelled with this activity.
        df = df[(df['label'] == FOLDER).idxmax():].reset_index(drop=True)
        print(file)

        for col in ('acc_x', 'acc_y', 'acc_z'):
            df[col] = df[col].astype('float64')
        # Squared magnitude of the acceleration vector (no sqrt applied here).
        df['mag'] = df['acc_x'] ** 2 + df['acc_y'] ** 2 + df['acc_z'] ** 2

        # Cache the transformed recording; skip files already written.
        if not os.path.exists(OUTPUT_FOLDER_PATH):
            os.makedirs(OUTPUT_FOLDER_PATH)
        OUTPUT_FILE_PATH = OUTPUT_PATH + FOLDER + '/' + file
        if os.path.isfile(OUTPUT_FILE_PATH):
            print(OUTPUT_FILE_PATH + " exist , skip...")
        else:
            df.to_csv(OUTPUT_FILE_PATH, index=False)

        df_count = df.shape[0]
        print(df_count)
        # BUG FIX: the original shrank FALL_SIZE itself when a short file was
        # encountered, silently truncating every *subsequent* file too.  Use a
        # local per-file sample count instead.
        n = min(FALL_SIZE, df_count)

        # Positional columns: 2=acc_x, 3=acc_y, 4=acc_z, 12='mag' (appended
        # above) — assumes the annotated-data schema puts 'mag' at index 12;
        # TODO confirm against an actual input file.
        X = [df.iloc[i, 2] for i in range(n)]
        Y = [df.iloc[i, 3] for i in range(n)]
        Z = [df.iloc[i, 4] for i in range(n)]
        MAG = [df.iloc[i, 12] for i in range(n)]
        # Tilt angle: asin of Y over the (un-squared) acceleration magnitude.
        TA = [math.asin(float(y) / math.sqrt(m)) for y, m in zip(Y, MAG)]

        abs_X = [abs(v) for v in X]
        abs_Y = [abs(v) for v in Y]
        abs_Z = [abs(v) for v in Z]

        # Feature order must stay aligned with the CSV header below.
        row = (
            _seven_stats(X, Y, Z)
            + [_range_slope(X, Y, Z),
               _mean(TA), stdev(TA), skew(TA), kurtosis(TA)]
            + [_mad(X), _mad(Y), _mad(Z)]
            + _seven_stats(abs_X, abs_Y, abs_Z)
            + [_range_slope(abs_X, abs_Y, abs_Z),
               _mean(MAG), stdev(MAG), min(MAG), max(MAG),
               max(MAG) - min(MAG),
               0,            # ZCR_Mag: placeholder, never computed in the original
               _mean(MAG),   # "average resultant acceleration" of the squared magnitude
               label]
        )
        final.append(row)
    return final


# In[59]:

# ---------------------------------------------------------------------------
# Build the featured CSV for one activity class.
# ---------------------------------------------------------------------------
OUTPUT_PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/featured/'
FOLDER = 'WAL'
label = 0
OUTPUT_FILE_PATH = OUTPUT_PATH + FOLDER + '.csv'
if os.path.isfile(OUTPUT_FILE_PATH):
    os.remove(OUTPUT_FILE_PATH)
with open(OUTPUT_FILE_PATH, 'a') as f1:
    writer = csv.writer(f1, delimiter=',', lineterminator='\n')
    writer.writerow(['AvgX', 'AvgY', 'AvgZ', 'MedianX', 'MedianY', 'MedianZ',
                     'StdX', 'StdY', 'StdZ', 'SkewX', 'SkewY', 'SkewZ',
                     'KurtosisX', 'KurtosisY', 'KurtosisZ',
                     'MinX', 'MinY', 'MinZ', 'MaxX', 'MaxY', 'MaxZ',
                     'Slope', 'MeanTA', 'StdTA', 'SkewTA', 'KurtosisTA',
                     'AbsX', 'AbsY', 'AbsZ',
                     'AbsMeanX', 'AbsMeanY', 'AbsMeanZ',
                     'AbsMedianX', 'AbsMedianY', 'AbsMedianZ',
                     'AbsStdX', 'AbsStdY', 'AbsStdZ',
                     'AbsSkewX', 'AbsSkewY', 'AbsSkewZ',
                     'AbsKurtosisX', 'AbsKurtosisY', 'AbsKurtosisZ',
                     'AbsMinX', 'AbsMinY', 'AbsMinZ',
                     'AbsMaxX', 'AbsMaxY', 'AbsMaxZ',
                     'AbsSlope', 'MeanMag', 'StdMag', 'MinMag', 'MaxMag',
                     'DiffMinMaxMag', 'ZCR_Mag',
                     'AverageResultantAcceleration', 'label'])
    lala = feature(FOLDER, label)
    data_len = len(lala)
    writer.writerows(lala)
print("total ", data_len, " records process done")


# In[2]:

def get_all_data():
    """Load every featured CSV under PATH into one shuffled DataFrame.

    Keeps only the first 59 columns (58 features + label) of each file.

    Returns
    -------
    pandas.DataFrame
        All featured rows, row-shuffled.
    """
    PATH = '/home/helong/share/ML/MobiAct_Dataset_v2.0/featured/'
    frames = []
    for f in os.listdir(PATH):
        if 'csv' not in f:
            continue
        data = pd.read_csv(os.path.join(PATH, f), index_col=False,
                           low_memory=False)
        frames.append(data.iloc[0:, 0:59])
    # pd.concat replaces the deprecated DataFrame.append-in-a-loop pattern.
    all_data = pd.concat(frames)
    # BUG FIX: the original did np.random.shuffle(all_data.values); ``.values``
    # may materialise a *copy* of the data, in which case the frame was never
    # actually shuffled.  sample(frac=1) shuffles the rows reliably.
    return all_data.sample(frac=1).reset_index(drop=True)


# In[16]:

# ---------------------------------------------------------------------------
# Train an SVM on the featured data, evaluate it, and persist the model.
# ---------------------------------------------------------------------------
all_data = get_all_data()
row_count = all_data.shape[0]
# Columns 0-57 are features, column 58 is the class label.
_all_data_x = [all_data.iloc[i, 0:58] for i in range(row_count)]
_all_data_y = [all_data.iloc[i, 58:59] for i in range(row_count)]

X_train, X_test, y_train, y_test = train_test_split(
    _all_data_x, _all_data_y, test_size=0.2, random_state=42)

clf = svm.SVC(gamma='scale')
clf.fit(X_train, y_train)

test_count = len(X_test)
print(test_count)
y_predict = clf.predict(X_test)
score = roc_auc_score(y_test, y_predict)
print(score)

precision, recall, thresholds = precision_recall_curve(y_test, y_predict)
f1 = f1_score(y_test, y_predict)
# BUG FIX: the original rebound the *function* name ``auc`` to its float
# result, breaking any later call to sklearn's auc().
pr_auc = auc(recall, precision)
print("precision is ", precision, "recall is ", recall, "thresholds is ", thresholds)
print("f1 is ", f1, "auc is ", pr_auc)
print("done...")

# Persist the trained model for later reuse.
dump(clf, 'fall_detect_svm.joblib')


# In[5]:

# ---------------------------------------------------------------------------
# Reload the persisted model and score it.
# NOTE(review): this scores the *training* portion of a fresh split, so the
# figure is optimistic — consider predicting on X_test instead.
# ---------------------------------------------------------------------------
all_data = get_all_data()
row_count = all_data.shape[0]
_all_data_x = [all_data.iloc[i, 0:58] for i in range(row_count)]
_all_data_y = [all_data.iloc[i, 58:59] for i in range(row_count)]

clf_load = load('fall_detect_svm.joblib')
X_train, X_test, y_train, y_test = train_test_split(
    _all_data_x, _all_data_y, test_size=0.1, random_state=42)
y_predict = clf_load.predict(X_train)
score = roc_auc_score(y_train, y_predict)
print(score)


# In[ ]: