-
Notifications
You must be signed in to change notification settings - Fork 1
Expand file tree
/
Copy pathmodel_train.py
More file actions
71 lines (55 loc) · 2.73 KB
/
model_train.py
File metadata and controls
71 lines (55 loc) · 2.73 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
import pandas as pd
import pickle # Object serialization.
from sklearn.model_selection import train_test_split
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression, RidgeClassifier
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.metrics import accuracy_score # Accuracy metrics
def load_dataset(csv_data):
    """Load a pose-coordinate CSV and split it into train/test sets.

    Parameters
    ----------
    csv_data : str
        Path to a CSV file whose 'class' column holds the target label
        and whose remaining columns are the feature values.

    Returns
    -------
    tuple
        (x_train, x_test, y_train, y_test) — a 70/30 split with a fixed
        random_state so the split is reproducible across runs.
    """
    df = pd.read_csv(csv_data)
    features = df.drop('class', axis=1)  # All columns except the label.
    target_value = df['class']           # The label column is the target.
    x_train, x_test, y_train, y_test = train_test_split(
        features, target_value, test_size=0.3, random_state=1234)
    return x_train, x_test, y_train, y_test
def evaluate_model(fit_models, x_test, y_test):
    """Print the test-set accuracy of every fitted pipeline.

    Args:
        fit_models: Mapping of algorithm key -> fitted sklearn pipeline.
        x_test: Test-set features.
        y_test: Ground-truth labels for the test set.
    """
    print('\nEvaluate model accuracy:')
    # Score each fitted pipeline against the held-out test data.
    for algo_name in fit_models:
        predictions = fit_models[algo_name].predict(x_test)
        pct = 100 * accuracy_score(y_test, predictions)
        print(f'Classify algorithm: {algo_name}, Accuracy: {pct}%')
if __name__ == '__main__':
    dataset_csv_file = './dataset/coords_dataset.csv'
    model_weights = './model_weights/weights_body_language.pkl'

    # Load and split ONCE. The original called load_dataset() four times
    # (once per returned element), re-reading and re-splitting the CSV each
    # time; only the fixed random_state kept the four splits consistent.
    x_train, x_test, y_train, y_test = load_dataset(csv_data=dataset_csv_file)

    # Candidate classifiers, each preceded by feature standardization.
    pipelines = {
        'lr': make_pipeline(StandardScaler(), LogisticRegression()),
        'rc': make_pipeline(StandardScaler(), RidgeClassifier()),
        'rf': make_pipeline(StandardScaler(), RandomForestClassifier()),
        'gb': make_pipeline(StandardScaler(), GradientBoostingClassifier()),
    }

    fit_models = {}
    print('Model is Training ....')
    for key_algo, value_pipeline in pipelines.items():
        fit_models[key_algo] = value_pipeline.fit(x_train, y_train)
    print('Training done.')

    # Sample predictions from the Ridge Classifier on the test features.
    rc_predict = fit_models['rc'].predict(x_test)
    print(f'\nPredict 5 datas: {rc_predict[0:5]}')

    # Serialize the fitted model into the open file.
    # NOTE(review): only the random-forest pipeline is persisted here,
    # even though 'rc' was used for the sample prediction above — confirm
    # that 'rf' is the intended model to ship.
    with open(model_weights, 'wb') as f:
        pickle.dump(fit_models['rf'], f)
    print('\nSave model done.')

    evaluate_model(fit_models, x_test, y_test)