Commit 0807a378 authored by 郭羽

特征工程优化

parent ce66a0a8
...@@ -2,4 +2,4 @@ cd /srv/apps/tensorServing_models/ ...@@ -2,4 +2,4 @@ cd /srv/apps/tensorServing_models/
#文件打包 #文件打包
tar -czvf model_service.tar.gz service tar -czvf model_service.tar.gz service
#上传至coding #上传至coding
curl -T model_service.tar.gz -u guoyu@igengmei.com:Gengmei123! "https://gengmei-generic.pkg.coding.net/tob/mode/service" curl -T model_service.tar.gz -u guoyu@igengmei.com:Gengmei123! "https://gengmei-generic.pkg.coding.net/tob/mode/service"
\ No newline at end of file
...@@ -185,7 +185,7 @@ if __name__ == '__main__': ...@@ -185,7 +185,7 @@ if __name__ == '__main__':
print("读取数据...") print("读取数据...")
timestmp1 = int(round(time.time())) timestmp1 = int(round(time.time()))
df_train = loadData(data_path_train) df_train = loadData(data_path_train)
df_test = loadData(data_path_test) # df_test = loadData(data_path_test)
timestmp2 = int(round(time.time())) timestmp2 = int(round(time.time()))
print("读取数据耗时s:{}".format(timestmp2 - timestmp1)) print("读取数据耗时s:{}".format(timestmp2 - timestmp1))
...@@ -198,20 +198,20 @@ if __name__ == '__main__': ...@@ -198,20 +198,20 @@ if __name__ == '__main__':
print(datasColumns) print(datasColumns)
df_train = df_train[datasColumns + ["label"]] df_train = df_train[datasColumns + ["label"]]
df_test = df_test[datasColumns + ["label"]] # df_test = df_test[datasColumns + ["label"]]
trainSize = df_train["label"].count() trainSize = df_train["label"].count()
print("trainSize:{}".format(trainSize)) print("trainSize:{}".format(trainSize))
testSize = df_test["label"].count() # testSize = df_test["label"].count()
print("trainSize:{},testSize{}".format(trainSize,testSize)) # print("trainSize:{},testSize{}".format(trainSize,testSize))
# 数据类型转换 # 数据类型转换
df_train = csvTypeConvert(datasColumns,df_train,data_vocab) df_train = csvTypeConvert(datasColumns,df_train,data_vocab)
df_test = csvTypeConvert(datasColumns,df_test,data_vocab) # df_test = csvTypeConvert(datasColumns,df_test,data_vocab)
# 获取训练数据 # 获取训练数据
train_data = getDataSet(df_train,shuffleSize=trainSize,) train_data = getDataSet(df_train,shuffleSize=trainSize,)
test_data = getDataSet(df_test,shuffleSize=testSize) # test_data = getDataSet(df_test,shuffleSize=testSize)
print("train start...") print("train start...")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment