Commit b763e964 authored by 郭羽

美购精排模型

parent a6d9521e
...@@ -21,7 +21,7 @@ one_hot_columns = ["item_"+c for c in ["service_type","doctor_type","doctor_famo ...@@ -21,7 +21,7 @@ one_hot_columns = ["item_"+c for c in ["service_type","doctor_type","doctor_famo
version = "v1" version = "v1"
data_path_train = "/data/files/service_feature_{}_train.csv".format(version) data_path_train = "/data/files/service_feature_{}_train.csv".format(version)
data_path_test = "/data/files/service_feature_{}_test.csv".format(version) data_path_test = "/data/files/service_feature_{}_test.csv".format(version)
model_file = "service_mlp_"+version model_file = "service/1"
def is_float(s): def is_float(s):
try: try:
...@@ -192,6 +192,10 @@ def predict(model_path,df): ...@@ -192,6 +192,10 @@ def predict(model_path,df):
if __name__ == '__main__': if __name__ == '__main__':
curTime = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
print("train_service执行时间:{}".format(curTime))
# redis中加载数据字典 # redis中加载数据字典
print("redis 中加载模型字典...") print("redis 中加载模型字典...")
data_vocab = getDataVocabFromRedis(version) data_vocab = getDataVocabFromRedis(version)
......
source /srv/envs/serviceRec/bin/activate
python /srv/apps/serviceRec/models/train_service.py > /srv/apps/serviceRec/models/train_service_log.log
\ No newline at end of file
...@@ -593,7 +593,7 @@ if __name__ == '__main__': ...@@ -593,7 +593,7 @@ if __name__ == '__main__':
endDay = addDays(-1) endDay = addDays(-1)
startDay = addDays(-(1 + int(trainDays))) startDay = addDays(-(1 + int(trainDays)))
print(startDay,endDay) print("train_data start:{} end:{}".format(startDay,endDay))
itemDF = get_service_feature_df(spark) itemDF = get_service_feature_df(spark)
# 行为数据 # 行为数据
...@@ -675,40 +675,4 @@ if __name__ == '__main__': ...@@ -675,40 +675,4 @@ if __name__ == '__main__':
splitAndSaveTrainingTestSamplesByTimeStamp(samplesWithUserFeatures, splitTimestamp, TRAIN_FILE_PATH) splitAndSaveTrainingTestSamplesByTimeStamp(samplesWithUserFeatures, splitTimestamp, TRAIN_FILE_PATH)
print("write to hdfs success...") print("write to hdfs success...")
# # user画像数据
# # userDF = get_user_portrait(spark)
#
# # 数据处理
#
# # 数据写入
# item_csv_dir = "/service_item"
# user_csv_dir = "/service_user"
# action_csv_dir = "/service_action"
# os.system("hdfs dfs -rmr {}".format(item_csv_dir))
# os.system("hdfs dfs -rmr {}".format(user_csv_dir))
# os.system("hdfs dfs -rmr {}".format(action_csv_dir))
#
# itemDF.write.option("header", "true").option("delimiter", "|").csv(item_csv_dir)
# print("service_item write successful", flush=True)
#
# user_endDay = addDays(-1,format="%Y-%m-%d")
# user_startDay = addDays(-(1 + int(trainDays)),format="%Y-%m-%d")
# userTmpTable = "user_tag3_portrait"
# userDF.createOrReplaceTempView(userTmpTable)
# user_sql = "select * from {} where date between '{}' and '{}' ".format(userTmpTable,user_startDay,user_endDay)
# userDF = spark.sql(user_sql)
# userDF.write.option("header", "true").option("delimiter", "|").csv(user_csv_dir)
# print("service_user write successful", flush=True)
#
# actionTmpTable = "action"
# actionDF.createOrReplaceTempView(actionTmpTable)
# action_sql = "select * from {}".format(actionTmpTable)
# actionDF = spark.sql(action_sql)
# actionDF.write.option("header", "true").option("delimiter", "|").csv(action_csv_dir)
# print("service_action write successful", flush=True)
spark.stop() spark.stop()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment