Commit 34cb81cc authored by 张彦钊

修改cid、deviceid、pkl、model保存路径

parent d934e2c4
...@@ -24,6 +24,7 @@ def get_video_id(): ...@@ -24,6 +24,7 @@ def get_video_id():
db.close() db.close()
return video_id return video_id
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致 # 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
def feature_en(x_list, device_id): def feature_en(x_list, device_id):
data = pd.DataFrame(x_list) data = pd.DataFrame(x_list)
......
...@@ -10,11 +10,11 @@ def train(): ...@@ -10,11 +10,11 @@ def train():
# log保存路径,如果不加这个参数,日志默认保存在/temp路径下,不符合规范 # log保存路径,如果不加这个参数,日志默认保存在/temp路径下,不符合规范
param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc',"log":"/data2/models/result"} param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc',"log":"/data2/models/result"}
ffm_model.fit(param, DIRECTORY_PATH + "model.out") ffm_model.fit(param, DIRECTORY_PATH + "train/model.out")
print("predicting") print("predicting")
ffm_model.setTest(DIRECTORY_PATH + "test_ffm_data.csv") ffm_model.setTest(DIRECTORY_PATH + "test_ffm_data.csv")
ffm_model.setSigmoid() ffm_model.setSigmoid()
ffm_model.predict(DIRECTORY_PATH + "model.out",DIRECTORY_PATH + "test_set_predict_output.txt") ffm_model.predict(DIRECTORY_PATH + "train/model.out",DIRECTORY_PATH + "test_set_predict_output.txt")
...@@ -52,14 +52,14 @@ def feature_en(data_start_date, data_end_date, validation_date, test_date): ...@@ -52,14 +52,14 @@ def feature_en(data_start_date, data_end_date, validation_date, test_date):
data_set_cid = data["cid"].unique() data_set_cid = data["cid"].unique()
cid_df = pd.DataFrame() cid_df = pd.DataFrame()
cid_df['cid'] = data_set_cid cid_df['cid'] = data_set_cid
cid_df.to_csv(DIRECTORY_PATH + "data_set_cid.csv", index=False) cid_df.to_csv(DIRECTORY_PATH + "train/data_set_cid.csv", index=False)
print("成功保存data_set_cid") print("成功保存data_set_cid")
# 将device_id 保存,目的是为了判断预测的device_id是否在这个集合里,如果不在,不需要预测 # 将device_id 保存,目的是为了判断预测的device_id是否在这个集合里,如果不在,不需要预测
data_set_device_id = data["device_id"].unique() data_set_device_id = data["device_id"].unique()
device_id_df = pd.DataFrame() device_id_df = pd.DataFrame()
device_id_df['device_id'] = data_set_device_id device_id_df['device_id'] = data_set_device_id
device_id_df.to_csv(DIRECTORY_PATH + "data_set_device_id.csv", index=False) device_id_df.to_csv(DIRECTORY_PATH + "train/data_set_device_id.csv", index=False)
print("成功保存data_set_device_id") print("成功保存data_set_device_id")
return data, test_number, validation_number return data, test_number, validation_number
...@@ -69,7 +69,7 @@ def ffm_transform(data, test_number, validation_number): ...@@ -69,7 +69,7 @@ def ffm_transform(data, test_number, validation_number):
start = time.time() start = time.time()
ffm_train = multiFFMFormatPandas() ffm_train = multiFFMFormatPandas()
data = ffm_train.fit_transform(data, y='y',n=50000,processes=8) data = ffm_train.fit_transform(data, y='y',n=50000,processes=8)
with open(DIRECTORY_PATH+"ffm.pkl", "wb") as f: with open(DIRECTORY_PATH+"train/ffm.pkl", "wb") as f:
pickle.dump(ffm_train, f) pickle.dump(ffm_train, f)
print("done transform ffm") print("done transform ffm")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment