Commit c9ffad87 authored by 张彦钊

fix bugs

parent f57de602
...@@ -6,5 +6,10 @@ DATA_START_DATE = '2018-07-05' ...@@ -6,5 +6,10 @@ DATA_START_DATE = '2018-07-05'
DATA_END_DATE = '2018-08-06' DATA_END_DATE = '2018-08-06'
MODEL_VERSION = '' MODEL_VERSION = ''
lr = 0.03
l2_lambda = 0.002
# processData.py # processData.py
# diaryTraining.py # diaryTraining.py
...@@ -6,22 +6,21 @@ print("Start training") ...@@ -6,22 +6,21 @@ print("Start training")
ffm_model = xl.create_ffm() ffm_model = xl.create_ffm()
ffm_model.setTrain(DIRECTORY_PATH + "train{0}-{1}.csv".format(DATA_START_DATE, VALIDATION_DATE)) ffm_model.setTrain(DIRECTORY_PATH + "train{0}-{1}.csv".format(DATA_START_DATE, VALIDATION_DATE))
ffm_model.setValidate(DIRECTORY_PATH + "validation{0}.csv".format(VALIDATION_DATE)) ffm_model.setValidate(DIRECTORY_PATH + "validation{0}.csv".format(VALIDATION_DATE))
lr =0.03
l2_lambda = 0.002
param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc'}
param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc'}
ffm_model.fit(param, DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE, ffm_model.fit(param, DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE,lr,l2_lambda)) DATA_END_DATE, lr, l2_lambda))
print("predicting") print("predicting")
ffm_model.setTest(DIRECTORY_PATH + "test{0}.csv".format(TEST_DATE)) ffm_model.setTest(DIRECTORY_PATH + "test{0}.csv".format(TEST_DATE))
ffm_model.setSigmoid() ffm_model.setSigmoid()
ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE, ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE,"0.03","0.002"), DATA_END_DATE, lr, l2_lambda),
DIRECTORY_PATH + "testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt".format(TEST_DATE, DIRECTORY_PATH + "testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt".format(TEST_DATE,
DATA_START_DATE,DATA_END_DATE,"0.03","0.002")) DATA_START_DATE,
DATA_END_DATE, lr,
l2_lambda))
print('---------------candidates--------------') print('---------------candidates--------------')
get_eachCityDiaryTop2000() get_eachCityDiaryTop2000()
...@@ -53,7 +53,7 @@ def feature_en(): ...@@ -53,7 +53,7 @@ def feature_en():
print(data.head(2)) print(data.head(2))
# 持久化候选cid # 持久化候选cid
data_set_cid = data[["cid"]].unique() data_set_cid = data["cid"].unique()
cid_df = pd.DataFrame() cid_df = pd.DataFrame()
cid_df['cid'] = data_set_cid cid_df['cid'] = data_set_cid
print("data_set_cid :") print("data_set_cid :")
......
...@@ -13,6 +13,7 @@ from userProfile import fetch_user_profile ...@@ -13,6 +13,7 @@ from userProfile import fetch_user_profile
def device_id_merge(user_profile): def device_id_merge(user_profile):
file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop2000.csv".format(user_profile['city_id']) file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop2000.csv".format(user_profile['city_id'])
data = pd.read_csv(file_name) data = pd.read_csv(file_name)
data["device_id"] = user_profile['device_id'] data["device_id"] = user_profile['device_id']
now = datetime.datetime.now() now = datetime.datetime.now()
data["hour"] = now.hour data["hour"] = now.hour
...@@ -22,49 +23,49 @@ def device_id_merge(user_profile): ...@@ -22,49 +23,49 @@ def device_id_merge(user_profile):
data["hour"] = data["hour"].astype("category") data["hour"] = data["hour"].astype("category")
data["minute"] = data["minute"].astype("category") data["minute"] = data["minute"].astype("category")
data["y"] = 0 data["y"] = 0
data = data.drop("city_id",axis=1) data = data.drop("city_id", axis=1)
print(data.head(2)) print(data.head(2))
return data return data
# 把ffm.pkl load进来,将上面的表转化为ffm格式 # 把ffm.pkl load进来,将上面的表转化为ffm格式
def transform_ffm_format(ffm_format_pandas, df, device_id): def transform_ffm_format(df, device_id):
data = ffm_format_pandas.transform(df) file_path = DIRECTORY_PATH + "ffm_{0}_{1}.pkl".format(DATA_START_DATE, DATA_END_DATE)
now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M") with open(file_path, "rb") as f:
ffm_format_pandas = pickle.load(f)
data = ffm_format_pandas.transform(df)
now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
predict_file_name = DIRECTORY_PATH + "diaryPredictSet/{0}_{1}DiaryTop2000.csv".format(device_id, now) predict_file_name = DIRECTORY_PATH + "diaryPredictSet/{0}_{1}DiaryTop2000.csv".format(device_id, now)
data.to_csv(predict_file_name) data.to_csv(predict_file_name)
user_instance_file_path = '' user_instance_file_path = ''
return user_instance_file_path return user_instance_file_path
# 将模型加载,预测,把预测日记的概率值按照降序排序,存到一个表里 # 将模型加载,预测,把预测日记的概率值按照降序排序,存到一个表里
def predict(user_profile): def predict(user_profile):
ffm_model = xl.create_ffm() ffm_model = xl.create_ffm()
user_instance_file_path = device_id_merge(device_id) user_instance = device_id_merge(user_profile)
user_instance_file_path = transform_ffm_format(user_instance)
ffm_model.setTest(user_instance_file_path) ffm_model.setTest(user_instance_file_path)
ffm_model.predict(DIRECTORY_PATH + MODEL_VERSION, "./{0}_output.txt".format(device_id)) ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE, lr, l2_lambda),
DIRECTORY_PATH + "/{0}_output.txt".format(user_profile['device_id']))
def router(device_id): def router(device_id):
user_profile, is_exist = fetch_user_profile(device_id) user_profile, is_exist = fetch_user_profile(device_id)
file_path = DIRECTORY_PATH + "ffm_{0}_{1}.pkl".format(DATA_START_DATE, DATA_END_DATE)
with open(file_path, "rb") as f:
ffm_format_pandas = pickle.load(f)
if is_exist: if is_exist:
predict() predict(user_profile)
else: else:
pass # do something print('Sorry, we don\'t have you')
if __name__ == "__main__": if __name__ == "__main__":
router(device_id='358035085192742') router(device_id='358035085192742')
# 预测一些真实的device_id # 预测一些真实的device_id
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment