Commit c9ffad87 authored by 张彦钊

fix bugs

parent f57de602
......@@ -6,5 +6,10 @@ DATA_START_DATE = '2018-07-05'
DATA_END_DATE = '2018-08-06'
MODEL_VERSION = ''
lr = 0.03
l2_lambda = 0.002
# processData.py
# diaryTraining.py
......@@ -6,22 +6,21 @@ print("Start training")
ffm_model = xl.create_ffm()
ffm_model.setTrain(DIRECTORY_PATH + "train{0}-{1}.csv".format(DATA_START_DATE, VALIDATION_DATE))
ffm_model.setValidate(DIRECTORY_PATH + "validation{0}.csv".format(VALIDATION_DATE))
lr =0.03
l2_lambda = 0.002
param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc'}
param = {'task': 'binary', 'lr': lr, 'lambda': l2_lambda, 'metric': 'auc'}
ffm_model.fit(param, DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE,lr,l2_lambda))
DATA_END_DATE, lr, l2_lambda))
print("predicting")
ffm_model.setTest(DIRECTORY_PATH + "test{0}.csv".format(TEST_DATE))
ffm_model.setSigmoid()
ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE,"0.03","0.002"),
DATA_END_DATE, lr, l2_lambda),
DIRECTORY_PATH + "testset{0}_output_model_{1}-{2}_lr{3}_lambda{4}.txt".format(TEST_DATE,
DATA_START_DATE,DATA_END_DATE,"0.03","0.002"))
DATA_START_DATE,
DATA_END_DATE, lr,
l2_lambda))
print('---------------candidates--------------')
get_eachCityDiaryTop2000()
......@@ -53,7 +53,7 @@ def feature_en():
print(data.head(2))
# 持久化候选cid
data_set_cid = data[["cid"]].unique()
data_set_cid = data["cid"].unique()
cid_df = pd.DataFrame()
cid_df['cid'] = data_set_cid
print("data_set_cid :")
......
......@@ -13,6 +13,7 @@ from userProfile import fetch_user_profile
def device_id_merge(user_profile):
file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop2000.csv".format(user_profile['city_id'])
data = pd.read_csv(file_name)
data["device_id"] = user_profile['device_id']
now = datetime.datetime.now()
data["hour"] = now.hour
......@@ -22,49 +23,49 @@ def device_id_merge(user_profile):
data["hour"] = data["hour"].astype("category")
data["minute"] = data["minute"].astype("category")
data["y"] = 0
data = data.drop("city_id",axis=1)
data = data.drop("city_id", axis=1)
print(data.head(2))
return data
# 把ffm.pkl load进来,将上面的表转化为ffm格式
def transform_ffm_format(ffm_format_pandas, df, device_id):
data = ffm_format_pandas.transform(df)
now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
def transform_ffm_format(df, device_id):
file_path = DIRECTORY_PATH + "ffm_{0}_{1}.pkl".format(DATA_START_DATE, DATA_END_DATE)
with open(file_path, "rb") as f:
ffm_format_pandas = pickle.load(f)
data = ffm_format_pandas.transform(df)
now = datetime.datetime.now().strftime("%Y-%m-%d-%H-%M")
predict_file_name = DIRECTORY_PATH + "diaryPredictSet/{0}_{1}DiaryTop2000.csv".format(device_id, now)
data.to_csv(predict_file_name)
user_instance_file_path = ''
return user_instance_file_path
predict_file_name = DIRECTORY_PATH + "diaryPredictSet/{0}_{1}DiaryTop2000.csv".format(device_id, now)
data.to_csv(predict_file_name)
user_instance_file_path = ''
return user_instance_file_path
# 将模型加载,预测,把预测日记的概率值按照降序排序,存到一个表里
def predict(user_profile):
ffm_model = xl.create_ffm()
user_instance_file_path = device_id_merge(device_id)
user_instance = device_id_merge(user_profile)
user_instance_file_path = transform_ffm_format(user_instance)
ffm_model.setTest(user_instance_file_path)
ffm_model.predict(DIRECTORY_PATH + MODEL_VERSION, "./{0}_output.txt".format(device_id))
ffm_model.predict(DIRECTORY_PATH + "model_{0}-{1}_lr{2}_lambda{3}.out".format(DATA_START_DATE,
DATA_END_DATE, lr, l2_lambda),
DIRECTORY_PATH + "/{0}_output.txt".format(user_profile['device_id']))
def router(device_id):
user_profile, is_exist = fetch_user_profile(device_id)
file_path = DIRECTORY_PATH + "ffm_{0}_{1}.pkl".format(DATA_START_DATE, DATA_END_DATE)
with open(file_path, "rb") as f:
ffm_format_pandas = pickle.load(f)
if is_exist:
predict()
predict(user_profile)
else:
pass # do something
print('Sorry, we don\'t have you')
if __name__ == "__main__":
router(device_id='358035085192742')
# 预测一些真实的device_id
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment