Commit 6136dc3d authored by 张彦钊's avatar 张彦钊

update test file

parent 5fc6569c
......@@ -7,11 +7,10 @@ from datetime import datetime
import utils
import warnings
from multiprocessing import Pool
from config import *
import json
from sklearn.preprocessing import MinMaxScaler
import time
from userProfile import get_active_users
# from userProfile import get_active_users
import os
......@@ -27,9 +26,6 @@ def get_video_id():
db.close()
return video_id
# 将device_id、city_id拼接到对应的城市热门日记表。注意:下面预测集特征顺序要与训练集保持一致
def feature_en(x_list, device_id):
data = pd.DataFrame(x_list)
......@@ -52,12 +48,14 @@ def feature_en(x_list, device_id):
# 把ffm.pkl load进来,将上面的表转化为ffm格式
def transform_ffm_format(df,queue_name):
with open(DIRECTORY_PATH + "ffm.pkl", "rb") as f:
# with open(DIRECTORY_PATH + "ffm.pkl", "rb") as f:
with open("/Users/mac/utils/ffm.pkl", "rb") as f:
ffm_format_pandas = pickle.load(f)
data = ffm_format_pandas.native_transform(df)
predict_file_name = DIRECTORY_PATH + "result/{0}_{1}.csv".format(device_city[0], queue_name)
# predict_file_name = DIRECTORY_PATH + "result/{0}_{1}.csv".format(device_city[0], queue_name)
predict_file_name = "/Users/mac/utils/result/{0}_{1}.csv".format(queue_name)
data.to_csv(predict_file_name, index=False, header=None)
# print("done ffm")
print("done ffm")
return predict_file_name
......@@ -70,13 +68,16 @@ def predict(queue_name, name_dict):
ffm_model.setTest(data_file_path)
ffm_model.setSigmoid()
ffm_model.predict(DIRECTORY_PATH + "model.out",
DIRECTORY_PATH + "result/output{0}_{1}.csv".format(device_city[0], queue_name))
ffm_model.predict("/Users/mac/utils/model.out",
"/Users/mac/utils/result/{0}_output.txt".format(queue_name))
# ffm_model.predict(DIRECTORY_PATH + "model.out",
# DIRECTORY_PATH + "result/output{0}_{1}.csv".format(device_city[0], queue_name))
return save_result(queue_name, name_dict)
def save_result(queue_name, name_dict):
score_df = pd.read_csv(DIRECTORY_PATH + "result/output{0}_{1}.csv".format(device_city[0], queue_name), header=None)
# score_df = pd.read_csv(DIRECTORY_PATH + "result/output{0}_{1}.csv".format(device_city[0], queue_name), header=None)
score_df = pd.read_csv("/Users/mac/utils/result/{0}_output.txt".format(queue_name), header=None)
# print(score_df)
mm_scaler = MinMaxScaler()
mm_scaler.fit(score_df)
......@@ -167,23 +168,27 @@ def update_dairy_queue(score_df,predict_score_df):
return score_df["cid"].values.tolist()
def update_sql_dairy_queue(queue_name, diary_id,device_city):
def update_sql_dairy_queue(queue_name, diary_id):
db = pymysql.connect(host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
passwd='workwork', db='doris_test')
cursor = db.cursor()
id_str = str(diary_id[0])
for i in range(1, len(diary_id)):
id_str = id_str + "," + str(diary_id[i])
print("写入前")
print(diary_id)
print(id_str[:80])
sql = "update device_diary_queue set {}='{}' where device_id = '{}' and city_id = '{}'".format\
(queue_name, diary_id, device_city[0],device_city[1])
cursor.execute(sql)
db.commit()
db.close()
print("成功写入diaryid")
# 更新前获取最新的native_queue
def get_native_queue(device_id,city_id):
db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
passwd='o5gbA27hXHHm', db='doris_prod')
db = pymysql.connect(host='rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com', port=3306, user='work',
passwd='workwork', db='doris_test')
cursor = db.cursor()
sql = "select native_queue from device_diary_queue where device_id = '{}' and city_id = '{}';".format(device_id,city_id)
cursor.execute(sql)
......@@ -203,7 +208,7 @@ def multi_update(queue_name, name_dict, native_queue):
if name_dict[queue_name] != []:
diary_id = predict(queue_name, name_dict)
if get_native_queue(device_city[0], device_city[1]) == native_queue:
update_sql_dairy_queue(queue_name, diary_id,device_city)
update_sql_dairy_queue(queue_name, diary_id)
print("更新结束")
else:
print("不需要更新日记队列")
......@@ -215,10 +220,11 @@ def get_queue(device_id, city_id):
cursor = db.cursor()
sql = "select native_queue,nearby_queue,nation_queue,megacity_queue from device_diary_queue " \
"where device_id = '{}' and city = '{}';".format(device_id, city_id)
"where device_id = '{}' and city_id = '{}';".format(device_id, city_id)
cursor.execute(sql)
result = cursor.fetchall()
df = pd.DataFrame(list(result))
if not df.empty:
df = df.rename(columns={0: "native_queue", 1: "nearby_queue", 2: "nation_queue", 3: "megacity_queue"})
......@@ -293,9 +299,12 @@ if __name__ == "__main__":
# device_city_list = list(zip(device_list,city_list))
# start = time.time()
# 测试改生产改一下模型、pickle、输出文件路径、读取文件路径
warnings.filterwarnings("ignore")
data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist()
device_city_list = [("356156075348110","tainjin")]
# data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist()
data_set_cid = pd.read_csv("/Users/mac/utils/data_set_cid.csv")["cid"].values.tolist()
device_city_list = [("356156075348110","tianjin")]
if device_city_list != []:
for device_city in device_city_list:
user_update(device_city[0], device_city[1])
......@@ -305,13 +314,9 @@ if __name__ == "__main__":
end = time.time()
# # TODO 上线后把预测用户改成多进程预测
# # TODO 上线后把预测用户改成多进程预测
......@@ -7,6 +7,7 @@ def get_active_users():
now = datetime.now()
now_start = str(now)[:16] + ":00"
now_end = str(now)[:16] + ":59"
没有city_id的是“” 这个值可能是空
sql = "select device_id from user_active_time order by active_time desc limit 1;"
# sql = "select device_id from user_active_time " \
# "where active_time <= '{}' and active_time >= '{}'".format(now_end,now_start)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment