Commit 109574b7 authored by 张彦钊

add testcase

parent 232cabf1
......@@ -20,9 +20,13 @@ LOCAL_EAGLE_HOST = "192.168.15.12"
# Local path
LOCAL_DIRCTORY = "/Users/mac/utils/"
# Online diary queue domain name
QUEUE_ONLINE_HOST = 'rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com'
# Local diary queue domain name
LOCAL_HOST = 'rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com'
# # Offline pkl
# "/Users/mac/utils/ffm.pkl"
# # Offline prediction file
# "/Users/mac/utils/result/{0}.csv".format(queue_name)
# # Offline model and prediction output files
......
......@@ -52,17 +52,17 @@ def feature_en(x_list, device_id):
data["minute"] = data["minute"].astype("category")
# Although we are only predicting y, the FFM conversion requires a y column; it does not affect the prediction result
data["y"] = 0
# print("done 特征工程")
print("done 特征工程")
return data
# Load ffm.pkl and convert the data above into FFM format
def transform_ffm_format(df,queue_name,device_id):
with open(DIRECTORY_PATH + "ffm.pkl", "rb") as f:
with open(path + "ffm.pkl", "rb") as f:
ffm_format_pandas = pickle.load(f)
data = ffm_format_pandas.native_transform(df)
predict_file_name = DIRECTORY_PATH + "result/{0}_{1}.csv".format(device_id, queue_name)
predict_file_name = path + "result/{0}_{1}.csv".format(device_id, queue_name)
data.to_csv(predict_file_name, index=False, header=None)
# print("done ffm")
return predict_file_name
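For context: the pickled ffm_format_pandas object is a transformer built during training (not shown in this diff) that turns the categorical dataframe above into libffm text lines. A minimal sketch of the assumed output format, with made-up field/feature indices:
import pandas as pd
def to_ffm_line(y, row, field_index, feature_index):
    # libffm line format: "<label> <field>:<feature>:<value> ..."
    parts = [str(y)]
    for col, val in row.items():
        parts.append("{}:{}:1".format(field_index[col], feature_index[(col, val)]))
    return " ".join(parts)
df = pd.DataFrame({"device_id": ["d1"], "hour": ["14"], "minute": ["30"]})
field_index = {"device_id": 0, "hour": 1, "minute": 2}
feature_index = {("device_id", "d1"): 0, ("hour", "14"): 1, ("minute", "30"): 2}
print(to_ffm_line(0, df.iloc[0], field_index, feature_index))  # "0 0:0:1 1:1:1 2:2:1"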
......@@ -74,12 +74,12 @@ def predict(queue_name,queue_arg,device_id):
ffm_model = xl.create_ffm()
ffm_model.setTest(data_file_path)
ffm_model.setSigmoid()
ffm_model.predict(DIRECTORY_PATH + "model.out",
DIRECTORY_PATH + "result/output{0}_{1}.csv".format(device_id, queue_name))
ffm_model.predict(path + "model.out",
path + "result/output{0}_{1}.csv".format(device_id, queue_name))
def save_result(queue_name,queue_arg,device_id):
score_df = pd.read_csv(DIRECTORY_PATH + "result/output{0}_{1}.csv".format(device_id, queue_name), header=None)
score_df = pd.read_csv(path + "result/output{0}_{1}.csv".format(device_id, queue_name), header=None)
mm_scaler = MinMaxScaler()
mm_scaler.fit(score_df)
score_df = pd.DataFrame(mm_scaler.transform(score_df))
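Taken together, predict and save_result boil down to an xlearn sigmoid prediction followed by a per-batch min-max rescale of the raw scores. A standalone sketch with placeholder file paths (test.ffm / model.out / output.csv are assumptions, not the project's real paths):
import pandas as pd
import xlearn as xl
from sklearn.preprocessing import MinMaxScaler
test_path = "/tmp/test.ffm"    # libffm-formatted test set (placeholder)
model_path = "/tmp/model.out"  # trained FFM model (placeholder)
out_path = "/tmp/output.csv"   # one sigmoid score per line (placeholder)
ffm_model = xl.create_ffm()
ffm_model.setTest(test_path)
ffm_model.setSigmoid()         # output probabilities instead of raw margins
ffm_model.predict(model_path, out_path)
# Rescale this batch of scores into [0, 1], as save_result does above
scores = pd.read_csv(out_path, header=None)
scores = pd.DataFrame(MinMaxScaler().fit_transform(scores))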
......@@ -105,7 +105,11 @@ def save_result(queue_name,queue_arg,device_id):
def get_score(queue_arg):
db = pymysql.connect(host='10.66.157.22', port=4000, user='root',passwd='3SYz54LS9#^9sBvC', db='eagle')
if flag:
db = pymysql.connect(host='10.66.157.22', port=4000, user='root',passwd='3SYz54LS9#^9sBvC', db='eagle')
else:
db = pymysql.connect(host=LOCAL_HOST, port=3306, user='work', passwd='workwork', db='zhengxing_tes')
cursor = db.cursor()
# Strip the leading "diary|" from diary_id
diary_list = tuple(list(map(lambda x:x[6:],queue_arg[2])))
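The slice x[6:] works because the ids arrive as "diary|<id>" and the prefix "diary|" is exactly 6 characters. A small illustration (the ids are made up):
raw_ids = ["diary|16215222", "diary|16813340"]
diary_list = tuple(x[6:] for x in raw_ids)              # ('16215222', '16813340')
# Equivalent but more explicit, assuming every id carries the prefix:
diary_list = tuple(x.split("|", 1)[1] for x in raw_ids)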
......@@ -163,9 +167,12 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id):
if flag:
db = pymysql.connect(host=QUEUE_ONLINE_HOST, port=3306, user='doris', passwd='o5gbA27hXHHm',
db='doris_prod')
else:
db = pymysql.connect(host=LOCAL_HOST, port=3306, user='work',passwd='workwork', db='doris_test')
db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
passwd='o5gbA27hXHHm', db='doris_prod')
cursor = db.cursor()
id_str = str(diary_id[0])
for i in range(1, len(diary_id)):
......@@ -205,8 +212,13 @@ def queue_compare(old_list, new_list):
def get_queue(device_id, city_id,queue_name):
db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
passwd='o5gbA27hXHHm', db='doris_prod')
if flag:
db = pymysql.connect(host=QUEUE_ONLINE_HOST, port=3306, user='doris',passwd='o5gbA27hXHHm',
db='doris_prod')
else:
db = pymysql.connect(host=LOCAL_HOST, port=3306, user='work',
passwd='workwork', db='doris_test')
cursor = db.cursor()
sql = "select {} from device_diary_queue " \
"where device_id = '{}' and city_id = '{}';".format(queue_name,device_id, city_id)
......@@ -271,19 +283,18 @@ def multi_proecess_update(device_id, city_id, data_set_cid,total_video_id):
if __name__ == "__main__":
warnings.filterwarnings("ignore")
flag = False
path = LOCAL_DIRCTORY
# The IP below is the online server's IP
if socket.gethostbyname(socket.gethostname()) == '10.31.242.83':
flag = True
path = DIRECTORY_PATH
total_number = 0
# Cached diary video id list
cache_video_id = []
cache_device_city_list = []
update_queue_numbers = 0
while True:
if flag:
data_set_cid = pd.read_csv(DIRECTORY_PATH + "data_set_cid.csv")["cid"].values.tolist()
else:
data_set_cid = pd.read_csv(LOCAL_DIRCTORY + "data_set_cid.csv")["cid"].values.tolist()
data_set_cid = pd.read_csv(path + "data_set_cid.csv")["cid"].values.tolist()
total_video_id = get_video_id(cache_video_id)
cache_video_id = total_video_id
device_city_list = get_active_users(flag)
......
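The __main__ block above replaces the scattered flag checks with a single path variable chosen from the hostname. A minimal sketch of that environment switch as a function (detect_env is hypothetical; DIRECTORY_PATH and LOCAL_DIRCTORY come from the config shown earlier):
import socket
ONLINE_SERVER_IP = '10.31.242.83'  # the online server IP from the diff above
def detect_env():
    # True + online path on the production host, False + local path elsewhere
    if socket.gethostbyname(socket.gethostname()) == ONLINE_SERVER_IP:
        return True, DIRECTORY_PATH
    return False, LOCAL_DIRCTORY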
......@@ -9,11 +9,10 @@ def get_video_id():
sql = "select diary_id from feed_diary_boost;"
cursor.execute(sql)
result = cursor.fetchall()
df = pd.DataFrame(list(result))
video_id = df[0].values.tolist()
print(video_id[:10])
df=pd.DataFrame(list(result))
df.to_csv('/data2/models/video_diary_id.csv',index=None)
print("成功")
db.close()
return video_id
def queue():
db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
......@@ -29,12 +28,4 @@ def queue():
return all
if __name__=="__main__":
all = queue()
video = get_video_id()
video_id = []
x = 1
while x < len(all):
video_id.append(all[x])
x += 3
a = list(set(video)&set(video_id))
print(a == video_id)
\ No newline at end of file
get_video_id()
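The deleted __main__ block used to take every third element of the flattened queue (indices 1, 4, 7, ...) as a video diary id and compare it against the boosted diary ids. If that check is still wanted as a test, a hedged sketch of it as a helper (the step of 3 and start index 1 follow the removed code; the subset check replaces the original order-sensitive comparison):
def boosted_ids_cover_queue(all_ids, boosted_ids, step=3):
    # Every third element of the flattened queue is treated as a video diary id
    queue_video_ids = all_ids[1::step]
    return set(queue_video_ids) <= set(boosted_ids)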