Commit 90cddbc5 authored by 张彦钊

delete prints

parent 2cd09bf7
...@@ -23,7 +23,6 @@ def get_allCitiesDiaryTop3000(): ...@@ -23,7 +23,6 @@ def get_allCitiesDiaryTop3000():
allCitiesTop3000 = allCitiesTop3000.rename(columns={0: "city_id", 1: "cid"}) allCitiesTop3000 = allCitiesTop3000.rename(columns={0: "city_id", 1: "cid"})
allCitiesTop3000 = filter_cid(allCitiesTop3000) allCitiesTop3000 = filter_cid(allCitiesTop3000)
allCitiesTop3000.to_csv(DIRECTORY_PATH + "diaryTestSet/allCitiesDiaryTop3000.csv",index=False) allCitiesTop3000.to_csv(DIRECTORY_PATH + "diaryTestSet/allCitiesDiaryTop3000.csv",index=False)
print("成功获取全国日记点击量TOP3000")
return allCitiesTop3000 return allCitiesTop3000
...@@ -33,7 +32,6 @@ def get_cityList(): ...@@ -33,7 +32,6 @@ def get_cityList():
cityList = con_sql(sql) cityList = con_sql(sql)
cityList.to_csv(DIRECTORY_PATH + "diaryTestSet/cityList.csv",index=False) cityList.to_csv(DIRECTORY_PATH + "diaryTestSet/cityList.csv",index=False)
cityList = cityList[0].values.tolist() cityList = cityList[0].values.tolist()
print("成功获取全国城市列表")
return cityList return cityList
...@@ -58,7 +56,7 @@ def get_eachCityDiaryTop3000(): ...@@ -58,7 +56,7 @@ def get_eachCityDiaryTop3000():
file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(i) file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(i)
data.to_csv(file_name,index=False) data.to_csv(file_name,index=False)
print("成功保存{}地区DiaryTop3000".format(i))
def pool_method(city,sql,allCitiesTop3000): def pool_method(city,sql,allCitiesTop3000):
data = con_sql(sql) data = con_sql(sql)
...@@ -72,7 +70,6 @@ def pool_method(city,sql,allCitiesTop3000): ...@@ -72,7 +70,6 @@ def pool_method(city,sql,allCitiesTop3000):
file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(city) file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(city)
data.to_csv(file_name, index=False) data.to_csv(file_name, index=False)
print("成功保存{}地区DiaryTop3000".format(city))
# 多线程方法获取全国城市热门日记 # 多线程方法获取全国城市热门日记
......
...@@ -40,7 +40,7 @@ def feature_en(x_list, device_id): ...@@ -40,7 +40,7 @@ def feature_en(x_list, device_id):
data["minute"] = data["minute"].astype("category") data["minute"] = data["minute"].astype("category")
# 虽然预测y,但ffm转化需要y,并不影响预测结果 # 虽然预测y,但ffm转化需要y,并不影响预测结果
data["y"] = 0 data["y"] = 0
print("done 特征工程") # print("done 特征工程")
return data return data
...@@ -52,7 +52,7 @@ def transform_ffm_format(df,queue_name,device_id): ...@@ -52,7 +52,7 @@ def transform_ffm_format(df,queue_name,device_id):
data = ffm_format_pandas.native_transform(df) data = ffm_format_pandas.native_transform(df)
predict_file_name = DIRECTORY_PATH + "result/{0}_{1}.csv".format(device_id, queue_name) predict_file_name = DIRECTORY_PATH + "result/{0}_{1}.csv".format(device_id, queue_name)
data.to_csv(predict_file_name, index=False, header=None) data.to_csv(predict_file_name, index=False, header=None)
print("done ffm") # print("done ffm")
return predict_file_name return predict_file_name
...@@ -131,7 +131,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id): ...@@ -131,7 +131,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
new_queue.insert(i, j) new_queue.insert(i, j)
i += 5 i += 5
print("分数合并成功") # print("分数合并成功")
return new_queue return new_queue
# 如果没有视频日记 # 如果没有视频日记
else: else:
...@@ -139,7 +139,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id): ...@@ -139,7 +139,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
predict_score_df = predict_score_df.set_index(["cid"]) predict_score_df = predict_score_df.set_index(["cid"])
score_df["score"]=score_df["score"]+predict_score_df["score"] score_df["score"]=score_df["score"]+predict_score_df["score"]
score_df = score_df.sort_values(by="score", ascending=False) score_df = score_df.sort_values(by="score", ascending=False)
print("1分数合并成功") # print("分数合并成功1")
return score_df.index.tolist() return score_df.index.tolist()
...@@ -159,26 +159,6 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id): ...@@ -159,26 +159,6 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id):
print("成功写入diaryid") print("成功写入diaryid")
# Fetch the latest megacity_queue before updating
def get_megacity_queue(device_id, city_id):
    """Return the stored megacity diary queue for one device/city pair.

    Reads the ``megacity_queue`` column of ``device_diary_queue`` for the
    given ``device_id`` and ``city_id``. The column is treated as a
    comma-separated string of ids; each id is returned prefixed with
    ``"diary|"``.

    Returns:
        A list of ``"diary|<id>"`` strings, or ``False`` when no matching
        row exists or the stored value is NULL.
    """
    # NOTE(review): connection credentials are hard-coded here; they should
    # come from configuration or the environment rather than source control.
    db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
                         passwd='o5gbA27hXHHm', db='doris_prod')
    try:
        cursor = db.cursor()
        # Let the driver quote the values instead of formatting them into the
        # statement, so a crafted device_id/city_id cannot alter the query.
        sql = "select megacity_queue from device_diary_queue " \
              "where device_id = %s and city_id = %s;"
        # str() keeps the comparison string-typed, as the quoted '{}' did.
        cursor.execute(sql, (str(device_id), str(city_id)))
        row = cursor.fetchone()
    finally:
        # Always release the connection, including the "no row" and error paths.
        db.close()

    if row is None or row[0] is None:
        return False

    megacity_queue = ["diary|" + str(x) for x in row[0].split(",")]
    print("成功获取megacity_queue")
    return megacity_queue
def get_queue(device_id, city_id,queue_name): def get_queue(device_id, city_id,queue_name):
db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris', db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
passwd='o5gbA27hXHHm', db='doris_prod') passwd='o5gbA27hXHHm', db='doris_prod')
...@@ -190,13 +170,13 @@ def get_queue(device_id, city_id,queue_name): ...@@ -190,13 +170,13 @@ def get_queue(device_id, city_id,queue_name):
df = pd.DataFrame(list(result)) df = pd.DataFrame(list(result))
if df.empty: if df.empty:
print("该用户对应的日记为空") # print("该用户对应的日记为空")
return False return False
else: else:
queue_list = df.loc[0, 0].split(",") queue_list = df.loc[0, 0].split(",")
queue_list = list(map(lambda x: "diary|" + str(x), queue_list)) queue_list = list(map(lambda x: "diary|" + str(x), queue_list))
db.close() db.close()
print("成功获取queue") # print("成功获取queue")
return queue_list return queue_list
...@@ -205,7 +185,7 @@ def pipe_line(queue_name, queue_arg, device_id,total_video_id): ...@@ -205,7 +185,7 @@ def pipe_line(queue_name, queue_arg, device_id,total_video_id):
predict_score_df = save_result(queue_name, queue_arg, device_id) predict_score_df = save_result(queue_name, queue_arg, device_id)
score_df = get_score(queue_arg) score_df = get_score(queue_arg)
if score_df.empty: if score_df.empty:
print("获取的日记列表是空") # print("获取的日记列表是空")
return False return False
else: else:
score_df = score_df.rename(columns={0: "score", 1: "cid"}) score_df = score_df.rename(columns={0: "score", 1: "cid"})
...@@ -224,7 +204,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id): ...@@ -224,7 +204,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id):
diary_queue = pipe_line(queue_name, queue_arg, device_id,total_video_id) diary_queue = pipe_line(queue_name, queue_arg, device_id,total_video_id)
if diary_queue: if diary_queue:
update_sql_dairy_queue(queue_name, diary_queue, device_id, city_id) update_sql_dairy_queue(queue_name, diary_queue, device_id, city_id)
print("更新结束") # print("更新结束")
else: else:
print("获取的日记列表是空,所以不更新日记队列") print("获取的日记列表是空,所以不更新日记队列")
else: else:
......
...@@ -65,7 +65,6 @@ def feature_en(data_start_date, data_end_date, validation_date, test_date): ...@@ -65,7 +65,6 @@ def feature_en(data_start_date, data_end_date, validation_date, test_date):
def ffm_transform(data, test_number, validation_number): def ffm_transform(data, test_number, validation_number):
print("Start ffm transform") print("Start ffm transform")
start = time.time() start = time.time()
ffm_train = multiFFMFormatPandas() ffm_train = multiFFMFormatPandas()
......
...@@ -23,7 +23,7 @@ if __name__ == "__main__": ...@@ -23,7 +23,7 @@ if __name__ == "__main__":
multi_get_eachCityDiaryTop3000() multi_get_eachCityDiaryTop3000()
end = time.time() end = time.time()
print("获取各城市热门日记耗时{}分".format((end - start) / 60)) print("获取各城市热门日记耗时{}分".format((end - start) / 60))
print("end")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment