Commit 90cddbc5 authored by 张彦钊

delete prints

parent 2cd09bf7
......@@ -23,7 +23,6 @@ def get_allCitiesDiaryTop3000():
allCitiesTop3000 = allCitiesTop3000.rename(columns={0: "city_id", 1: "cid"})
allCitiesTop3000 = filter_cid(allCitiesTop3000)
allCitiesTop3000.to_csv(DIRECTORY_PATH + "diaryTestSet/allCitiesDiaryTop3000.csv",index=False)
print("成功获取全国日记点击量TOP3000")
return allCitiesTop3000
......@@ -33,7 +32,6 @@ def get_cityList():
cityList = con_sql(sql)
cityList.to_csv(DIRECTORY_PATH + "diaryTestSet/cityList.csv",index=False)
cityList = cityList[0].values.tolist()
print("成功获取全国城市列表")
return cityList
......@@ -58,7 +56,7 @@ def get_eachCityDiaryTop3000():
file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(i)
data.to_csv(file_name,index=False)
print("成功保存{}地区DiaryTop3000".format(i))
def pool_method(city,sql,allCitiesTop3000):
data = con_sql(sql)
......@@ -72,7 +70,6 @@ def pool_method(city,sql,allCitiesTop3000):
file_name = DIRECTORY_PATH + "diaryTestSet/{0}DiaryTop3000.csv".format(city)
data.to_csv(file_name, index=False)
print("成功保存{}地区DiaryTop3000".format(city))
# 多线程方法获取全国城市热门日记
......
......@@ -40,7 +40,7 @@ def feature_en(x_list, device_id):
data["minute"] = data["minute"].astype("category")
# 虽然预测y,但ffm转化需要y,并不影响预测结果
data["y"] = 0
print("done 特征工程")
# print("done 特征工程")
return data
......@@ -52,7 +52,7 @@ def transform_ffm_format(df,queue_name,device_id):
data = ffm_format_pandas.native_transform(df)
predict_file_name = DIRECTORY_PATH + "result/{0}_{1}.csv".format(device_id, queue_name)
data.to_csv(predict_file_name, index=False, header=None)
print("done ffm")
# print("done ffm")
return predict_file_name
......@@ -131,7 +131,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
new_queue.insert(i, j)
i += 5
print("分数合并成功")
# print("分数合并成功")
return new_queue
# 如果没有视频日记
else:
......@@ -139,7 +139,7 @@ def update_dairy_queue(score_df,predict_score_df,total_video_id):
predict_score_df = predict_score_df.set_index(["cid"])
score_df["score"]=score_df["score"]+predict_score_df["score"]
score_df = score_df.sort_values(by="score", ascending=False)
print("1分数合并成功")
# print("分数合并成功1")
return score_df.index.tolist()
......@@ -159,26 +159,6 @@ def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id):
print("成功写入diaryid")
# 更新前获取最新的native_queue
def get_megacity_queue(device_id, city_id):
    """Fetch the stored megacity diary queue for one device in one city.

    Looks up the ``megacity_queue`` column of ``device_diary_queue`` for the
    given ``device_id`` / ``city_id`` pair and splits the comma-separated
    value into a list of ids, each prefixed with ``"diary|"``.

    Args:
        device_id: Device identifier; compared as a string, as before.
        city_id: City identifier; compared as a string, as before.

    Returns:
        A list of ``"diary|<id>"`` strings, or ``False`` when no row matches.
    """
    # NOTE(review): connection credentials are hard-coded here; they should be
    # moved to configuration / a secret store and rotated.
    db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
                         passwd='o5gbA27hXHHm', db='doris_prod')
    try:
        cursor = db.cursor()
        # Placeholders let the driver escape the values instead of splicing
        # caller-supplied ids straight into the SQL text.
        sql = "select megacity_queue from device_diary_queue " \
              "where device_id = %s and city_id = %s;"
        cursor.execute(sql, (str(device_id), str(city_id)))
        row = cursor.fetchone()
    finally:
        # Close on every path; previously the connection leaked whenever the
        # query returned no rows.
        db.close()

    if row is None:
        return False

    megacity_queue = ["diary|" + str(x) for x in row[0].split(",")]
    print("成功获取megacity_queue")
    return megacity_queue
def get_queue(device_id, city_id,queue_name):
db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
passwd='o5gbA27hXHHm', db='doris_prod')
......@@ -190,13 +170,13 @@ def get_queue(device_id, city_id,queue_name):
df = pd.DataFrame(list(result))
if df.empty:
print("该用户对应的日记为空")
# print("该用户对应的日记为空")
return False
else:
queue_list = df.loc[0, 0].split(",")
queue_list = list(map(lambda x: "diary|" + str(x), queue_list))
db.close()
print("成功获取queue")
# print("成功获取queue")
return queue_list
......@@ -205,7 +185,7 @@ def pipe_line(queue_name, queue_arg, device_id,total_video_id):
predict_score_df = save_result(queue_name, queue_arg, device_id)
score_df = get_score(queue_arg)
if score_df.empty:
print("获取的日记列表是空")
# print("获取的日记列表是空")
return False
else:
score_df = score_df.rename(columns={0: "score", 1: "cid"})
......@@ -224,7 +204,7 @@ def user_update(device_id, city_id, queue_name,data_set_cid,total_video_id):
diary_queue = pipe_line(queue_name, queue_arg, device_id,total_video_id)
if diary_queue:
update_sql_dairy_queue(queue_name, diary_queue, device_id, city_id)
print("更新结束")
# print("更新结束")
else:
print("获取的日记列表是空,所以不更新日记队列")
else:
......
......@@ -65,7 +65,6 @@ def feature_en(data_start_date, data_end_date, validation_date, test_date):
def ffm_transform(data, test_number, validation_number):
print("Start ffm transform")
start = time.time()
ffm_train = multiFFMFormatPandas()
......
......@@ -23,7 +23,7 @@ if __name__ == "__main__":
multi_get_eachCityDiaryTop3000()
end = time.time()
print("获取各城市热门日记耗时{}分".format((end - start) / 60))
print("end")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment