Commit e5a2d469 authored by 张彦钊

删除获取热门日记集合

parent 0eacdc06
......@@ -17,7 +17,7 @@ def filter_cid(df):
def get_allCitiesDiaryTop3000():
# 获取全国点击量TOP3000日记
-sql = "select city_id,cid from data_feed_click " \
+sql = "select city_id,cid from data_feed_click2 " \
"where cid_type = 'diary' group by cid order by max(click_count_choice) desc limit 3000"
allCitiesTop3000 = con_sql(sql)
allCitiesTop3000 = allCitiesTop3000.rename(columns={0: "city_id", 1: "cid"})
......@@ -28,7 +28,7 @@ def get_allCitiesDiaryTop3000():
def get_cityList():
# 获取全国城市列表
-sql = "select distinct city_id from data_feed_click"
+sql = "select distinct city_id from data_feed_click2"
cityList = con_sql(sql)
cityList.to_csv(DIRECTORY_PATH + "diaryTestSet/cityList.csv",index=False)
cityList = cityList[0].values.tolist()
......@@ -40,7 +40,7 @@ def get_eachCityDiaryTop3000():
cityList = get_cityList()
allCitiesTop3000 = get_allCitiesDiaryTop3000()
for i in cityList:
-sql = "select city_id,cid from data_feed_click " \
+sql = "select city_id,cid from data_feed_click2 " \
"where cid_type = 'diary' and city_id = '{0}' group by cid " \
"order by max(click_count_choice) desc limit 3000".format(i)
data = con_sql(sql)
......@@ -78,7 +78,7 @@ def multi_get_eachCityDiaryTop3000(processes=8):
allCitiesTop3000 = get_allCitiesDiaryTop3000()
pool = Pool(processes)
for city in city_list:
-sql = "select city_id,cid from data_feed_click " \
+sql = "select city_id,cid from data_feed_click2 " \
"where cid_type = 'diary' and city_id = '{0}' group by cid " \
"order by max(click_count_choice) desc limit 3000".format(city)
......
......@@ -18,11 +18,6 @@ if __name__ == "__main__":
train()
end_train = time.time()
print("训练模型耗时{}分".format((end_train-start_train)/60))
-print('---------------prepare candidates--------------')
-start = time.time()
-multi_get_eachCityDiaryTop3000()
-end = time.time()
-print("获取各城市热门日记耗时{}分".format((end - start) / 60))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment