Change SQL: group diary click rows by cid and order by max(click_count_choice); build diary URL from cid[6:]

55a957a7 · 张彦钊 · 5b296bc4 · 55a957a7 · 55a957a7 · 55a957a7
Commit 55a957a7 authored Aug 08, 2018 by 张彦钊
Show whitespace changes
Inline Side-by-side

Showing with 6 additions and 5 deletions

.DS_Store .DS_Store +0 -0

diaryCandidateSet.py diaryCandidateSet.py +5 -4

predictDiary.py predictDiary.py +1 -1

No files found.
--- a/.DS_Store
+++ b/.DS_Store
--- a/diaryCandidateSet.py
+++ b/diaryCandidateSet.py
@@ -18,7 +18,9 @@ def filter_cid(df):

 def get_allCitiesDiaryTop2000():
    # 获取全国点击量TOP2000日记
-    sql = "select city_id,cid from data_feed_click where cid_type = 'diary' order by click_count_choice desc limit 2000"
+
+    sql = "select city_id,cid from data_feed_click " \
+          "where cid_type = 'diary' group by cid order by max(click_count_choice) desc limit 2000"
    allCitiesTop2000 = con_sql(sql)
    allCitiesTop2000 = allCitiesTop2000.rename(columns={0: "city_id", 1: "cid"})
    allCitiesTop2000 = filter_cid(allCitiesTop2000)
@@ -43,9 +45,8 @@ def get_eachCityDiaryTop2000():
    cityList = get_cityList()
    allCitiesTop2000 = get_allCitiesDiaryTop2000()
    for i in cityList:
-        sql = "select city_id,cid from data_feed_click " \
-              "where cid_type = 'diary' and city_id = '{0}' " \
-              "order by click_count_choice desc limit 2000".format(i)
+        sql = "select '{0}',cid from data_feed_click " \
+              "where cid_type = 'diary' group by cid order by max(click_count_choice) desc limit 2000".format(i)
        data = con_sql(sql)
        data = data.rename(columns={0: "city_id", 1: "cid"})
        data = filter_cid(data)

--- a/predictDiary.py
+++ b/predictDiary.py
@@ -72,7 +72,7 @@ def upload_predict(user_profile, instance):

 def wrapper_result(prob, device_id):
    prob = prob.head(500)
-    prob.loc[:,"url"] = prob["cid"].apply(lambda x: "http://m.igengmei.com/diary_book/" + str(x) + '/')
+    prob.loc[:,"url"] = prob["cid"].apply(lambda x: "http://m.igengmei.com/diary_book/" + str(x[6:]) + '/')
    prob.to_csv(DIRECTORY_PATH + "result/{}_feed".format(device_id),index= False)