增加对日记视频集合为空的判断

0eacdc06 · 张彦钊 · 9ef83045 · 0eacdc06 · 0eacdc06 · 0eacdc06
Commit 0eacdc06 authored 6 years ago by 张彦钊
Hide whitespace changes
Inline Side-by-side

Showing with 46 additions and 35 deletions

diaryQueueUpdate.py +42 -31

prepareData.py +3 -3

processData.py +1 -1

No files found.
--- a/diaryQueueUpdate.py
+++ b/diaryQueueUpdate.py
@@ -21,10 +21,13 @@ def get_video_id():
    result = cursor.fetchall()
    df = pd.DataFrame(list(result))
    print("videio_id 预览")
-    print(df.head(2))
+    print(df.head(1))
-    video_id = df[0].values.tolist()
    db.close()
-    return video_id
+    if df.empty:
+        return False
+    else:
+        video_id = df[0].values.tolist()
+        return video_id
 # 将device_id、city_id拼接到对应的城市热门日记表。注意：下面预测集特征顺序要与训练集保持一致
@@ -109,42 +112,50 @@ def get_score(queue_arg):
 def update_dairy_queue(score_df,predict_score_df,total_video_id):
    diary_id = score_df["cid"].values.tolist()
+    if total_video_id:
-    video_id = list(set(diary_id)&set(total_video_id))
+        video_id = list(set(diary_id)&set(total_video_id))
+        if len(video_id)>0:
-    if len(video_id)>0:
+            not_video = list(set(diary_id) - set(video_id))
-        not_video = list(set(diary_id) - set(video_id))
+            # 为了相加时cid能够匹配，先把cid变成索引
-        # 为了相加时cid能够匹配，先把cid变成索引
+            not_video_df = score_df.loc[score_df["cid"].isin(not_video)].set_index(["cid"])
-        not_video_df = score_df.loc[score_df["cid"].isin(not_video)].set_index(["cid"])
+            not_video_predict_df = predict_score_df.loc[predict_score_df["cid"].isin(not_video)].set_index(["cid"])
-        not_video_predict_df = predict_score_df.loc[predict_score_df["cid"].isin(not_video)].set_index(["cid"])
+            not_video_df["score"] = not_video_df["score"] + not_video_predict_df["score"]
-        not_video_df["score"] = not_video_df["score"] + not_video_predict_df["score"]
+            not_video_df = not_video_df.sort_values(by="score", ascending=False)
-        not_video_df = not_video_df.sort_values(by="score", ascending=False)
+            video_df = score_df.loc[score_df["cid"].isin(video_id)].set_index(["cid"])
-        video_df = score_df.loc[score_df["cid"].isin(video_id)].set_index(["cid"])
+            video_predict_df = predict_score_df.loc[predict_score_df["cid"].isin(video_id)].set_index(["cid"])
-        video_predict_df = predict_score_df.loc[predict_score_df["cid"].isin(video_id)].set_index(["cid"])
+            video_df["score"] = video_df["score"] + video_predict_df["score"]
-        video_df["score"] = video_df["score"] + video_predict_df["score"]
+            video_df = video_df.sort_values(by="score", ascending=False)
-        video_df = video_df.sort_values(by="score", ascending=False)
+            not_video_id = not_video_df.index.tolist()
-        not_video_id = not_video_df.index.tolist()
+            video_id = video_df.index.tolist()
-        video_id = video_df.index.tolist()
+            new_queue = not_video_id
-        new_queue = not_video_id
+            i = 1
-        i = 1
+            for j in video_id:
-        for j in video_id:
+                new_queue.insert(i, j)
-            new_queue.insert(i, j)
+                i += 5
-            i += 5
+            # print("分数合并成功")
-        # print("分数合并成功")
+            return new_queue
-        return new_queue
+        # 如果取交集后没有视频日记
-    # 如果没有视频日记
+        else:
+            score_df = score_df.set_index(["cid"])
+            predict_score_df = predict_score_df.set_index(["cid"])
+            score_df["score"]=score_df["score"]+predict_score_df["score"]
+            score_df = score_df.sort_values(by="score", ascending=False)
+            # print("分数合并成功1")
+            return score_df.index.tolist()
+    # 如果total_video_id是空
    else:
        score_df = score_df.set_index(["cid"])
        predict_score_df = predict_score_df.set_index(["cid"])
-        score_df["score"]=score_df["score"]+predict_score_df["score"]
+        score_df["score"] = score_df["score"] + predict_score_df["score"]
        score_df = score_df.sort_values(by="score", ascending=False)
        # print("分数合并成功1")
        return score_df.index.tolist()
 def update_sql_dairy_queue(queue_name, diary_id,device_id, city_id):
    db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
                         passwd='o5gbA27hXHHm', db='doris_prod')

--- a/prepareData.py
+++ b/prepareData.py
@@ -6,12 +6,12 @@ import time
 def fetch_data(start_date, end_date):
    # 获取点击表里的device_id
-    sql = "select distinct device_id from data_feed_click"
+    sql = "select distinct device_id from data_feed_click2"
    click_device_id = con_sql(sql)[0].values.tolist()
    print("成功获取点击表里的device_id")
    # 获取点击表里的数据
-    sql = "select cid,device_id,time,stat_date from data_feed_click " \
+    sql = "select cid,device_id,time,stat_date from data_feed_click2 " \
          "where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date)
    click = con_sql(sql)
    click = click.rename(columns={0: "cid", 1: "device_id", 2: "time_date", 3: "stat_date"})
@@ -22,7 +22,7 @@ def fetch_data(start_date, end_date):
    click = click.drop("time_date", axis=1)
    # 获取曝光表里的数据
-    sql = "select cid,device_id,time,stat_date from data_feed_exposure " \
+    sql = "select cid,device_id,time,stat_date from data_feed_exposure2 " \
          "where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date)
    start = time.time()
    exposure = con_sql(sql)

--- a/processData.py
+++ b/processData.py
@@ -48,7 +48,7 @@ def feature_en(data_start_date, data_end_date, validation_date, test_date):
    data["hour"] = data["hour"].astype("category")
    data["minute"] = data["minute"].astype("category")
-    # 持久化候选cid
+    # 持久化候选cid,选预测候选集时用这个过滤
    data_set_cid = data["cid"].unique()
    cid_df = pd.DataFrame()
    cid_df['cid'] = data_set_cid