Update the user portrait only when at least one tag score is not empty

7b103f10 · 赵威 · 2f5972f0 · 7b103f10
Commit 7b103f10 authored Mar 19, 2020 by 赵威
Hide whitespace changes
Inline Side-by-side

Showing 1 changed file with 31 additions and 26 deletions

tag3_update_user_portrait_offline.py eda/smart_rank/tag3_update_user_portrait_offline.py +31 -26

No files found.
--- a/eda/smart_rank/tag3_update_user_portrait_offline.py
+++ b/eda/smart_rank/tag3_update_user_portrait_offline.py
@@ -25,7 +25,7 @@ def make_dict_from_pair(x):
    return dict(zip(x[0], [x[1]] * len(x[0])))


-def update_tag3_user_portrait(cl_id):
+def update_tag3_user_portrait(cl_id, redis_client):
    user_df = get_tag3_user_log(cl_id)
    if not user_df.empty:
        user_df["first_solutions"] = list(zip(user_df["first_solutions"].apply(lambda x: x.split(",")), user_df["tag_score"]))
@@ -70,30 +70,33 @@ def update_tag3_user_portrait(cl_id):
            "projects": projects_score
        }

-        key = "doris:user_portrait:tag3:device_id:" + str(cl_id)
-        redis_client = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN9@172.16.40.173:6379")
-        redis_client.set(key, json.dumps(res))
-        redis_client.expire(key, 60 * 60 * 24 * 30)
-
-        # only need the first time
-        res2 = {
-            "first_demands": list(first_demands_score.keys()),
-            "second_demands": list(second_demands_score.keys()),
-            "first_solutions": list(first_solutions_score.keys()),
-            "second_solutions": list(second_solutions_score.keys()),
-            "first_positions": list(first_positions_score.keys()),
-            "second_positions": list(second_positions_score.keys()),
-            "projects": list(projects_score.keys())
-        }
-        key2 = "doris:user_portrait:tag3:increment_update:device_id:" + str(cl_id)
-        redis_client.delete(key2)
-        redis_client.set(key2, json.dumps(res2))
-        redis_client.expire(key2, 60 * 60 * 24 * 30)
-
-        write_user_portrait(cl_id, ",".join(first_solutions_score.keys()), ",".join(second_solutions_score.keys()),
-                            ",".join(first_demands_score.keys()), ",".join(second_demands_score.keys()),
-                            ",".join(first_positions_score.keys()), ",".join(second_positions_score.keys()),
-                            ",".join(projects_score.keys()))
+        if (len(first_demands_score.keys()) > 0) or (len(second_demands_score.keys()) > 0) or \
+           (len(first_solutions_score.keys()) > 0) or (len(second_solutions_score.keys()) > 0) or \
+           (len(first_positions_score.keys()) > 0) or (len(second_positions_score.keys()) > 0) or \
+           (len(projects_score.keys()) > 0):
+            key = "doris:user_portrait:tag3:device_id:" + str(cl_id)
+            redis_client.set(key, json.dumps(res))
+            redis_client.expire(key, 60 * 60 * 24 * 30)
+
+            # only need the first time
+            res2 = {
+                "first_demands": list(first_demands_score.keys()),
+                "second_demands": list(second_demands_score.keys()),
+                "first_solutions": list(first_solutions_score.keys()),
+                "second_solutions": list(second_solutions_score.keys()),
+                "first_positions": list(first_positions_score.keys()),
+                "second_positions": list(second_positions_score.keys()),
+                "projects": list(projects_score.keys())
+            }
+            key2 = "doris:user_portrait:tag3:increment_update:device_id:" + str(cl_id)
+            redis_client.delete(key2)
+            redis_client.set(key2, json.dumps(res2))
+            redis_client.expire(key2, 60 * 60 * 24 * 30)
+
+            write_user_portrait(cl_id, ",".join(first_solutions_score.keys()), ",".join(second_solutions_score.keys()),
+                                ",".join(first_demands_score.keys()), ",".join(second_demands_score.keys()),
+                                ",".join(first_positions_score.keys()), ",".join(second_positions_score.keys()),
+                                ",".join(projects_score.keys()))

        return cl_id

@@ -121,8 +124,10 @@ def consume_kafka():
        spark.sparkContext.setLogLevel("WARN")
        spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")

+        redis_client = redis.StrictRedis.from_url("redis://:ReDis!GmTx*0aN9@172.16.40.173:6379")
+
        device_ids_lst_rdd = spark.sparkContext.parallelize(device_ids_lst, numSlices=1000)
-        result = device_ids_lst_rdd.repartition(100).map(lambda x: update_tag3_user_portrait(x))
+        result = device_ids_lst_rdd.repartition(100).map(lambda x: update_tag3_user_portrait(x, redis_client))
        # result.foreach(print)
        result.collect()
        spark.stop()