Commit a2fe59b3 authored by 张彦钊

change test file

parent 0565065a
@@ -44,8 +44,8 @@ def filter_na(x):
def model(rdd):
    try:
-        rdd = rdd.filter(lambda x:Json(x)).repartition(10).map(lambda x:get_data(x))\
-            .map(lambda x:write_redis(x[0],x[1]))
+        rdd = rdd.filter(lambda x:Json(x)).repartition(10).map(lambda x:get_data(x))
+        # .map(lambda x:write_redis(x[0],x[1]))
        return rdd
    except:
        print("fail")
@@ -89,12 +89,46 @@ def write_redis(device_id,cid_list):
            r.set(key, json.dumps(cids))
        else:
            r.set(key, json.dumps(cids))
-        r.expire(key, 60*60)
+        r.expire(key, 7*24*60*60)
        return "good"
    except Exception as e:
        print("insert redis fail")
        print(e)
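The change above keeps the per-device dislike key for 7 days instead of 1 hour. A minimal read-back sketch, assuming the stored value is the JSON-encoded list of diary ids written above; the device id here is a made-up example, not taken from the commit:

import json
import redis

# Same Redis instance the job writes to (URL taken from the code in this commit).
r = redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN6@172.16.40.133:6379')
device_id = "862088020924496"  # hypothetical device id, for illustration only
key = str(device_id) + "_dislike_diary"
raw = r.get(key)
dislike_cids = json.loads(raw) if raw else []  # empty list if the key has expired or was never set
print(len(dislike_cids), "diary ids currently filtered for", device_id)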
def group_redis(device_id, cid_list):
    # Map the disliked diaries to their level-3 tags.
    db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
    sql = "select b.id from src_mimas_prod_api_diary_tags a left join src_zhengxing_api_tag b " \
          "on a.tag_id = b.id where b.tag_type = '3' and a.diary_id in {}".format(tuple(cid_list))
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    tags = list(set([i[0] for i in result]))
    if len(tags) != 0:
        # Expand to every online diary (content_level >= 3) carrying any of those tags.
        sql = "select a.id from src_mimas_prod_api_diary a left join src_mimas_prod_api_diary_tags b " \
              "on a.id=b.diary_id left join src_zhengxing_api_tag c on b.tag_id=c.id " \
              "where a.is_online = 1 and a.content_level >= '3' " \
              "and c.id in {} and c.tag_type = '3'".format(tuple(tags))
        cursor.execute(sql)
        result = cursor.fetchall()
        cids = list(set([i[0] for i in result]))
        if len(cids) > 1:
            r = redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN6@172.16.40.133:6379')
            key = str(device_id) + "_dislike_diary"
            if r.exists(key):
                # Merge with the ids already stored under the key and store a single JSON list
                # (dumping only once; the original double json.dumps produced a JSON-encoded string).
                value = eval(r.get(key))
                value.extend(cids)
                cids = list(set(value))
                r.set(key, json.dumps(cids))
            else:
                r.set(key, json.dumps(cids))
            r.expire(key, 7 * 24 * 60 * 60)
    db.close()
def group_write(rdd):
    # groupByKey yields (device_id, iterable_of_cids) pairs; foreach forces the Redis writes
    # (map alone is lazy and would never execute), and "good" mirrors the other helpers.
    rdd.groupByKey().foreach(lambda x: group_redis(x[0], list(x[1])))
    return "good"
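group_write expects model() to emit (device_id, diary_id) pairs, which groupByKey collapses into one group_redis call per device. A standalone local sketch of that shape, using hypothetical ids and run outside the streaming job (it builds its own SparkContext):

from pyspark import SparkContext

sc_local = SparkContext("local[1]", "group_write_shape_demo")
pairs = sc_local.parallelize([("dev1", 101), ("dev1", 102), ("dev2", 103)])  # hypothetical (device_id, diary_id) pairs
grouped = pairs.groupByKey().mapValues(list).collect()
print(grouped)  # e.g. [('dev1', [101, 102]), ('dev2', [103])] -- each tuple is what group_redis(device_id, cid_list) receives
sc_local.stop()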
# Spark-Streaming-Kafka
sc = SparkContext(conf=SparkConf().setMaster("spark://nvwa01:7077").setAppName("filter").set("spark.io.compression.codec", "lzf"))
ssc=SQLContext(sc)
@@ -107,7 +141,7 @@ kafkaParams = {"metadata.broker.list": "172.16.44.25:9092,172.16.44.31:9092,172.
stream = KafkaUtils.createDirectStream(ssc, ["gm-maidian-data"], kafkaParams)
transformstream = stream.transform(lambda x:model(x))
+# foreachRDD returns None, so register the grouped Redis write separately and keep the DStream for pprint()
+transformstream.foreachRDD(group_write)
transformstream.pprint()
ssc.start()
ssc.awaitTermination()
\ No newline at end of file