Commit a2fe59b3 authored by 张彦钊

change test file

parent 0565065a
@@ -44,8 +44,8 @@ def filter_na(x):
def model(rdd):
    try:
-        rdd = rdd.filter(lambda x:Json(x)).repartition(10).map(lambda x:get_data(x))\
-            .map(lambda x:write_redis(x[0],x[1]))
+        rdd = rdd.filter(lambda x:Json(x)).repartition(10).map(lambda x:get_data(x))
+        # .map(lambda x:write_redis(x[0],x[1]))
        return rdd
    except:
        print("fail")
@@ -89,12 +89,46 @@ def write_redis(device_id,cid_list):
            r.set(key, json.dumps(cids))
        else:
            r.set(key, json.dumps(cids))
-        r.expire(key, 60*60)
+        r.expire(key, 7*24*60*60)
        return "good"
    except Exception as e:
        print("insert redis fail")
        print(e)
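The change above keeps the per-device dislike key for 7 days instead of 1 hour. A minimal read-back sketch, assuming the stored value is the JSON-encoded list of diary ids written above; the device id here is a made-up example, not taken from the commit:

import json
import redis

# Same Redis instance the job writes to (URL taken from the code in this commit).
r = redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN6@172.16.40.133:6379')
device_id = "862088020924496"  # hypothetical device id, for illustration only
key = str(device_id) + "_dislike_diary"
raw = r.get(key)
dislike_cids = json.loads(raw) if raw else []  # empty list if the key has expired or was never set
print(len(dislike_cids), "diary ids currently filtered for", device_id)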
def group_redis(device_id, cid_list):
    # Map the disliked diaries to their level-3 tags.
    db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
    sql = "select b.id from src_mimas_prod_api_diary_tags a left join src_zhengxing_api_tag b " \
          "on a.tag_id = b.id where b.tag_type = '3' and a.diary_id in {}".format(tuple(cid_list))
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    tags = list(set([i[0] for i in result]))
    if len(tags) != 0:
        # Expand to every online diary (content_level >= 3) carrying any of those tags.
        sql = "select a.id from src_mimas_prod_api_diary a left join src_mimas_prod_api_diary_tags b " \
              "on a.id=b.diary_id left join src_zhengxing_api_tag c on b.tag_id=c.id " \
              "where a.is_online = 1 and a.content_level >= '3' " \
              "and c.id in {} and c.tag_type = '3'".format(tuple(tags))
        cursor.execute(sql)
        result = cursor.fetchall()
        cids = list(set([i[0] for i in result]))
        if len(cids) > 1:
            r = redis.StrictRedis.from_url('redis://:ReDis!GmTx*0aN6@172.16.40.133:6379')
            key = str(device_id) + "_dislike_diary"
            if r.exists(key):
                # Merge with the ids already stored under the key and store a single JSON list
                # (dumping only once; the original double json.dumps produced a JSON-encoded string).
                value = eval(r.get(key))
                value.extend(cids)
                cids = list(set(value))
                r.set(key, json.dumps(cids))
            else:
                r.set(key, json.dumps(cids))
            r.expire(key, 7 * 24 * 60 * 60)
    db.close()
def group_write(rdd):
    # groupByKey yields (device_id, iterable_of_cids) pairs; foreach forces the Redis writes
    # (map alone is lazy and would never execute), and "good" mirrors the other helpers.
    rdd.groupByKey().foreach(lambda x: group_redis(x[0], list(x[1])))
    return "good"
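group_write expects model() to emit (device_id, diary_id) pairs, which groupByKey collapses into one group_redis call per device. A standalone local sketch of that shape, using hypothetical ids and run outside the streaming job (it builds its own SparkContext):

from pyspark import SparkContext

sc_local = SparkContext("local[1]", "group_write_shape_demo")
pairs = sc_local.parallelize([("dev1", 101), ("dev1", 102), ("dev2", 103)])  # hypothetical (device_id, diary_id) pairs
grouped = pairs.groupByKey().mapValues(list).collect()
print(grouped)  # e.g. [('dev1', [101, 102]), ('dev2', [103])] -- each tuple is what group_redis(device_id, cid_list) receives
sc_local.stop()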
# Spark-Streaming-Kafka
sc = SparkContext(conf=SparkConf().setMaster("spark://nvwa01:7077").setAppName("filter").set("spark.io.compression.codec", "lzf"))
ssc=SQLContext(sc)
@@ -107,7 +141,7 @@ kafkaParams = {"metadata.broker.list": "172.16.44.25:9092,172.16.44.31:9092,172.
stream = KafkaUtils.createDirectStream(ssc, ["gm-maidian-data"], kafkaParams)
transformstream = stream.transform(lambda x:model(x))
+# foreachRDD returns None, so register the grouped Redis write separately and keep the DStream for pprint()
+transformstream.foreachRDD(group_write)
transformstream.pprint()
ssc.start()
ssc.awaitTermination()
\ No newline at end of file