Commit abb9f06b authored by 张彦钊

add

parent 772f5536
......@@ -11,15 +11,17 @@ from pyspark import StorageLevel
def position(x, y):
    """Count the cards in ``x`` whose absolute position is at most ``y``.

    Args:
        x: Iterable of card mappings. Only the ``'absolute_position'`` key
            is read; its value is passed through ``int()`` before comparing.
        y: Inclusive upper bound for the position.

    Returns:
        The number of cards that carry an ``'absolute_position'`` entry
        whose integer value is ``<= y``.
    """
    # Cards without the key are skipped rather than raising KeyError; this
    # is the guard the commit adds in place of the unconditional lookup.
    return sum(
        1
        for card in x
        if 'absolute_position' in card and int(card['absolute_position']) <= y
    )
def cpc(x, y):
    """Count the CPC cards in ``x`` whose absolute position is at most ``y``.

    Args:
        x: Iterable of card mappings. The ``'absolute_position'`` and
            ``"is_cpc"`` keys are read; the position is passed through
            ``int()`` before comparing.
        y: Inclusive upper bound for the position.

    Returns:
        The number of cards that carry both keys, have an integer position
        ``<= y`` and have ``is_cpc == 1``.
    """
    # Both keys must be present before either is read; cards missing one of
    # them are skipped instead of raising KeyError (the guard this commit
    # adds in place of the unconditional lookups).
    return sum(
        1
        for card in x
        if "is_cpc" in card
        and 'absolute_position' in card
        and int(card['absolute_position']) <= y
        and card["is_cpc"] == 1
    )
......@@ -37,34 +39,34 @@ if __name__ == '__main__':
sql = "select params['exposure_cards'] from online.ml_community_precise_exposure_detail " \
"where action = 'page_precise_exposure' and page_name = 'search_result_welfare' " \
"AND partition_date='20190926'"
df = spark.sql(sql)
df = spark.sql(sql).rdd
# # df.show(6)
# # params['exposure_cards'],
df.persist()
total = []
rdd = df.rdd.map(lambda x:("a",position(eval(x[0]),10))).reduceByKey(lambda x,y:x+y).map(lambda x:x[1])
rdd = df.map(lambda x:("a",position(eval(x[0]),10))).reduceByKey(lambda x,y:x+y).map(lambda x:x[1])
tmp = rdd.collect()[0]
total.append(tmp)
rdd = df.rdd.map(lambda x: ("a", position(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
rdd = df.map(lambda x: ("a", position(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
tmp = rdd.collect()[0]
total.append(tmp)
rdd = df.rdd.map(lambda x: ("a", position(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
rdd = df.map(lambda x: ("a", position(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
tmp = rdd.collect()[0]
total.append(tmp)
rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 10))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 10))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
tmp = rdd.collect()[0]
total.append(tmp)
rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
tmp = rdd.collect()[0]
total.append(tmp)
rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
tmp = rdd.collect()[0]
total.append(tmp)
rdd = df.rdd.map(lambda x: ("a", len(eval(x[0])))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
rdd = df.map(lambda x: ("a", len(eval(x[0])))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
tmp = rdd.collect()[0]
total.append(tmp)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment