Commit abb9f06b authored by 张彦钊

add

parent 772f5536
@@ -11,6 +11,7 @@ from pyspark import StorageLevel
 def position(x,y):
     sum = 0
     for i in x:
+        if 'absolute_position' in i:
             if int(i['absolute_position']) <= y:
                 sum = sum + 1
     return sum
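The guard added here avoids a KeyError when an exposure card was logged without an absolute_position field. A minimal sketch of the guarded count, using hypothetical card dicts (the sample data below is illustrative, not taken from the source table):

    cards = [
        {'absolute_position': '3'},   # within the top 10
        {'absolute_position': '12'},  # beyond the top 10
        {'card_id': 42},              # hypothetical card with no position; the guard skips it
    ]

    def position(x, y):
        # Count cards whose logged position is within the top y slots.
        count = 0
        for i in x:
            if 'absolute_position' in i:
                if int(i['absolute_position']) <= y:
                    count += 1
        return count

    print(position(cards, 10))  # -> 1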
@@ -18,6 +19,7 @@ def position(x,y):
 def cpc(x,y):
     sum = 0
     for i in x:
+        if ("is_cpc" in i) and ('absolute_position' in i):
             if (int(i['absolute_position']) <= y) and (i["is_cpc"] == 1):
                 sum = sum + 1
     return sum
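cpc() applies the same guard to two fields at once: a card is counted only when it both sits within the top y positions and was a CPC (paid) placement. With the same kind of hypothetical cards:

    cards = [
        {'absolute_position': '2', 'is_cpc': 1},   # paid and in the top 10 -> counted
        {'absolute_position': '5', 'is_cpc': 0},   # organic -> skipped
        {'absolute_position': '40', 'is_cpc': 1},  # paid but outside the top 10
    ]
    # cpc(cards, 10) -> 1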
@@ -37,34 +39,34 @@ if __name__ == '__main__':
     sql = "select params['exposure_cards'] from online.ml_community_precise_exposure_detail " \
           "where action = 'page_precise_exposure' and page_name = 'search_result_welfare' " \
           "AND partition_date='20190926'"
-    df = spark.sql(sql)
+    df = spark.sql(sql).rdd
     # # df.show(6)
     # # params['exposure_cards'],
     df.persist()
     total = []
-    rdd = df.rdd.map(lambda x:("a",position(eval(x[0]),10))).reduceByKey(lambda x,y:x+y).map(lambda x:x[1])
+    rdd = df.map(lambda x:("a",position(eval(x[0]),10))).reduceByKey(lambda x,y:x+y).map(lambda x:x[1])
     tmp = rdd.collect()[0]
     total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", position(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", position(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
     tmp = rdd.collect()[0]
     total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", position(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", position(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
     tmp = rdd.collect()[0]
     total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 10))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 10))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
     tmp = rdd.collect()[0]
     total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
     tmp = rdd.collect()[0]
     total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
     tmp = rdd.collect()[0]
     total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", len(eval(x[0])))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", len(eval(x[0])))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
     tmp = rdd.collect()[0]
     total.append(tmp)
...
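The main block launches seven separate Spark actions, one collect() per counter, against the persisted RDD; since every record is keyed by the constant "a", each reduceByKey is just a global sum. A one-pass sketch (an illustrative rewrite, not part of this commit) that computes all seven counters with a single action:

    def metrics(row):
        # Parse the logged exposure_cards string and emit all counters at once.
        cards = eval(row[0])
        return (position(cards, 10), position(cards, 30), position(cards, 50),
                cpc(cards, 10), cpc(cards, 30), cpc(cards, 50),
                len(cards))

    # Element-wise sum across all rows in one job; df is the persisted RDD.
    total = list(df.map(metrics).reduce(
        lambda a, b: tuple(m + n for m, n in zip(a, b))))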