add

abb9f06b · 张彦钊 · 772f5536 · abb9f06b
Commit abb9f06b authored Sep 27, 2019 by 张彦钊
Hide whitespace changes
Inline Side-by-side

Showing with 14 additions and 12 deletions

hello.py hello.py +14 -12

No files found.
--- a/hello.py
+++ b/hello.py
@@ -11,15 +11,17 @@ from pyspark import StorageLevel
 def position(x,y):
    sum = 0
    for i in x:
-        if int(i['absolute_position']) <= y:
+        if 'absolute_position' in i:
-            sum = sum + 1
+            if int(i['absolute_position']) <= y:
+                sum = sum + 1
    return sum
 def cpc(x,y):
    sum = 0
    for i in x:
-        if (int(i['absolute_position']) <= y) and (i["is_cpc"] == 1):
+        if ("is_cpc" in i) and ('absolute_position' in i):
-            sum = sum + 1
+            if (int(i['absolute_position']) <= y) and (i["is_cpc"] == 1):
+                sum = sum + 1
    return sum
@@ -37,34 +39,34 @@ if __name__ == '__main__':
    sql = "select params['exposure_cards'] from online.ml_community_precise_exposure_detail " \
          "where action = 'page_precise_exposure' and page_name = 'search_result_welfare' " \
          "AND partition_date='20190926'"
-    df = spark.sql(sql)
+    df = spark.sql(sql).rdd
    # # df.show(6)
    # # params['exposure_cards'],
    df.persist()
    total = []
-    rdd = df.rdd.map(lambda x:("a",position(eval(x[0]),10))).reduceByKey(lambda x,y:x+y).map(lambda x:x[1])
+    rdd = df.map(lambda x:("a",position(eval(x[0]),10))).reduceByKey(lambda x,y:x+y).map(lambda x:x[1])
    tmp = rdd.collect()[0]
    total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", position(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", position(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
    tmp = rdd.collect()[0]
    total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", position(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", position(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
    tmp = rdd.collect()[0]
    total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 10))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 10))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
    tmp = rdd.collect()[0]
    total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 30))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
    tmp = rdd.collect()[0]
    total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", cpc(eval(x[0]), 50))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
    tmp = rdd.collect()[0]
    total.append(tmp)
-    rdd = df.rdd.map(lambda x: ("a", len(eval(x[0])))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    rdd = df.map(lambda x: ("a", len(eval(x[0])))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
    tmp = rdd.collect()[0]
    total.append(tmp)