Commit 45302231 authored by 张彦钊

add

parent 42843dd9
@@ -15,6 +15,14 @@ def position(x,y):
             sum = sum + 1
     return sum
+
+def cpc(x,y):
+    sum = 0
+    for i in x:
+        if int(i['absolute_position']) <= y and i["is_cpc"] == 1:
+            sum = sum + 1
+    return sum
+
 if __name__ == '__main__':
     sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
@@ -32,8 +40,16 @@ if __name__ == '__main__':
     df = spark.sql(sql)
     # # df.show(6)
     # # params['exposure_cards'],
+    df.persist()
     rdd = df.rdd.map(lambda x:("a",position(eval(x[0]),10))).reduceByKey(lambda x,y:x+y).map(lambda x:x[1])
-    print(rdd.collect())
+    print("<=10")
+    print(rdd.collect()[0])
+    rdd = df.rdd.map(lambda x: ("a", cpc(eval(x[0]), 10))).reduceByKey(lambda x, y: x + y).map(lambda x: x[1])
+    print("<=10 cpc ==1 ")
+    print(rdd.collect()[0])
+    df.unpersist()
...
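
For readers skimming the diff: the new cpc helper counts how many exposed cards within the top y absolute positions carry is_cpc == 1 (CPC, i.e. cost-per-click, cards). A minimal standalone sketch of that behavior; the field names absolute_position and is_cpc come from the diff, while the sample exposure list below is made up for illustration:

    # Count CPC cards shown at or above the position cutoff.
    # Mirrors the cpc() helper added in the commit; data is hypothetical.
    def cpc(cards, top_n):
        count = 0
        for card in cards:
            if int(card['absolute_position']) <= top_n and card['is_cpc'] == 1:
                count += 1
        return count

    exposure = [
        {'absolute_position': '1', 'is_cpc': 1},
        {'absolute_position': '2', 'is_cpc': 0},
        {'absolute_position': '11', 'is_cpc': 1},  # beyond the top-10 cutoff
    ]
    print(cpc(exposure, 10))  # -> 1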
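The df.persist() / df.unpersist() pair in the second hunk matters because the DataFrame now feeds two separate Spark jobs (one collect() per metric); without caching, the underlying SQL query would be re-executed for each. A hedged sketch of the same two-pass pattern, assuming df, position and cpc exist as in the commit; ast.literal_eval stands in for the commit's eval (safe only if the column holds plain Python literals), and RDD.sum() replaces the reduceByKey-on-a-constant-key trick:

    from ast import literal_eval

    df.persist()  # cache the query result; it is scanned by two jobs below

    top10 = (df.rdd
             .map(lambda row: position(literal_eval(row[0]), 10))
             .sum())
    print("<=10", top10)

    top10_cpc = (df.rdd
                 .map(lambda row: cpc(literal_eval(row[0]), 10))
                 .sum())
    print("<=10 cpc ==1", top10_cpc)

    df.unpersist()  # release the cached partitions once both counts are done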