Commit 7c7cdda8 authored by 张彦钊

add

parent 8d90596e
......@@ -8,17 +8,13 @@ import time
from pyspark import StorageLevel
def ctr(x):
def position(x, y):
    """Count the entries of ``x`` with ``is_cpc == 1`` positioned at most ``y``.

    Args:
        x: Iterable of mappings. Every entry must have an ``'is_cpc'`` key;
            entries whose ``is_cpc`` equals 1 must also carry an
            ``'absolute_position'`` value that ``int()`` accepts.
        y: Inclusive upper bound compared against ``int(absolute_position)``.

    Returns:
        The number of entries for which ``is_cpc == 1`` and
        ``int(absolute_position) <= y``; 0 when ``x`` is empty.
    """
    # The ``and`` short-circuits, so 'absolute_position' is only read (and
    # converted) for entries that passed the ``is_cpc`` check.
    return sum(
        1
        for card in x
        if card['is_cpc'] == 1 and int(card['absolute_position']) <= y
    )
def f(x):
    """Print ``x`` to standard output."""
    print(x)
if __name__ == '__main__':
sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
......@@ -30,7 +26,7 @@ if __name__ == '__main__':
.set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")
spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
sql = "select partition_date from online.ml_community_precise_exposure_detail " \
sql = "select params['exposure_cards'] from online.ml_community_precise_exposure_detail " \
"where action = 'page_precise_exposure' and page_name = 'search_result_welfare' " \
"AND partition_date='20190926' limit 20"
df = spark.sql(sql)
......@@ -38,9 +34,9 @@ if __name__ == '__main__':
# # params['exposure_cards'],
rdd = df.rdd.map(lambda x:(x[0],))
#
print(rdd.collect())
# print(rdd.collect())
#
spark.createDataFrame(rdd).show(6)
spark.createDataFrame(rdd).show(20)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment