Commit 4490b0c6 authored by 高雅喆's avatar 高雅喆

update

parent 55fbd6b4
......@@ -10,7 +10,6 @@ from email.mime.application import MIMEApplication
import redis
import datetime
from pyspark import SparkConf
from pyspark import SparkContext
import time
from pyspark.sql import SparkSession
import json
......@@ -79,14 +78,13 @@ sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true")
.set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
.set("spark.tispark.pd.addresses", "172.16.40.170:2379").set("spark.io.compression.codec", "lzf") \
.set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")
# Driver section: start Spark, run get_user_service_portrait over every entry
# of device_info, print a small sample of the output, then shut Spark down.
#
# The session is built from sparkConf *before* anything else touches Spark.
# Creating a bare SparkContext() first would make getOrCreate() reuse that
# already-running context, so the context-level settings collected in
# sparkConf would not be applied to it.
spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()

# Legacy module-level names, kept so anything that still refers to `sc` or
# `sqlContext` keeps working; both now point at the configured session.
sc = spark.sparkContext
sqlContext = spark

try:
    sc.setLogLevel("WARN")
    # Ship the helper module so it is importable inside the map() tasks.
    sc.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")

    device_ids_lst_rdd = sc.parallelize(device_info)
    result = device_ids_lst_rdd.repartition(100).map(
        lambda x: get_user_service_portrait(
            x,
            all_word_tags,
            all_tag_tag_type,
            all_3tag_2tag,
            all_tags_name,
            size=None,
            pay_time=pay_time,
        )
    )

    # Evaluate the whole RDD exactly once and reuse the collected list for the
    # sample print; collect() followed by take(10) would run the job twice.
    collected = result.collect()
    print(collected[:10])

    # NOTE(review): `result` is an RDD, which has no `.write` attribute (that
    # is the DataFrame writer API), so the former
    # `result.write.format('csv').save("~/test_df.csv")` call could only raise
    # AttributeError. If a CSV export is needed, convert to a DataFrame with
    # an explicit schema first and use an absolute output path.
finally:
    # Always release the Spark application, even when the job above fails.
    spark.stop()
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment