Commit b69ad3c9 authored by 高雅喆

update

parent ac75cb2b
@@ -68,25 +68,25 @@ if __name__ == '__main__':
     all_3tag_2tag = get_all_3tag_2tag()
     # rdd
-    # sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
-    #     .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
-    #     .set("spark.tispark.plan.allow_index_double_read", "false") \
-    #     .set("spark.tispark.plan.allow_index_read", "true") \
-    #     .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
-    #     .set("spark.tispark.pd.addresses", "172.16.40.170:2379").set("spark.io.compression.codec", "lzf") \
-    #     .set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")
-    #
-    # spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
-    # spark.sparkContext.setLogLevel("WARN")
-    # spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")
-    # device_ids_lst_rdd = spark.sparkContext.parallelize(action_type)
-    # print("="*100)
-    # print(action_type)
-    # print(type(device_ids_lst_rdd))
-    # print(device_ids_lst_rdd)
-    # print("=" * 100)
-    # result = device_ids_lst_rdd.repartition(100).map(
-    #     lambda x: args_test(x))
-    # print(result)
-    # print(result.collect())
-    # print(result.foreach(print))
+    sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
+        .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
+        .set("spark.tispark.plan.allow_index_double_read", "false") \
+        .set("spark.tispark.plan.allow_index_read", "true") \
+        .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
+        .set("spark.tispark.pd.addresses", "172.16.40.170:2379").set("spark.io.compression.codec", "lzf") \
+        .set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")
+
+    spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
+    spark.sparkContext.setLogLevel("WARN")
+    spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")
+    device_ids_lst_rdd = spark.sparkContext.parallelize(action_type)
+    print("="*100)
+    print(action_type)
+    print(type(device_ids_lst_rdd))
+    print(device_ids_lst_rdd)
+    print("=" * 100)
+    result = device_ids_lst_rdd.repartition(100).map(
+        lambda x: args_test(x))
+    print(result)
+    print(result.collect())
+    print(result.foreach(print))
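
For reference, below is a minimal self-contained sketch of the RDD pattern this commit re-enables (parallelize, then repartition, then map, then collect). It runs against a local Spark session rather than the Hive/TiSpark cluster configured above, and args_test is stubbed out here as a placeholder since its real definition lives elsewhere in the repo; the action_type input is likewise invented for illustration.

from pyspark import SparkConf
from pyspark.sql import SparkSession

def args_test(x):
    # Stub standing in for the repo's real args_test; just tags each element.
    return ("seen", x)

if __name__ == '__main__':
    # Local session for illustration; the committed version targets a Hive/TiSpark cluster.
    sparkConf = SparkConf().setMaster("local[2]").setAppName("rdd_smoke_test")
    spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()
    spark.sparkContext.setLogLevel("WARN")

    action_type = ["click", "collect", "share"]  # placeholder input
    device_ids_lst_rdd = spark.sparkContext.parallelize(action_type)

    # Same shape as the re-enabled block: repartition, then map over elements.
    result = device_ids_lst_rdd.repartition(2).map(lambda x: args_test(x))
    print(result.collect())  # e.g. [('seen', 'click'), ('seen', 'collect'), ('seen', 'share')]

    spark.stop()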