Commit b69ad3c9 authored by 高雅喆

update

parent ac75cb2b
@@ -68,25 +68,25 @@ if __name__ == '__main__':
     all_3tag_2tag = get_all_3tag_2tag()
     # rdd
-    # sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
-    #     .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
-    #     .set("spark.tispark.plan.allow_index_double_read", "false") \
-    #     .set("spark.tispark.plan.allow_index_read", "true") \
-    #     .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
-    #     .set("spark.tispark.pd.addresses", "172.16.40.170:2379").set("spark.io.compression.codec", "lzf") \
-    #     .set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")
-    #
-    # spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
-    # spark.sparkContext.setLogLevel("WARN")
-    # spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")
-    # device_ids_lst_rdd = spark.sparkContext.parallelize(action_type)
-    # print("="*100)
-    # print(action_type)
-    # print(type(device_ids_lst_rdd))
-    # print(device_ids_lst_rdd)
-    # print("=" * 100)
-    # result = device_ids_lst_rdd.repartition(100).map(
-    #     lambda x: args_test(x))
-    # print(result)
-    # print(result.collect())
-    # print(result.foreach(print))
+    sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
+        .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
+        .set("spark.tispark.plan.allow_index_double_read", "false") \
+        .set("spark.tispark.plan.allow_index_read", "true") \
+        .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
+        .set("spark.tispark.pd.addresses", "172.16.40.170:2379").set("spark.io.compression.codec", "lzf") \
+        .set("spark.driver.maxResultSize", "8g").set("spark.sql.avro.compression.codec", "snappy")
+    spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
+    spark.sparkContext.setLogLevel("WARN")
+    spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")
+    device_ids_lst_rdd = spark.sparkContext.parallelize(action_type)
+    print("="*100)
+    print(action_type)
+    print(type(device_ids_lst_rdd))
+    print(device_ids_lst_rdd)
+    print("=" * 100)
+    result = device_ids_lst_rdd.repartition(100).map(
+        lambda x: args_test(x))
+    print(result)
+    print(result.collect())
+    print(result.foreach(print))
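
For reference, below is a minimal, self-contained sketch of the driver pattern this commit enables: parallelize a list, repartition it, and map a per-element function over it. The real args_test lives elsewhere in the repo and is not shown in this diff, so the stub here is a hypothetical stand-in, and the TiSpark/Hive settings are swapped for a plain local session so the sketch runs without a cluster.

from pyspark.sql import SparkSession

def args_test(x):
    # Hypothetical stand-in for the real args_test defined elsewhere in the
    # repo; it just echoes the element so the pipeline's shape is visible.
    return ("processed", x)

if __name__ == "__main__":
    # Plain local session in place of the TiSpark/Hive config above.
    spark = (SparkSession.builder
             .master("local[2]")
             .appName("args_test_sketch")
             .getOrCreate())
    spark.sparkContext.setLogLevel("WARN")

    action_type = ["click", "collect", "share"]  # assumed sample payload
    device_ids_lst_rdd = spark.sparkContext.parallelize(action_type)

    # repartition(100) in the commit spreads work across executors; a small
    # number is enough locally.
    result = device_ids_lst_rdd.repartition(4).map(args_test)

    # collect() materializes the mapped results on the driver. Note that
    # print(result) in the committed code only shows the RDD object, and
    # result.foreach(print) runs on the executors, so on a cluster that
    # output lands in executor logs rather than the driver console.
    print(result.collect())

    spark.stop()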