Commit 3eaa7b18 authored by 张彦钊

Modify test file

parent c89eb6de
@@ -36,7 +36,7 @@ def feature_engineer():
     validate_date = con_sql(db, sql)[0].values.tolist()[0]
     print("validate_date:" + validate_date)
     temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
-    start = (temp - datetime.timedelta(days=3)).strftime("%Y-%m-%d")
+    start = (temp - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
     print(start)
     sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
@@ -44,8 +44,8 @@ def feature_engineer():
         .set("spark.tispark.plan.allow_index_double_read", "false") \
         .set("spark.tispark.plan.allow_index_read", "true") \
         .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
-        .set("spark.tispark.pd.addresses", "172.16.40.158:2379").set("spark.io.compression.codec", "lzf")\
-        .set("spark.driver.maxResultSize", "4g")
+        .set("spark.tispark.pd.addresses", "172.16.40.158:2379").set("spark.io.compression.codec", "lzf")
+        # .set("spark.driver.maxResultSize", "4g")
     spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
     ti = pti.TiContext(spark)
@@ -118,11 +118,12 @@ def feature_engineer():
                            value_map[x[13]], value_map[x[14]], value_map[x[15]], value_map[x[16]],
                            value_map[x[17]], x[18], x[19]))
-    print("test.count",test.count())
-    print("train count",train.count())
+    # print("test.count",test.count())
+    # print("train count",train.count())
     spark.createDataFrame(test).write.csv('/recommend/va', mode='overwrite', header=True)
     spark.createDataFrame(train).write.csv('/recommend/tr', mode='overwrite', header=True)
+    print("done")
     return validate_date,value_map,app_list_map,leve2_map,leve3_map
...
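Below is a minimal, self-contained sketch of the validation-window computation that the first hunk changes (days=3 to days=1). The sample validate_date value is hypothetical; in the actual job it is read from the database via con_sql(db, sql) as in the surrounding code.

import datetime

# Hypothetical sample value; the job obtains validate_date from con_sql(db, sql).
validate_date = "2019-01-08"
temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
# After this commit the window starts one day before the validation date
# (previously three days before).
start = (temp - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
print("validate_date:" + validate_date)
print(start)  # "2019-01-07" for the sample value above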