Commit 5d132b92 authored by 张彦钊's avatar 张彦钊

change test flow

parent cec6a113
......@@ -139,7 +139,7 @@ def feature_engineer():
validate_date = con_sql(db, sql)[0].values.tolist()[0]
print("validate_date:" + validate_date)
temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
start = (temp - datetime.timedelta(days=100)).strftime("%Y-%m-%d")
start = (temp - datetime.timedelta(days=10)).strftime("%Y-%m-%d")
print(start)
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC')
......@@ -266,6 +266,7 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
"treatment_method", "price_min", "price_max", "treatment_time", "maintain_time", "recover_time"]
df = spark.sql(sql)
df = df.drop_duplicates(["treatment_method","price_min","price_max","treatment_time","maintain_time","recover_time"])
df = df.na.fill(dict(zip(features, features)))
f = time.time()
rdd = df.select("label", "y", "z", "ucity_id", "device_id", "cid_id", "app_list", "level2_ids", "level3_ids",
......@@ -295,6 +296,9 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
.toDF("city","uid","cid_id")
print("native csv")
native_pre.toPandas().to_csv(local_path+"native.csv", header=True)
print("预测集总数")
print(rdd.count())
# TODO 写成csv文件改成下面这样
# native_pre.coalesce(1).write.format('com.databricks.spark.csv').save(path+"native/",header = 'true')
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment