Commit 0e241f2e authored by 张彦钊

change test file

parent 4ba42d54
......@@ -188,7 +188,8 @@ def feature_engineer():
"tag1","tag2","tag3","tag4","tag5","tag6","tag7",
"ucity_id", "ccity_name","device_type", "manufacturer", "channel", "top", "time",
"hospital_id","treatment_method", "price_min", "price_max", "treatment_time",
"maintain_time","recover_time").rdd.repartition(200).map(lambda x: (x[0],float(x[1]),float(x[2]),app_list_func(x[3], app_list_map), app_list_func(x[4], leve2_map),
"maintain_time","recover_time").rdd.coalesce(200).map(lambda x: (x[0],float(x[1]),float(x[2]),
app_list_func(x[3], app_list_map), app_list_func(x[4], level2_map),
app_list_func(x[5], level3_map), app_list_func(x[6], level2_map),app_list_func(x[7], level2_map),
app_list_func(x[8], level2_map), app_list_func(x[9], level2_map),app_list_func(x[10], level2_map),
app_list_func(x[11], level2_map),app_list_func(x[12], level2_map),
......@@ -196,9 +197,9 @@ def feature_engineer():
value_map[x[17]],value_map[x[18]], value_map[x[19]], value_map[x[20]],value_map[x[21]],
value_map[x[22]], value_map[x[23]], value_map[x[24]],value_map[x[25]],value_map[x[26]]]))
d = time.time()
print("rdd")
print((d-c)/60)
rdd.persist()
print("rdd")
print((d - c) / 60)
# TODO 上线后把下面 train filter 删除,因为最近一天的数据也要作为训练集
train = rdd.filter(lambda x: x[0] != validate_date).map(lambda x:(x[1],x[2],x[3],x[4],x[5],x[6],x[7],x[8],x[9],
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment