Commit 8ec237c0 authored by 张彦钊

修改测试文件

parent 648f190f
......@@ -90,7 +90,7 @@ def feature_engineer():
rdd = df.select("app_list","level2_ids","level3_ids","stat_date","ucity_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "hospital_id","treatment_method", "price_min",
"price_max", "treatment_time","maintain_time", "recover_time","y","z",).rdd
"price_max", "treatment_time","maintain_time", "recover_time","y","z").rdd
rdd.persist()
# TODO 上线后把下面train fliter 删除,因为最近一天的数据也要作为训练集
train = rdd.filter(lambda x: x[3]!= validate_date).map(lambda x: (app_list_func(x[0], app_list_map), app_list_func(x[1], leve2_map),
......@@ -201,7 +201,7 @@ def test():
from hdfs.ext.dataframe import read_dataframe
client = InsecureClient('http://nvwa01:50070')
df = read_dataframe(client,"/recommend/va/part-00199-a6aad7c8-149c-4eee-b718-5d350b26e8d2-c000.avro")
df = read_dataframe(client,"/recommend/va/part-00000-a6aad7c8-149c-4eee-b718-5d350b26e8d2-c000.avro")
print(df.head())
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment