Commit 0f0fc498 authored by 张彦钊

修改测试文件

parent 41ea4ac6
......@@ -80,22 +80,25 @@ def feature_engineer():
"channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"])
df = df.fillna("na")
v = df.select("app_list").collect()
print(type(v))
print(v[:2])
app_list_value = [i.split(",") for i in set(df.select("app_list").collect())]
app_list_unique = []
for i in app_list_value:
app_list_unique.extend(i)
app_list_unique = list(set(app_list_unique))
number = len(app_list_unique)
app_list_map = dict(zip(app_list_unique, list(range(1, number + 1))))
df = df.select("app_list","ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer","channel",
"top", "time", "stat_date", "hospital_id", "level3_ids","y","z",
"treatment_method","price_min","price_max","treatment_time","maintain_time","recover_time")\
.map(lambda x :app_list_func(x[0],app_list_map))
df.show(6)
# app_list_value = [i.split(",") for i in v]
#
# app_list_unique = []
# for i in app_list_value:
# app_list_unique.extend(i)
# app_list_unique = list(set(app_list_unique))
# number = len(app_list_unique)
# app_list_map = dict(zip(app_list_unique, list(range(1, number + 1))))
#
# df = df.select("app_list","ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer","channel",
# "top", "time", "stat_date", "hospital_id", "level3_ids","y","z",
# "treatment_method","price_min","price_max","treatment_time","maintain_time","recover_time")\
# .map(lambda x :app_list_func(x[0],app_list_map))
#
# df.show(6)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment