Commit a2049b7f authored by 张彦钊

修改测试文件

parent 6122a37d
...@@ -18,15 +18,15 @@ def app_list_func(x,l): ...@@ -18,15 +18,15 @@ def app_list_func(x,l):
return ",".join([str(j) for j in e]) return ",".join([str(j) for j in e])
def multi_hot(df,column,n): def multi_hot(df,column,n):
df[column] = df[column].fillna("lost_na") df = df.select[column].fillna("lost_na")
app_list_value = [i.split(",") for i in df[column].unique()] app_list_value = [i.split(",") for i in df.select[column].unique()]
app_list_unique = [] app_list_unique = []
for i in app_list_value: for i in app_list_value:
app_list_unique.extend(i) app_list_unique.extend(i)
app_list_unique = list(set(app_list_unique)) app_list_unique = list(set(app_list_unique))
number = len(app_list_unique) number = len(app_list_unique)
app_list_map = dict(zip(app_list_unique, list(range(n, number + n)))) app_list_map = dict(zip(app_list_unique, list(range(n, number + n))))
df[column] = df[column].apply(app_list_func, args=(app_list_map,)) df = df.select[column].apply(app_list_func, args=(app_list_map,))
return number,app_list_map return number,app_list_map
def feature_engineer(): def feature_engineer():
...@@ -75,11 +75,13 @@ def feature_engineer(): ...@@ -75,11 +75,13 @@ def feature_engineer():
hospital = spark.sql(sql) hospital = spark.sql(sql)
df = df.join(hospital,"diary_service_id","left_outer").fillna("na") df = df.join(hospital,"diary_service_id","left_outer").fillna("na")
print(df.count())
df = df.drop("level2").drop("diary_service_id") df = df.drop("level2").drop("diary_service_id")
df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer", df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"]) "channel", "top", "time", "stat_date", "app_list", "hospital_id", "level3_ids"])
print(df.count())
multi_hot(df, "app_list", 1)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment