Commit 9350f8f1 authored by 张彦钊

修改测试文件

parent a2049b7f
......@@ -18,15 +18,16 @@ def app_list_func(x,l):
return ",".join([str(j) for j in e])
def multi_hot(df, column, n):
    """Build an integer index for every token found in a comma-separated column.

    Missing values in ``column`` are replaced with the placeholder token
    ``"lost_na"``, each distinct cell is split on ``","``, and every distinct
    token is assigned a consecutive integer starting at ``n``.

    Args:
        df: Frame exposing ``select``, ``fillna``, ``show``, ``distinct`` and
            ``collect``. NOTE(review): ``select(column)`` and ``show(6)`` match
            the PySpark DataFrame API; confirm against the caller.
        column: Name of the column holding comma-separated string values.
        n: First integer index to hand out.

    Returns:
        A ``(number, app_list_map)`` tuple: the count of distinct tokens and a
        dict mapping each token to its index in ``range(n, n + number)``.
    """
    df = df.select(column).fillna("lost_na")
    # Debug preview of the first rows, kept from the original change.
    df.show(6)

    # Bring only the distinct cell values back to the driver; each collected
    # row holds the single selected column at position 0.
    distinct_rows = df.distinct().collect()
    tokens = {token for row in distinct_rows for token in row[0].split(",")}

    # Sort so the token -> index assignment is reproducible between runs;
    # plain set iteration order for strings varies with hash randomization.
    app_list_unique = sorted(tokens)
    number = len(app_list_unique)
    app_list_map = dict(zip(app_list_unique, range(n, n + number)))

    # NOTE(review): the previous revision also re-encoded the column through
    # app_list_func but discarded the result; callers that need the encoded
    # column must apply app_list_map themselves.
    return number, app_list_map
def feature_engineer():
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment