Commit 2a413f5c authored by 张彦钊's avatar 张彦钊

multi hot

parent 96420a74
......@@ -78,22 +78,6 @@ def get_data():
return validate_date,value_map
def multi_hot(df, i, n, max_len=3, pad_value=88):
    """Encode a comma-separated multi-value column as padded lists of IDs.

    Every distinct token found in column ``i`` is assigned an integer ID in
    the range ``n + 1 .. n + <number of distinct tokens>``.  IDs are handed
    out in sorted token order so the encoding is reproducible between runs
    (plain set iteration order varies with string hash randomisation).
    Each cell of the column is then replaced, in place, by the list of its
    tokens' IDs, right-padded with ``pad_value`` up to ``max_len`` entries.

    Args:
        df: pandas DataFrame to encode; it is modified in place.
        i: Label of the column holding comma-separated token strings.
        n: Offset for the generated IDs; the first token gets ``n + 1``.
        max_len: Minimum length of every encoded list.  Cells with more
            than ``max_len`` tokens are kept at full length, not truncated.
        pad_value: Filler appended to short lists.
            NOTE(review): the default 88 collides with a real token ID
            whenever ``n + 1 <= 88 <= n + <number of tokens>`` -- confirm
            the callers' ``n`` keeps the two ranges apart.

    Returns:
        None.  The result is written back to ``df[i]``.  Rows that share
        the same original string also share one list object afterwards.
    """
    # Keep the column label under its own name: the original reused ``i`` as
    # the loop variable, so the final assignment targeted the wrong key.
    column = i

    # Missing cells have no tokens to split; ``map`` below leaves them as NaN.
    values = df[column].dropna().unique()

    tokens = set()
    for value in values:
        tokens.update(value.split(","))

    token_ids = {
        token: n + 1 + position
        for position, token in enumerate(sorted(tokens))
    }

    encoded = {}
    for value in values:
        ids = [token_ids[token] for token in value.split(",")]
        # A non-positive repeat count yields an empty list, so rows that are
        # already long enough are left untouched.
        ids.extend([pad_value] * (max_len - len(ids)))
        encoded[value] = ids

    df[column] = df[column].map(encoded)
def write_csv(df,name,n):
for i in range(0, df.shape[0], n):
if i == 0:
......
......@@ -26,7 +26,7 @@ def multi():
df["l2"] = "lost"
df["l3"] = "lost"
for i in list(df["level"].unique()):
l = i.split(",")
l = i.split(";")
if len(l) == 3:
df.loc[df["level"] == i, ["l1"]] = l[0]
df.loc[df["level"] == i, ["l2"]] = l[1]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment