Commit 98d4166e authored by 赵威

update null checker

parent 34560c08
...@@ -14,9 +14,11 @@ def click_feature_engineering(click_df, conversion_df): ...@@ -14,9 +14,11 @@ def click_feature_engineering(click_df, conversion_df):
cc_df.drop(["partition_date_x", "partition_date_y"], axis=1, inplace=True) cc_df.drop(["partition_date_x", "partition_date_y"], axis=1, inplace=True)
cc_df["conversion_label"].fillna(0, inplace=True) cc_df["conversion_label"].fillna(0, inplace=True)
print("click:") print("click: " + str(cc_df.shape))
nullseries = cc_df.isnull().sum() nullseries = cc_df.isnull().sum()
print(nullseries[nullseries > 0]) nulls = nullseries[nullseries > 0]
print(cc_df.shape) if nulls.any():
print(nulls)
print("!!!!!!!!!!!!!!!!!!!!!!\n")
return cc_df return cc_df
...@@ -87,7 +87,9 @@ def device_feature_engineering(device_df, content_type): ...@@ -87,7 +87,9 @@ def device_feature_engineering(device_df, content_type):
columns = TRACTATE_DEVICE_COLUMNS columns = TRACTATE_DEVICE_COLUMNS
nullseries = df.isnull().sum() nullseries = df.isnull().sum()
print("device:") print("device: " + str(df.shape))
print(nullseries[nullseries > 0]) nulls = nullseries[nullseries > 0]
print(df.shape) if nulls.any():
print(nulls)
print("!!!!!!!!!!!!!!!!!!!!!!\n")
return df[columns] return df[columns]
...@@ -85,10 +85,12 @@ def diary_feature_engineering(df): ...@@ -85,10 +85,12 @@ def diary_feature_engineering(df):
diary_df = diary_df[DIARY_COLUMNS] diary_df = diary_df[DIARY_COLUMNS]
print("diary:") print("diary: " + str(diary_df.shape))
nullseries = diary_df.isnull().sum() nullseries = diary_df.isnull().sum()
print(nullseries[nullseries > 0]) nulls = nullseries[nullseries > 0]
print(diary_df.shape) if nulls.any():
print(nulls)
print("!!!!!!!!!!!!!!!!!!!!!!\n")
return diary_df return diary_df
......
...@@ -86,10 +86,12 @@ def tractate_feature_engineering(tractate_df): ...@@ -86,10 +86,12 @@ def tractate_feature_engineering(tractate_df):
df = df[TRACTATE_COLUMNS] df = df[TRACTATE_COLUMNS]
print("tractate:") print("tractate: " + str(df.shape))
nullseries = df.isnull().sum() nullseries = df.isnull().sum()
print(nullseries[nullseries > 0]) nulls = nullseries[nullseries > 0]
print(df.shape) if nulls.any():
print(nulls)
print("!!!!!!!!!!!!!!!!!!!!!!\n")
return df return df
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment