Commit 055f2a56 authored by 赵威

update field

parent 847e44b0
...@@ -19,15 +19,15 @@ from models.esmm.model import esmm_model_fn, model_export, model_predict ...@@ -19,15 +19,15 @@ from models.esmm.model import esmm_model_fn, model_export, model_predict
def main(): def main():
time_begin = time.time() time_begin = time.time()
# df = get_device_df_from_redis() # df = get_device_df_from_redis()
# df2 = get_diary_df_from_redis() df2 = get_diary_df_from_redis()
# print(df2.sample(1)) print(df2.sample(1))
# print(df.size) # print(df.size)
# print(df2.size) # print(df2.size)
# a = device_feature_engineering(df) # a = device_feature_engineering(df)
# print(a.size) # print(a.size)
# b = diary_feature_engineering(df2) b = diary_feature_engineering(df2, from_redis=True)
# print(b.sample(1)) print(b.sample(1))
device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/")) device_df, diary_df, click_df, conversion_df = read_csv_data(Path("~/data/cvr_data/"))
print(diary_df.sample(1)) print(diary_df.sample(1))
......
...@@ -66,7 +66,7 @@ def device_feature_engineering(df): ...@@ -66,7 +66,7 @@ def device_feature_engineering(df):
return device_df[device_columns] return device_df[device_columns]
def diary_feature_engineering(df): def diary_feature_engineering(df, from_redis=False):
diary_df = df.copy() diary_df = df.copy()
str_bool_map = {"true": True, "false": False} str_bool_map = {"true": True, "false": False}
...@@ -86,9 +86,10 @@ def diary_feature_engineering(df): ...@@ -86,9 +86,10 @@ def diary_feature_engineering(df):
diary_df["second_positions"] = diary_df["second_positions"].apply(lambda d: d if isinstance(d, list) else []) diary_df["second_positions"] = diary_df["second_positions"].apply(lambda d: d if isinstance(d, list) else [])
diary_df["projects"] = diary_df["projects"].apply(lambda d: d if isinstance(d, list) else []) diary_df["projects"] = diary_df["projects"].apply(lambda d: d if isinstance(d, list) else [])
# diary_df["is_pure_author"] = diary_df["is_pure_author"].map(str_bool_map) if from_redis:
# diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].map(str_bool_map) diary_df["is_pure_author"] = diary_df["is_pure_author"].map(str_bool_map)
# diary_df["is_have_reply"] = diary_df["is_have_reply"].map(str_bool_map) diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].map(str_bool_map)
diary_df["is_have_reply"] = diary_df["is_have_reply"].map(str_bool_map)
diary_df["is_pure_author"] = diary_df["is_pure_author"].astype(int) diary_df["is_pure_author"] = diary_df["is_pure_author"].astype(int)
diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int) diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment