Commit c60da982 authored by 赵威

Merge branch 'fe' into 'offic'

Fe

See merge request !21
parents e110acf5 a2df4105
......@@ -87,11 +87,11 @@ _int_columns = [
"first_positions_num",
"second_positions_num",
"projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
# "is_related_service",
# "effect_second_skip_num",
# "business_second_skip_num",
# "service_price",
# "service_sold_num",
]
_float_columns = [
"one_ctr",
......@@ -126,8 +126,8 @@ _float_columns = [
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
# "effect_second_skip_rate",
# "business_second_skip_rate",
]
_categorical_columns = [
"device_id",
......@@ -174,25 +174,25 @@ _categorical_columns = [
"click_diary_id3",
"click_diary_id4",
"click_diary_id5",
"service_id",
"service_city",
"recommend_service_id",
"recommend_service_city",
"recommend_service_price",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
"device_fd3",
"device_sd3",
"device_fs3",
"device_ss3",
"device_fp3",
"device_sp3",
"device_p3",
# "service_id",
# "service_city",
# "recommend_service_id",
# "recommend_service_city",
# "recommend_service_price",
# "device_fd2",
# "device_sd2",
# "device_fs2",
# "device_ss2",
# "device_fp2",
# "device_sp2",
# "device_p2",
# "device_fd3",
# "device_sd3",
# "device_fs3",
# "device_ss3",
# "device_fp3",
# "device_sp3",
# "device_p3",
]
PREDICTION_ALL_COLUMNS = _int_columns + _float_columns + _categorical_columns
......
......@@ -124,18 +124,18 @@ DIARY_COLUMNS = [
"first_positions_num",
"second_positions_num",
"projects_num",
"is_related_service",
"effect_second_skip_num",
"business_second_skip_num",
"effect_second_skip_rate",
"business_second_skip_rate",
"service_id",
"service_price",
"service_sold_num",
"service_city",
"recommend_service_id",
"recommend_service_city",
"recommend_service_price",
# "is_related_service",
# "effect_second_skip_num",
# "business_second_skip_num",
# "effect_second_skip_rate",
# "business_second_skip_rate",
# "service_id",
# "service_price",
# "service_sold_num",
# "service_city",
# "recommend_service_id",
# "recommend_service_city",
# "recommend_service_price",
]
INT_COLUMNS = [
"active_days",
......@@ -213,10 +213,10 @@ INT_COLUMNS = [
"first_positions_num",
"second_positions_num",
"projects_num",
"effect_second_skip_num",
"business_second_skip_num",
"service_price",
"service_sold_num",
# "effect_second_skip_num",
# "business_second_skip_num",
# "service_price",
# "service_sold_num",
]
FLOAT_COLUMNS = [
"one_ctr",
......@@ -251,8 +251,8 @@ FLOAT_COLUMNS = [
"sixty_browse_duration_avg",
"ninety_browse_duration_avg",
"history_browse_duration_avg",
"effect_second_skip_rate",
"business_second_skip_rate",
# "effect_second_skip_rate",
# "business_second_skip_rate",
]
CATEGORICAL_COLUMNS = [
"device_id",
......@@ -305,26 +305,26 @@ CATEGORICAL_COLUMNS = [
"click_diary_id3",
"click_diary_id4",
"click_diary_id5",
"is_related_service",
"service_id",
"service_city",
"recommend_service_id",
"recommend_service_city",
"recommend_service_price",
"device_fd2",
"device_sd2",
"device_fs2",
"device_ss2",
"device_fp2",
"device_sp2",
"device_p2",
"device_fd3",
"device_sd3",
"device_fs3",
"device_ss3",
"device_fp3",
"device_sp3",
"device_p3",
# "is_related_service",
# "service_id",
# "service_city",
# "recommend_service_id",
# "recommend_service_city",
# "recommend_service_price",
# "device_fd2",
# "device_sd2",
# "device_fs2",
# "device_ss2",
# "device_fp2",
# "device_sp2",
# "device_p2",
# "device_fd3",
# "device_sd3",
# "device_fs3",
# "device_ss3",
# "device_fp3",
# "device_sp3",
# "device_p3",
]
CROSS_COLUMNS = [
["device_fd", "content_fd"],
......@@ -334,20 +334,20 @@ CROSS_COLUMNS = [
["device_fp", "content_fp"],
["device_sp", "content_sp"],
["device_p", "content_p"],
["device_fd2", "content_fd"],
["device_sd2", "content_sd"],
["device_fs2", "content_fs"],
["device_ss2", "content_ss"],
["device_fp2", "content_fp"],
["device_sp2", "content_sp"],
["device_p2", "content_p"],
["device_fd3", "content_fd"],
["device_sd3", "content_sd"],
["device_fs3", "content_fs"],
["device_ss3", "content_ss"],
["device_fp3", "content_fp"],
["device_sp3", "content_sp"],
["device_p3", "content_p"],
# ["device_fd2", "content_fd"],
# ["device_sd2", "content_sd"],
# ["device_fs2", "content_fs"],
# ["device_ss2", "content_ss"],
# ["device_fp2", "content_fp"],
# ["device_sp2", "content_sp"],
# ["device_p2", "content_p"],
# ["device_fd3", "content_fd"],
# ["device_sd3", "content_sd"],
# ["device_fs3", "content_fs"],
# ["device_ss3", "content_ss"],
# ["device_fp3", "content_fp"],
# ["device_sp3", "content_sp"],
# ["device_p3", "content_p"],
]
......@@ -422,15 +422,15 @@ def diary_feature_engineering(df):
diary_df["is_have_pure_reply"] = diary_df["is_have_pure_reply"].astype(int)
diary_df["is_have_reply"] = diary_df["is_have_reply"].astype(int)
diary_df["is_related_service"] = diary_df["is_related_service"].astype(int)
diary_df["service_id"] = diary_df["service_id"].astype(str)
diary_df["recommend_service_id"] = diary_df["recommend_service_id"].astype(str)
diary_df["recommend_service_price"] = diary_df["recommend_service_price"].astype(str)
# diary_df["is_related_service"] = diary_df["is_related_service"].astype(int)
# diary_df["service_id"] = diary_df["service_id"].astype(str)
# diary_df["recommend_service_id"] = diary_df["recommend_service_id"].astype(str)
# diary_df["recommend_service_price"] = diary_df["recommend_service_price"].astype(str)
diary_df["service_id"] = diary_df["service_id"].fillna("-1")
diary_df["service_city"] = diary_df["service_city"].fillna("")
diary_df["recommend_service_id"] = diary_df["recommend_service_id"].fillna("-1")
diary_df["recommend_service_city"] = diary_df["recommend_service_city"].fillna("")
# diary_df["service_id"] = diary_df["service_id"].fillna("-1")
# diary_df["service_city"] = diary_df["service_city"].fillna("")
# diary_df["recommend_service_id"] = diary_df["recommend_service_id"].fillna("-1")
# diary_df["recommend_service_city"] = diary_df["recommend_service_city"].fillna("")
diary_df = diary_df[DIARY_COLUMNS]
......@@ -463,21 +463,21 @@ def join_features(device_df, diary_df, cc_df):
df["device_sp"] = df["second_positions_x"].apply(lambda x: nth_element(x, 0))
df["device_p"] = df["projects_x"].apply(lambda x: nth_element(x, 0))
df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
# df["device_fd2"] = df["first_demands_x"].apply(lambda x: nth_element(x, 1))
# df["device_sd2"] = df["second_demands_x"].apply(lambda x: nth_element(x, 1))
# df["device_fs2"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 1))
# df["device_ss2"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 1))
# df["device_fp2"] = df["first_positions_x"].apply(lambda x: nth_element(x, 1))
# df["device_sp2"] = df["second_positions_x"].apply(lambda x: nth_element(x, 1))
# df["device_p2"] = df["projects_x"].apply(lambda x: nth_element(x, 1))
df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
# df["device_fd3"] = df["first_demands_x"].apply(lambda x: nth_element(x, 2))
# df["device_sd3"] = df["second_demands_x"].apply(lambda x: nth_element(x, 2))
# df["device_fs3"] = df["first_solutions_x"].apply(lambda x: nth_element(x, 2))
# df["device_ss3"] = df["second_solutions_x"].apply(lambda x: nth_element(x, 2))
# df["device_fp3"] = df["first_positions_x"].apply(lambda x: nth_element(x, 2))
# df["device_sp3"] = df["second_positions_x"].apply(lambda x: nth_element(x, 2))
# df["device_p3"] = df["projects_x"].apply(lambda x: nth_element(x, 2))
df["content_fd"] = df["first_demands_y"].apply(lambda x: nth_element(x, 0))
df["content_sd"] = df["second_demands_y"].apply(lambda x: nth_element(x, 0))
......@@ -566,20 +566,20 @@ def device_diary_fe(device_id, diary_ids, device_dict, diary_dict):
device_info["device_fp"] = nth_element(device_fp, 0)
device_info["device_sp"] = nth_element(device_sp, 0)
device_info["device_p"] = nth_element(device_p, 0)
device_info["device_fd2"] = nth_element(device_fd, 1)
device_info["device_sd2"] = nth_element(device_sd, 1)
device_info["device_fs2"] = nth_element(device_fs, 1)
device_info["device_ss2"] = nth_element(device_ss, 1)
device_info["device_fp2"] = nth_element(device_fp, 1)
device_info["device_sp2"] = nth_element(device_sp, 1)
device_info["device_p2"] = nth_element(device_p, 1)
device_info["device_fd3"] = nth_element(device_fd, 2)
device_info["device_sd3"] = nth_element(device_sd, 2)
device_info["device_fs3"] = nth_element(device_fs, 2)
device_info["device_ss3"] = nth_element(device_ss, 2)
device_info["device_fp3"] = nth_element(device_fp, 2)
device_info["device_sp3"] = nth_element(device_sp, 2)
device_info["device_p3"] = nth_element(device_p, 2)
# device_info["device_fd2"] = nth_element(device_fd, 1)
# device_info["device_sd2"] = nth_element(device_sd, 1)
# device_info["device_fs2"] = nth_element(device_fs, 1)
# device_info["device_ss2"] = nth_element(device_ss, 1)
# device_info["device_fp2"] = nth_element(device_fp, 1)
# device_info["device_sp2"] = nth_element(device_sp, 1)
# device_info["device_p2"] = nth_element(device_p, 1)
# device_info["device_fd3"] = nth_element(device_fd, 2)
# device_info["device_sd3"] = nth_element(device_sd, 2)
# device_info["device_fs3"] = nth_element(device_fs, 2)
# device_info["device_ss3"] = nth_element(device_ss, 2)
# device_info["device_fp3"] = nth_element(device_fp, 2)
# device_info["device_sp3"] = nth_element(device_sp, 2)
# device_info["device_p3"] = nth_element(device_p, 2)
diary_lst = []
diary_ids_res = []
for id in diary_ids:
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment