Commit 37995a94 authored by 郭羽

update feature

parent 5c3a1c78
......@@ -112,6 +112,7 @@ def addItemFeatures(samples,itemDF,dataVocab,multiVocab):
samples = samples.join(staticFeatures, on=['itemid'], how='left')
print("连续特征处理...")
# todo 分桶比较耗时,可以考虑做非线性转换
# 连续特征处理
pipelineStage = []
# Normalization
......
......@@ -14,7 +14,7 @@ import utils.configUtils as configUtils
ITEM_NUMBER_COLUMNS = ["item_"+c for c in ["smart_rank2"]]
embedding_columns = ["itemid","userid"] + ["item_"+c for c in ["doctor_id","hospital_id"]]
multi_columns = ["tags_v3","first_demands","second_demands","first_solutions","second_solutions","first_positions","second_positions"]
one_hot_columns = ["item_"+c for c in ["service_type","doctor_type","doctor_famous","hospital_city_tag_id","hospital_type","hospital_is_high_quality"]]
one_hot_columns = ["user_os"] + ["item_"+c for c in ["service_type","doctor_type","doctor_famous","hospital_city_tag_id","hospital_type","hospital_is_high_quality"]]
# history_columns = ["userRatedHistory"]
# 数据加载
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment