Commit 14181a35 authored by 郭羽

update feature

parent 44dda8af
......@@ -52,7 +52,7 @@ USER_MULTI_COLUMN_EXTRA_MAP = {"first_demands": 1,
}
# Column-name lists for item features. The NUMBER/CATE naming presumably
# distinguishes numeric from categorical columns — TODO confirm against the
# code that consumes these lists.
ITEM_NUMBER_COLUMNS = ["lowest_price","smart_rank2","case_count","ordered_user_ids_count"]
# NOTE(review): ITEM_CATE_COLUMNS is assigned twice in a row below; this looks
# like the before/after pair of a diff shown without +/- markers. If both lines
# execute, the second assignment (the one that adds "merchant_id") is the one
# that takes effect.
ITEM_CATE_COLUMNS = ["service_type","doctor_type","doctor_id","doctor_famous","hospital_id","hospital_city_tag_id","hospital_type","hospital_is_high_quality"]
ITEM_CATE_COLUMNS = ["service_type","merchant_id","doctor_type","doctor_id","doctor_famous","hospital_id","hospital_city_tag_id","hospital_type","hospital_is_high_quality"]
# Presumably the number of decimal places kept for numeric features — TODO confirm.
NUMBER_PRECISION = 2
# Version value read from the project's configUtils module.
VERSION = configUtils.SERVICE_VERSION
......@@ -666,6 +666,8 @@ def parseSource(_source):
lowest_price_arr = _source.setdefault("lowest_price",[])
lowest_price = lowest_price_arr[0].setdefault("price",0.0) if len(lowest_price_arr) > 0 else 0.0
#merchant_id
merchant_id = _source.setdefault("merchant_id","-1")
# doctor_type id famous_doctor
doctor = _source.setdefault("doctor",{})
doctor_type = doctor.setdefault("doctor_type","-1")
......@@ -685,6 +687,7 @@ def parseSource(_source):
case_count,
service_type,
ordered_user_ids_count,
merchant_id,
doctor_type,
doctor_id,
doctor_famous,
......@@ -720,7 +723,7 @@ def get_service_feature_df(spark):
print("item size:",len(datas))
dataRDD = spark.sparkContext.parallelize(datas)
itemColumns = ['id', 'lowest_price', 'smart_rank2', 'case_count', 'service_type', 'ordered_user_ids_count',
itemColumns = ['id', 'lowest_price', 'smart_rank2', 'case_count', 'service_type', 'ordered_user_ids_count','merchant_id',
'doctor_type', 'doctor_id', 'doctor_famous', 'hospital_id', 'hospital_city_tag_id', 'hospital_type',
'hospital_is_high_quality', 'first_demands', 'second_demands', 'first_solutions',
'second_solutions', 'first_positions', 'second_positions', 'tags_v3']
......
......@@ -12,7 +12,7 @@ import utils.connUtils as connUtils
import utils.configUtils as configUtils
# Column-name lists; item-side names are built by prepending "item_" to the
# raw column name. How each list is consumed is not visible here — the list
# names suggest numeric / embedding / multi-valued / one-hot handling, TODO confirm.
ITEM_NUMBER_COLUMNS = ["item_"+c for c in ["smart_rank2"]]
# NOTE(review): embedding_columns is assigned twice in a row below; this looks
# like the before/after pair of a diff shown without +/- markers. If both lines
# execute, the second assignment (the one that adds "item_merchant_id") is the
# one that takes effect.
embedding_columns = ["itemid","userid"] + ["item_"+c for c in ["doctor_id","hospital_id"]]
embedding_columns = ["itemid","userid"] + ["item_"+c for c in ["doctor_id","hospital_id","merchant_id"]]
multi_columns = ["tags_v3","first_demands","second_demands","first_solutions","second_solutions","first_positions","second_positions"]
# "user_os" is the only entry here that does not get the "item_" prefix.
one_hot_columns = ["user_os"] + ["item_"+c for c in ["service_type","doctor_type","doctor_famous","hospital_city_tag_id","hospital_type","hospital_is_high_quality"]]
# history_columns = ["userRatedHistory"]
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment