diff --git a/tensnsorflow/feature_engineering.py b/tensnsorflow/feature_engineering.py index e1938f6524dac7ddaa1c98b0a50df4e54fd1b4fb..18c99117f4fb921f0da48e06f9c3dc696dc0d5e4 100644 --- a/tensnsorflow/feature_engineering.py +++ b/tensnsorflow/feature_engineering.py @@ -188,7 +188,8 @@ def feature_engineer(): "tag1","tag2","tag3","tag4","tag5","tag6","tag7", "ucity_id", "ccity_name","device_type", "manufacturer", "channel", "top", "time", "hospital_id","treatment_method", "price_min", "price_max", "treatment_time", - "maintain_time","recover_time").rdd.repartition(200).map(lambda x: (x[0],float(x[1]),float(x[2]),app_list_func(x[3], app_list_map), app_list_func(x[4], leve2_map), + "maintain_time","recover_time").rdd.coalesce(200).map(lambda x: (x[0],float(x[1]),float(x[2]), + app_list_func(x[3], app_list_map), app_list_func(x[4], level2_map), app_list_func(x[5], level3_map), app_list_func(x[6], level2_map),app_list_func(x[7], level2_map), app_list_func(x[8], level2_map), app_list_func(x[9], level2_map),app_list_func(x[10], level2_map), app_list_func(x[11], level2_map),app_list_func(x[12], level2_map), @@ -196,9 +197,9 @@ def feature_engineer(): value_map[x[17]],value_map[x[18]], value_map[x[19]], value_map[x[20]],value_map[x[21]], value_map[x[22]], value_map[x[23]], value_map[x[24]],value_map[x[25]],value_map[x[26]]])) d = time.time() - print("rdd") - print((d-c)/60) rdd.persist() + print("rdd") + print((d - c) / 60) # TODO 上线åŽæŠŠä¸‹é¢train fliter åˆ é™¤ï¼Œå› ä¸ºæœ€è¿‘ä¸€å¤©çš„æ•°æ®ä¹Ÿè¦ä½œä¸ºè®ç»ƒé›† train = rdd.filter(lambda x: x[0] != validate_date).map(lambda x:(x[1],x[2],x[3],x[4],x[5],x[6],x[7],x[8],x[9],