diff --git a/tensnsorflow/zhang.py b/tensnsorflow/zhang.py index ec118bd95816d2a363db62defc63b8f8e6b5a396..2edfd392dad0c29b175f048e5bc31afcb13678c4 100644 --- a/tensnsorflow/zhang.py +++ b/tensnsorflow/zhang.py @@ -297,7 +297,7 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map): "left join jerry_test.cart_tag cart on e.device_id = cart.device_id " \ "left join jerry_test.knowledge k on feat.level2 = k.level2_id " \ "left join jerry_test.search_doris doris on e.device_id = doris.device_id and e.stat_date = doris.get_date " \ - "limit 60000" + "limit 600000" features = ["ucity_id", "ccity_name", "device_type", "manufacturer", "channel", "top", "time", "hospital_id", @@ -334,6 +334,7 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map): rdd.persist(storageLevel= StorageLevel.MEMORY_ONLY_SER) + print(rdd.count()) native_pre = spark.createDataFrame(rdd.filter(lambda x:x[0] == 0).map(lambda x:(x[3],x[4],x[5])))\ .toDF("city","uid","cid_id")