Commit 06fe12d1 authored by 张彦钊's avatar 张彦钊

把最近一天的数据集放进训练集

parent 82076f91
...@@ -4,7 +4,6 @@ from pyspark.conf import SparkConf ...@@ -4,7 +4,6 @@ from pyspark.conf import SparkConf
import pytispark.pytispark as pti import pytispark.pytispark as pti
# from pyspark.sql import SQLContext # from pyspark.sql import SQLContext
from pyspark.sql import SparkSession from pyspark.sql import SparkSession
from pyspark.sql.functions import _lit_doc
import datetime import datetime
import pandas as pd import pandas as pd
...@@ -133,7 +132,7 @@ def get_predict(date,value_map,app_list_map,level2_map,level3_map): ...@@ -133,7 +132,7 @@ def get_predict(date,value_map,app_list_map,level2_map,level3_map):
df = df.na.fill(dict(zip(features, features))) df = df.na.fill(dict(zip(features, features)))
df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer", df = df.drop_duplicates(["ucity_id", "level2_ids", "ccity_name", "device_type", "manufacturer",
"device_id","cid,id","label", "device_id","cid_id","label",
"channel", "top", "time", "app_list", "hospital_id", "level3_ids"]) "channel", "top", "time", "app_list", "hospital_id", "level3_ids"])
rdd = df.select("app_list", "level2_ids", "level3_ids","ucity_id","device_id","cid_id","label", "y", "z", rdd = df.select("app_list", "level2_ids", "level3_ids","ucity_id","device_id","cid_id","label", "y", "z",
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment