Commit 38964cf1 authored by 张彦钊's avatar 张彦钊

训练集去重

parent 1fdbee9f
......@@ -145,7 +145,7 @@ def get_data():
validate_date = con_sql(db, sql)[0].values.tolist()[0]
print("validate_date:" + validate_date)
temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
start = (temp - datetime.timedelta(days=30)).strftime("%Y-%m-%d")
start = (temp - datetime.timedelta(days=60)).strftime("%Y-%m-%d")
print(start)
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select e.y,e.z,e.stat_date,e.ucity_id,e.clevel1_id,e.ccity_name," \
......@@ -160,6 +160,11 @@ def get_data():
6:"device_type",7:"manufacturer",8:"channel",9:"top",10:"level2_ids",11:"device_id"})
print("esmm data ok")
# print(df.head(2)
print("before")
print(df.shape)
print("after")
df = df.drop_duplicates()
print(df.shape)
df["clevel1_id"] = df["clevel1_id"].astype("str")
df["y"] = df["y"].astype("str")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment