Commit 454179e8 authored by 张彦钊

训练集按照feat去重

parent 38964cf1
......@@ -137,6 +137,7 @@ class multiFFMFormatPandas:
return False
def check():
print("check")
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select max(stat_date) from esmm_train_data"
validate_date = con_sql(db, sql)[0].values.tolist()[0]
......@@ -173,7 +174,7 @@ def check():
print("before drop duplicate")
print(df.shape[0])
print("after drop duplicate")
df = df.drop_duplicates()
df = df.drop_duplicates("feat")
print(df.shape[0])
print("after group by")
print(len(df.groupby("feat")))
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment