Commit 03f86762 authored by 张彦钊's avatar 张彦钊

删除device id特征

parent 1342e3ec
......@@ -45,12 +45,12 @@ def get_data():
print("after")
df = df.drop_duplicates()
df = df.drop_duplicates(["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "l1","l2", "time", "stat_date","device_id"])
"channel", "top", "l1","l2", "time", "stat_date"])
print(df.shape)
unique_values = []
features = ["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date","device_id"]
"channel", "top", "time", "stat_date"]
for i in features:
df[i] = df[i].astype("str")
df[i] = df[i].fillna("lost")
......@@ -71,10 +71,11 @@ def get_data():
temp = list(range(1,len(unique_values)+1))
value_map = dict(zip(unique_values,temp))
df = df.drop("device_id", axis=1)
train = df[df["stat_date"] != validate_date+"stat_date"]
test = df[df["stat_date"] == validate_date+"stat_date"]
for i in ["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "l1", "time", "stat_date","l2","device_id"]:
"channel", "top", "l1", "time", "stat_date","l2"]:
train[i] = train[i].map(value_map)
test[i] = test[i].map(value_map)
......@@ -107,8 +108,7 @@ def get_predict(date,value_map):
"from esmm_pre_data e left join user_feature u on e.device_id = u.device_id " \
"left join cid_type_top c on e.device_id = c.device_id " \
"left join cid_level2 cl on e.cid_id = cl.cid " \
"left join cid_time_cut cut on e.cid_id = cut.cid " \
"where e.device_id = '358035085192742'"
"left join cid_time_cut cut on e.cid_id = cut.cid"
df = con_sql(db, sql)
df = df.rename(columns={0: "y", 1: "z", 2: "label", 3: "ucity_id", 4: "clevel1_id", 5: "ccity_name",
6: "device_type", 7: "manufacturer", 8: "channel", 9: "top", 10: "l1",11:"l2",
......@@ -118,12 +118,10 @@ def get_predict(date,value_map):
print("predict shape")
print(df.shape)
df["uid"] = df["device_id"]
df["city"] = df["ucity_id"]
features = ["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "stat_date","device_id"]
"channel", "top", "time", "stat_date"]
for i in features:
df[i] = df[i].astype("str")
df[i] = df[i].fillna("lost")
......@@ -139,9 +137,8 @@ def get_predict(date,value_map):
nearby_pre = df[df["label"] == 1]
nearby_pre = nearby_pre.drop("label", axis=1)
for i in ["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "l1", "time", "stat_date","l2","device_id"]:
"channel", "top", "l1", "time", "stat_date","l2"]:
native_pre[i] = native_pre[i].map(value_map)
# TODO 没有覆盖到的类别会处理成na,暂时用0填充,后续完善一下
native_pre[i] = native_pre[i].fillna(0)
......@@ -150,6 +147,7 @@ def get_predict(date,value_map):
# TODO 没有覆盖到的类别会处理成na,暂时用0填充,后续完善一下
nearby_pre[i] = nearby_pre[i].fillna(0)
print("native")
print(native_pre.shape)
print(native_pre.head())
......@@ -170,4 +168,3 @@ if __name__ == '__main__':
......@@ -29,7 +29,7 @@ def gen_tfrecords(in_file):
for i in range(df.shape[0]):
feats = ["ucity_id", "clevel1_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "l1", "time", "stat_date","l2","device_id"]
"channel", "top", "l1", "time", "stat_date","l2"]
id = np.array([])
for j in feats:
id = np.append(id,df[j][i])
......@@ -58,4 +58,4 @@ def main(_):
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
tf.app.run()
tf.app.run()
\ No newline at end of file
......@@ -53,7 +53,7 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
features = {
"y": tf.FixedLenFeature([], tf.float32),
"z": tf.FixedLenFeature([], tf.float32),
"ids": tf.FixedLenFeature([12], tf.int64)
"ids": tf.FixedLenFeature([11], tf.int64)
}
parsed = tf.parse_single_example(record, features)
......@@ -351,20 +351,6 @@ def main(_):
fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
elif FLAGS.task_type == 'export':
print("Not Implemented, Do It Yourself!")
#feature_spec = tf.feature_column.make_parse_example_spec(feature_columns)
#feature_spec = {
# 'feat_ids': tf.FixedLenFeature(dtype=tf.int64, shape=[None, FLAGS.field_size]),
# 'feat_vals': tf.FixedLenFeature(dtype=tf.float32, shape=[None, FLAGS.field_size])
#}
#serving_input_receiver_fn = tf.estimator.export.build_parsing_serving_input_receiver_fn(feature_spec)
#feature_spec = {
# 'feat_ids': tf.placeholder(dtype=tf.int64, shape=[None, FLAGS.field_size], name='feat_ids'),
# 'feat_vals': tf.placeholder(dtype=tf.float32, shape=[None, FLAGS.field_size], name='feat_vals')
#}
#serving_input_receiver_fn = tf.estimator.export.build_raw_serving_input_receiver_fn(feature_spec)
#Estimator.export_savedmodel(FLAGS.servable_model_dir, serving_input_receiver_fn)
if __name__ == "__main__":
tf.logging.set_verbosity(tf.logging.INFO)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment