Commit 41a53fa4 authored by 张彦钊's avatar 张彦钊

delete prints

parent 36a2cdf5
...@@ -174,7 +174,8 @@ def get_data(): ...@@ -174,7 +174,8 @@ def get_data():
features = features + len(df[i].unique()) features = features + len(df[i].unique())
print("fields:{}".format(df.shape[1]-1)) print("fields:{}".format(df.shape[1]-1))
print("features:{}".format(features)) print("features:{}".format(features))
filter_list = ["ccity_name","ucity_id","manufacturer","channel","level2_ids"] # filter_list 中没有device_type,这个类别只有安卓、ios两种类型,转化前能完全覆盖到这两种类型
filter_list = ["ccity_name","ucity_id","manufacturer","channel","level2_ids","clevel1_id","top"]
column_map = dict() column_map = dict()
for i in filter_list: for i in filter_list:
column_map[i] = list(set(df[i].values.tolist())) column_map[i] = list(set(df[i].values.tolist()))
...@@ -225,7 +226,6 @@ def get_predict_set(model,columns): ...@@ -225,7 +226,6 @@ def get_predict_set(model,columns):
print(df.shape) print(df.shape)
for i in columns.keys(): for i in columns.keys():
df.loc[~df[i].isin(columns[i]), [i]] = "na" df.loc[~df[i].isin(columns[i]), [i]] = "na"
print(df.shape)
df["cid_id"] = df["cid_id"].astype("str") df["cid_id"] = df["cid_id"].astype("str")
df["clevel1_id"] = df["clevel1_id"].astype("str") df["clevel1_id"] = df["clevel1_id"].astype("str")
df["top"] = df["top"].astype("str") df["top"] = df["top"].astype("str")
...@@ -260,8 +260,6 @@ def get_predict_set(model,columns): ...@@ -260,8 +260,6 @@ def get_predict_set(model,columns):
print(native_pre.shape) print(native_pre.shape)
# print(native_pre.loc[native_pre["device_id"] == "358035085192742"].shape) # print(native_pre.loc[native_pre["device_id"] == "358035085192742"].shape)
native_pre.to_csv(path+"native.csv",sep="\t",index=False) native_pre.to_csv(path+"native.csv",sep="\t",index=False)
print("native_pre shape")
print(native_pre.shape)
nearby_pre = df[df["label"] == "1"] nearby_pre = df[df["label"] == "1"]
nearby_pre = nearby_pre.drop("label", axis=1) nearby_pre = nearby_pre.drop("label", axis=1)
...@@ -269,9 +267,6 @@ def get_predict_set(model,columns): ...@@ -269,9 +267,6 @@ def get_predict_set(model,columns):
print(nearby_pre.shape) print(nearby_pre.shape)
# print(nearby_pre.loc[nearby_pre["device_id"] == "358035085192742"].shape) # print(nearby_pre.loc[nearby_pre["device_id"] == "358035085192742"].shape)
nearby_pre.to_csv(path + "nearby.csv", sep="\t", index=False) nearby_pre.to_csv(path + "nearby.csv", sep="\t", index=False)
print("nearby_pre shape")
print(nearby_pre.shape)
if __name__ == "__main__": if __name__ == "__main__":
......
...@@ -73,6 +73,25 @@ def get_cid(): ...@@ -73,6 +73,25 @@ def get_cid():
df = con_sql(db, sql)[0].values.tolist() df = con_sql(db, sql)[0].values.tolist()
print(",".join(df)) print(",".join(df))
def gen_tfrecords(in_file, out_dir="/home/data/", columns=("y", "z", "top")):
    """Convert a CSV file into a TFRecord file of int64 features.

    The CSV is loaded with ``pd.read_csv`` (``pd`` is expected to be imported
    at module level) and every row becomes one ``tf.train.Example`` holding
    one single-value int64 feature per entry in ``columns``. The output file
    is written to ``out_dir`` and is named after the input file's basename
    with ``.tfrecord`` appended.

    Args:
        in_file: Path of the CSV file to convert.
        out_dir: Directory that receives the ``.tfrecord`` file.
        columns: Names of the CSV columns to serialize; each is stored under
            the same name as an int64 feature.

    Raises:
        KeyError: If one of ``columns`` is missing from the CSV.
    """
    import os
    import tensorflow as tf

    # Build the output path: <out_dir>/<input basename>.tfrecord
    out_file = os.path.join(out_dir, os.path.basename(in_file) + ".tfrecord")

    # Load and select the data before opening the writer, so a bad CSV or a
    # missing column does not leave an empty output file behind.
    df = pd.read_csv(in_file)
    names = list(columns)
    # ``tolist()`` yields plain Python scalars; zipping the columns walks the
    # rows positionally without a per-cell label lookup.
    rows = zip(*(df[name].tolist() for name in names))

    # The context manager closes the writer even if serialization fails
    # part-way through.
    with tf.python_io.TFRecordWriter(out_file) as writer:
        for row in rows:
            feature = {
                name: tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[value])
                )
                for name, value in zip(names, row)
            }
            example = tf.train.Example(
                features=tf.train.Features(feature=feature)
            )
            writer.write(example.SerializeToString())
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment