delete prints

41a53fa4 · 张彦钊 · 36a2cdf5 · 41a53fa4 · 41a53fa4
Commit 41a53fa4 authored Jan 11, 2019 by 张彦钊
Hide whitespace changes
Inline Side-by-side

Showing with 21 additions and 7 deletions

data2ffm.py eda/esmm/Feature_pipline/data2ffm.py +2 -7

test.py tensnsorflow/test.py +19 -0

No files found.
--- a/eda/esmm/Feature_pipline/data2ffm.py
+++ b/eda/esmm/Feature_pipline/data2ffm.py
@@ -174,7 +174,8 @@ def get_data():
        features = features + len(df[i].unique())
    print("fields:{}".format(df.shape[1]-1))
    print("features:{}".format(features))
-    filter_list = ["ccity_name","ucity_id","manufacturer","channel","level2_ids"]
+    # filter_list does not include device_type: that category has only two values (Android and iOS), and the data before conversion already covers both completely
+    filter_list = ["ccity_name","ucity_id","manufacturer","channel","level2_ids","clevel1_id","top"]
    column_map = dict()
    for i in filter_list:
        column_map[i] = list(set(df[i].values.tolist()))
@@ -225,7 +226,6 @@ def get_predict_set(model,columns):
    print(df.shape)
    for i in columns.keys():
        df.loc[~df[i].isin(columns[i]), [i]] = "na"
-    print(df.shape)
    df["cid_id"] = df["cid_id"].astype("str")
    df["clevel1_id"] = df["clevel1_id"].astype("str")
    df["top"] = df["top"].astype("str")
@@ -260,8 +260,6 @@ def get_predict_set(model,columns):
    print(native_pre.shape)
    # print(native_pre.loc[native_pre["device_id"] == "358035085192742"].shape)
    native_pre.to_csv(path+"native.csv",sep="\t",index=False)
-    print("native_pre shape")
-    print(native_pre.shape)

    nearby_pre = df[df["label"] == "1"]
    nearby_pre = nearby_pre.drop("label", axis=1)
@@ -269,9 +267,6 @@ def get_predict_set(model,columns):
    print(nearby_pre.shape)
    # print(nearby_pre.loc[nearby_pre["device_id"] == "358035085192742"].shape)
    nearby_pre.to_csv(path + "nearby.csv", sep="\t", index=False)
-    print("nearby_pre shape")
-    print(nearby_pre.shape)
-


 if __name__ == "__main__":

--- a/tensnsorflow/test.py
+++ b/tensnsorflow/test.py
@@ -73,6 +73,25 @@ def get_cid():
    df = con_sql(db, sql)[0].values.tolist()
    print(",".join(df))

+def gen_tfrecords(in_file):
+    """Convert a CSV file into a TFRecord file under /home/data/.
+
+    Reads ``in_file`` with pandas and writes one ``tf.train.Example`` per
+    row. Each example stores the row's "y", "z" and "top" values as
+    single-element int64 features. The output file is named after the
+    basename of ``in_file`` with ".tfrecord" appended.
+
+    Args:
+        in_file: Path of the CSV file to convert; it must contain the
+            columns "y", "z" and "top".
+    """
+    import os
+    import tensorflow as tf
+
+    def _int64_feature(value):
+        # Wrap a single value as an int64 feature.
+        return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))
+
+    path = "/home/data/"
+    # Build the output file path.
+    out_file = os.path.join(path, os.path.basename(in_file) + ".tfrecord")
+    # Read the input before opening the writer so that an unreadable CSV
+    # does not leave an empty output file behind.
+    df = pd.read_csv(in_file)
+    # The context manager closes the writer even if a row fails to serialize.
+    with tf.python_io.TFRecordWriter(out_file) as tfrecord_out:
+        # Walk the three columns in lockstep instead of doing a label lookup
+        # per cell; this also works when the frame's index is not 0..n-1.
+        for y, z, top in zip(df["y"], df["z"], df["top"]):
+            features = tf.train.Features(feature={
+                "y": _int64_feature(y),
+                "z": _int64_feature(z),
+                "top": _int64_feature(top),
+            })
+            example = tf.train.Example(features=features)
+            tfrecord_out.write(example.SerializeToString())


 if __name__ == "__main__":