Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

ab8de91d · Your Name · 4064f254 · 08ab56d4 · ab8de91d · ab8de91d
Commit ab8de91d authored Jun 05, 2019 by Your Name
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 17 additions and 3 deletions

feature_engineering.py eda/esmm/Model_pipline/feature_engineering.py +14 -1

multi.py tensnsorflow/multi.py +3 -2

No files found.
--- a/eda/esmm/Model_pipline/feature_engineering.py
+++ b/eda/esmm/Model_pipline/feature_engineering.py
@@ -76,6 +76,17 @@ def con_sql(db,sql):
    return df


+def get_pre_number():
+    db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
+    sql = "select count(*) from esmm_pre_data"
+    cursor = db.cursor()
+    cursor.execute(sql)
+    result = cursor.fetchone()[0]
+    print("预测集数量：")
+    print(result)
+    db.close()
+
+
 def feature_engineer():
    apps_number, app_list_map, level2_number, leve2_map, level3_number, leve3_map = get_map()
    unique_values = []
@@ -221,9 +232,11 @@ def feature_engineer():
    print("train tfrecord done")
    print((h - f) / 60)

-    print("样本总量：")
+    print("训练集样本总量：")
    print(rdd.count())

+    get_pre_number()
+
    test = rdd.filter(lambda x: x[0] == validate_date).map(
        lambda x: (x[1], x[2], x[3], x[4], x[5], x[6], x[7], x[8], x[9],
                   x[10], x[11], x[12], x[13]))

--- a/tensnsorflow/multi.py
+++ b/tensnsorflow/multi.py
@@ -158,18 +158,19 @@ def get_hdfs(dir_in):
    return a


-def get_pre():
+def get_pre_number():
    db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    sql = "select count(*) from esmm_pre_data"
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchone()[0]
+    print("预测集数量：")
    print(result)
    db.close()


 if __name__ == '__main__':
-   get_pre()
+   # get_pre()
    # sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
    #     .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
    #     .set("spark.tispark.plan.allow_index_double_read", "false") \