Commit 22de0c8b authored by Apple

Merge branch 'master' of git.wanmeizhensuo.com:ML/ffm-baseline

parents f1c59911 ebdc3e98
@@ -111,27 +111,27 @@ def feature_engineer():
unique_values.extend(get_unique(db, sql))
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select distinct price_min from train_Knowledge_network_data"
sql = "select distinct price_min from knowledge"
unique_values.extend(get_unique(db, sql))
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select distinct treatment_method from train_Knowledge_network_data"
sql = "select distinct treatment_method from knowledge"
unique_values.extend(get_unique(db, sql))
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select distinct price_max from train_Knowledge_network_data"
sql = "select distinct price_max from knowledge"
unique_values.extend(get_unique(db, sql))
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select distinct treatment_time from train_Knowledge_network_data"
sql = "select distinct treatment_time from knowledge"
unique_values.extend(get_unique(db, sql))
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select distinct maintain_time from train_Knowledge_network_data"
sql = "select distinct maintain_time from knowledge"
unique_values.extend(get_unique(db, sql))
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select distinct recover_time from train_Knowledge_network_data"
sql = "select distinct recover_time from knowledge"
unique_values.extend(get_unique(db, sql))
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
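Review note: the hunk above reopens a fresh pymysql connection and runs one `select distinct` per column against the renamed `knowledge` table. A minimal consolidation sketch, not part of this commit; it assumes `get_unique(db, sql)` (defined earlier in this file) returns a list and does not close the connection, whereas the original reconnects before every query, which suggests it might:

```python
import pymysql

DISTINCT_COLUMNS = ["price_min", "treatment_method", "price_max",
                    "treatment_time", "maintain_time", "recover_time"]

def collect_unique_values():
    # One connection for all six queries instead of one per query.
    db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    unique_values = []
    try:
        for col in DISTINCT_COLUMNS:
            sql = "select distinct {} from knowledge".format(col)
            unique_values.extend(get_unique(db, sql))
    finally:
        db.close()
    return unique_values
```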
@@ -154,9 +154,11 @@ def feature_engineer():
unique_values.extend(features)
print("unique_values length")
print(len(unique_values))
print("特征维度:")
print(apps_number + level2_number + level3_number + len(unique_values))
temp = list(range(2 + apps_number + level2_number + level3_number,
2 + apps_number + level2_number + level3_number + len(unique_values)))
temp = list(range(16 + apps_number + level2_number + level3_number,
16 + apps_number + level2_number + level3_number + len(unique_values)))
value_map = dict(zip(unique_values, temp))
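Review note: this hunk moves the first categorical feature id from `2 + ...` to `16 + ...`, reserving 14 more low-numbered slots ahead of the app-list/level2/level3 blocks. A toy illustration of the resulting mapping (numbers made up for the example):

```python
# Toy numbers, for illustration only.
apps_number, level2_number, level3_number = 3, 2, 1
unique_values = ["beijing", "t1", "min500"]

offset = 16 + apps_number + level2_number + level3_number   # was 2 + ... before this commit
temp = list(range(offset, offset + len(unique_values)))
value_map = dict(zip(unique_values, temp))
print(value_map)   # {'beijing': 22, 't1': 23, 'min500': 24}
```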
sql = "select e.y,e.z,e.stat_date,e.ucity_id,feat.level2_ids,e.ccity_name,u.device_type,u.manufacturer," \
@@ -169,7 +171,7 @@ def feature_engineer():
"left join jerry_test.cid_time_cut cut on e.cid_id = cut.cid " \
"left join jerry_test.device_app_list dl on e.device_id = dl.device_id " \
"left join jerry_test.diary_feat feat on e.cid_id = feat.diary_id " \
"left join jerry_test.train_Knowledge_network_data k on feat.level2 = k.level2_id " \
"left join jerry_test.knowledge k on feat.level2 = k.level2_id " \
"left join jerry_test.wiki_tag wiki on e.device_id = wiki.device_id " \
"left join jerry_test.question_tag question on e.device_id = question.device_id " \
"left join jerry_test.search_tag search on e.device_id = search.device_id " \
@@ -254,7 +256,7 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
"left join jerry_test.order_tag ot on e.device_id = ot.device_id " \
"left join jerry_test.sixin_tag sixin on e.device_id = sixin.device_id " \
"left join jerry_test.cart_tag cart on e.device_id = cart.device_id " \
"left join jerry_test.train_Knowledge_network_data k on feat.level2 = k.level2_id"
"left join jerry_test.knowledge k on feat.level2 = k.level2_id"
features = ["ucity_id", "ccity_name", "device_type", "manufacturer",
"channel", "top", "time", "hospital_id",
@@ -11,6 +11,7 @@ import json
from datetime import date, timedelta
import tensorflow as tf
import subprocess
+import time
#################### CMD Arguments ####################
FLAGS = tf.app.flags.FLAGS
@@ -361,11 +362,9 @@ def main(_):
print('%s: %s' % (key,value))
elif FLAGS.task_type == 'infer':
preds = Estimator.predict(input_fn=lambda: input_fn(te_files, num_epochs=1, batch_size=FLAGS.batch_size), predict_keys=["pctcvr","pctr","pcvr"])
with open(FLAGS.local_dir+"/pred.txt", "w") as fo:
print("-"*100)
with open(FLAGS.local_dir + "/pred.txt", "w") as fo:
for prob in preds:
fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
with open(FLAGS.local_dir + "/pred.txt", "w") as fo:
for prob in preds:
fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
elif FLAGS.task_type == 'export':
print("Not Implemented, Do It Yourself!")
@@ -385,9 +384,11 @@ def get_filename(dir_in):
if __name__ == "__main__":
# classpath = "$CLASSPATH:%JAVA_HOME%/lib/dt.jar:%JAVA_HOME%/lib/tools.jar:$(/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/api-asn1-api-1.0.0-M20.jar:/opt/hadoop/share/hadoop/common/lib/hadoop-annotations-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/activation-1.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/common/lib/jasper-runtime-5.5.23.jar:/opt/hadoop/share/hadoop/common/lib/jsch-0.1.42.jar:/opt/hadoop/share/hadoop/common/lib/hadoop-auth-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/jsp-api-2.1.jar:/opt/hadoop/share/hadoop/common/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/common/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/common/lib/commons-beanutils-1.9.2.jar:/opt/hadoop/share/hadoop/common/lib/zookeeper-3.4.5-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/api-util-1.0.0-M20.jar:/opt/hadoop/share/hadoop/common/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/common/lib/snappy-java-1.0.4.1.jar:/opt/hadoop/share/hadoop/common/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/common/lib/apacheds-kerberos-codec-2.0.0-M15.jar:/opt/hadoop/share/hadoop/common/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/common/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/common/lib/commons-collections-3.2.2.jar:/opt/hadoop/share/hadoop/common/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/common/lib/commons-digester-1.8.jar:/opt/hadoop/share/hadoop/common/lib/jasper-compiler-5.5.23.jar:/opt/hadoop/share/hadoop/common/lib/java-xmlbuilder-0.4.jar:/opt/hadoop/share/hadoop/common/lib/curator-client-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/common/lib/jackson-jaxrs-1.8.10.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/common/lib/slf4j-log4j12-1.7.5.jar:/opt/hadoop/share/hadoop/common/lib/gson-2.2.4.jar:/opt/hadoop/share/hadoop/common/lib/commons-configuration-1.6.jar:/opt/hadoop/share/hadoop/common/lib/commons-httpclient-3.1.jar:/opt/hadoop/share/hadoop/common/lib/hamcrest-core-1.3.jar:/opt/hadoop/share/hadoop/common/lib/httpclient-4.2.5.jar:/opt/hadoop/share/hadoop/common/lib/jets3t-0.9.0.jar:/opt/hadoop/share/hadoop/common/lib/xmlenc-0.52.jar:/opt/hadoop/share/hadoop/common/lib/logredactor-1.0.3.jar:/opt/hadoop/share/hadoop/common/lib/slf4j-api-1.7.5.jar:/opt/hadoop/share/hadoop/common/lib/htrace-core4-4.0.1-incubating.jar:/opt/hadoop/share/hadoop/common/lib/curator-recipes-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/apacheds-i18n-2.0.0-M15.jar:/opt/hadoop/share/hadoop/common/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/common/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/common/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/common/lib/junit-4.11.jar:/opt/hadoop/share/hadoop/common/lib/jaxb-api-2.2.2.jar:/opt/hadoop/share/hadoop/common/lib/commons-beanutils-core-1.8.0.jar:/opt/hadoop/share/hadoop/common/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/common/lib/commons-net-3.1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-json-1.9.jar:/opt/hadoop/share/hadoop/common/lib/stax-api-1.0-2.jar:/opt/hadoop/share/hadoop/common/lib/commons-el-1.0.jar:/opt/hadoop/share/hadoop/common/lib/mockito-all-1.8.5.jar:/opt/hadoop/share/hadoop/common/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/common/lib/jettison-1.1.jar:/opt/hadoop/share/hadoop/common/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/common/lib/avro-1.7.6-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/lib/httpcore-4.2.5.jar:/opt/hadoop/share/hadoop
/common/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/common/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/common/lib/paranamer-2.3.jar:/opt/hadoop/share/hadoop/common/lib/curator-framework-2.7.1.jar:/opt/hadoop/share/hadoop/common/lib/jackson-xc-1.8.10.jar:/opt/hadoop/share/hadoop/common/lib/commons-math3-3.1.1.jar:/opt/hadoop/share/hadoop/common/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/common/hadoop-common-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/common/hadoop-nfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/common/hadoop-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs:/opt/hadoop/share/hadoop/hdfs/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/jasper-runtime-5.5.23.jar:/opt/hadoop/share/hadoop/hdfs/lib/jsp-api-2.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/hdfs/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/xml-apis-1.3.04.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/hdfs/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-daemon-1.0.13.jar:/opt/hadoop/share/hadoop/hdfs/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/hdfs/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/hdfs/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/xmlenc-0.52.jar:/opt/hadoop/share/hadoop/hdfs/lib/htrace-core4-4.0.1-incubating.jar:/opt/hadoop/share/hadoop/hdfs/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/hdfs/lib/xercesImpl-2.9.1.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-el-1.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/hdfs/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/hdfs/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/hdfs/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/hdfs/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-nfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/hdfs/hadoop-hdfs-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/yarn/lib/activation-1.1.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-codec-1.4.jar:/opt/hadoop/share/hadoop/yarn/lib/aopalliance-1.0.jar:/opt/hadoop/share/hadoop/yarn/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-lang-2.6.jar:/opt/hadoop/share/hadoop/yarn/lib/zookeeper-3.4.5-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/lib/guice-3.0.jar:/opt/hadoop/share/hadoop/yarn/lib/jetty-util-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/yarn/lib/guava-11.0.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-cli-1.2.jar:/opt/hadoop/share/hadoop/yarn/lib/servlet-api-2.5.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-collections-3.2.2.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-logging-1.1.3.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-jaxrs-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/jaxb-impl-2.2.3-1.jar:/opt/hadoop/share/hadoop/yarn/lib/jline-2.11.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-guice-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jsr305-3.0.0.jar:/opt/hadoop/share/hadoop/yar
n/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/yarn/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/yarn/lib/javax.inject-1.jar:/opt/hadoop/share/hadoop/yarn/lib/jaxb-api-2.2.2.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-json-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/stax-api-1.0-2.jar:/opt/hadoop/share/hadoop/yarn/lib/jetty-6.1.26.cloudera.4.jar:/opt/hadoop/share/hadoop/yarn/lib/jettison-1.1.jar:/opt/hadoop/share/hadoop/yarn/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/yarn/lib/guice-servlet-3.0.jar:/opt/hadoop/share/hadoop/yarn/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/yarn/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-client-1.9.jar:/opt/hadoop/share/hadoop/yarn/lib/jackson-xc-1.8.10.jar:/opt/hadoop/share/hadoop/yarn/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-client-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-web-proxy-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-tests-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-nodemanager-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-api-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-registry-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-applicationhistoryservice-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-applications-distributedshell-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-server-resourcemanager-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/yarn/hadoop-yarn-applications-unmanaged-am-launcher-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/hadoop-annotations-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/aopalliance-1.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/asm-3.2.jar:/opt/hadoop/share/hadoop/mapreduce/lib/guice-3.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/snappy-java-1.0.4.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-server-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jackson-core-asl-1.8.10.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jackson-mapper-asl-1.8.10-cloudera.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/hamcrest-core-1.3.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-guice-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/lib/log4j-1.2.17.jar:/opt/hadoop/share/hadoop/mapreduce/lib/xz-1.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/junit-4.11.jar:/opt/hadoop/share/hadoop/mapreduce/lib/javax.inject-1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/commons-compress-1.4.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/protobuf-java-2.5.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/avro-1.7.6-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/lib/guice-servlet-3.0.jar:/opt/hadoop/share/hadoop/mapreduce/lib/commons-io-2.4.jar:/opt/hadoop/share/hadoop/mapreduce/lib/leveldbjni-all-1.8.jar:/opt/hadoop/share/hadoop/mapreduce/lib/netty-3.10.5.Final.jar:/opt/hadoop/share/hadoop/mapreduce/lib/paranamer-2.3.jar:/opt/hadoop/share/hadoop/mapreduce/lib/jersey-core-1.9.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-plugins-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-app-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-examples-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-shuffle-2.6.0-cdh5.16.1.jar:/opt/hadoop
/share/hadoop/mapreduce/hadoop-mapreduce-client-common-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-nativetask-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.16.1-tests.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-hs-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-core-2.6.0-cdh5.16.1.jar:/opt/hadoop/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-2.6.0-cdh5.16.1.jar)"
# w
+b = time.time()
a = "export CLASSPATH='$(hadoop classpath --glob)'"
os.system(a)
path = "hdfs://172.16.32.4:8020/strategy/esmm/"
tf.logging.set_verbosity(tf.logging.INFO)
-tf.app.run()
\ No newline at end of file
+tf.app.run()
+print("Elapsed time (minutes):")
+print((time.time()-b)/60)
\ No newline at end of file
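Review note: in TF 1.x, `tf.app.run()` ends by calling `sys.exit(main(argv))`, so the two `print` lines added after it will normally never execute and the elapsed time is lost. A sketch that times the run inside `main` instead (structure assumed from this file's `main(_)`):

```python
import time
import tensorflow as tf

def main(_):
    start = time.time()
    # ... existing train / infer / export logic ...
    print("elapsed (minutes):")
    print((time.time() - start) / 60)

if __name__ == "__main__":
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.app.run()   # calls sys.exit(); code below this line is unreachable
```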
# -*- coding: utf-8 -*-
from pyspark.context import SparkContext
from pyspark.conf import SparkConf
import pytispark.pytispark as pti
from pyspark.sql import SparkSession
import numpy as np
# from pyspark.context import SparkContext
# from pyspark.conf import SparkConf
# import pytispark.pytispark as pti
# from pyspark.sql import SparkSession
import pandas as pd
import pymysql
from sqlalchemy import create_engine
def test():
@@ -28,26 +31,48 @@ def test():
spark.sql(sql).show(6)
def some_function(x):
# Use the libraries to do work
return np.sin(x)**2 + 2
def con_sql(db,sql):
cursor = db.cursor()
cursor.execute(sql)
result = cursor.fetchall()
df = pd.DataFrame(list(result))
db.close()
return df
if __name__ == '__main__':
-sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
-.set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
-.set("spark.tispark.plan.allow_index_double_read", "false") \
-.set("spark.tispark.plan.allow_index_read", "true") \
-.set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
-.set("spark.tispark.pd.addresses", "172.16.40.158:2379").set("spark.io.compression.codec", "lzf") \
-.set("spark.driver.maxResultSize", "8g")
-spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
+# sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
+# .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
+# .set("spark.tispark.plan.allow_index_double_read", "false") \
+# .set("spark.tispark.plan.allow_index_read", "true") \
+# .set("spark.sql.extensions", "org.apache.spark.sql.TiExtensions") \
+# .set("spark.tispark.pd.addresses", "172.16.40.158:2379").set("spark.io.compression.codec", "lzf") \
+# .set("spark.driver.maxResultSize", "8g")
+#
+# spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
+# ti = pti.TiContext(spark)
+# ti.tidbMapDatabase("jerry_test")
+# spark.sparkContext.setLogLevel("WARN")
# sql = "select stat_date,cid_id,y,ccity_name from esmm_train_data limit 60"
# spark.sql(sql).show(6)
sql = "select level2_id,concat('t',treatment_method)," \
"concat('min',price_min),concat('max',price_max)," \
"concat('tr',treatment_time),concat('m',maintain_time)," \
"concat('r',recover_time) from jerry_test.train_Knowledge_network_data"
db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
df = con_sql(db, sql)
df = df.rename(columns={0: "level2_id", 1: "treatment_method",2:"price_min",3:"price_max",4:"treatment_time",
5:"maintain_time",6:"recover_time"})
print(df.head(6))
host = '172.16.40.158'
port = 4000
user = 'root'
password = '3SYz54LS9#^9sBvC'
db = 'jerry_test'
charset = 'utf8'
engine = create_engine(str(r"mysql+pymysql://%s:" + '%s' + "@%s:%s/%s") % (user, password, host, port, db))
df.to_sql('knowledge', con=engine, if_exists='append', index=False, chunksize=8000)
print("insert done")