Commit 6ba9c40b authored by 张彦钊

Modify the test file

parent 353a37ab
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import json
import time
from datetime import datetime

import msgpack
import pymysql
import tensorflow as tf
from pyspark.conf import SparkConf
from pyspark.context import SparkContext
from pyspark.sql import SparkSession, SQLContext
from pyspark.streaming import StreamingContext
from pyspark.streaming.kafka import KafkaUtils
from tensorflowonspark import TFCluster
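# The streaming and TensorFlowOnSpark imports above (StreamingContext,
# KafkaUtils, TFCluster, tensorflow, msgpack) are not exercised by the code
# below; they appear to be kept for the streaming test this file is evolving
# toward.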
def fetch_data(start_date, end_date):
    # sc = SparkSession.builder.appName("Python Spark SQL basic example") \
    #     .config('spark.some.config.option0', 'some-value') \
    #     .getOrCreate()
    sc = SparkContext(conf=SparkConf().setAppName("mnist_streaming"))
    ctx = SQLContext(sc)
    # Earlier experiments against other endpoints, kept for reference:
    # jdbcDf = ctx.read.format("jdbc").options(url="jdbc:mysql://192.168.15.12:4000",
    #                                          driver="com.mysql.jdbc.Driver",
    #                                          # dbtable="((select device_id,cid_id,time,device_type,city_id,1 as clicked from jerry_test.data_feed_click where cid_id in (select id from eagle.src_mimas_prod_api_diary where doctor_id is not null and content_level >3.5) and cid_type = 'diary' and stat_date = '2018-08-12') union (select device_id,cid_id,time,device_type,city_id,0 as clicked from jerry_test.data_feed_exposure where cid_id in (select id from eagle.src_mimas_prod_api_diary where doctor_id is not null and content_level >3.5) and cid_type = 'diary' and stat_date = '2018-08-12')) tmp",user="root",
    #                                          dbtable="(select id as diary_id,doctor_id from eagle.src_mimas_prod_api_diary where doctor_id is not null and content_level >3.5 and datediff(current_date,created_time)<90) tmp",
    #                                          user="root",
    #                                          password="").load()
    # df = ctx.read.format("jdbc").options(url="jdbc:mysql://rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com:3306/doris_test",
    #                                      driver="com.mysql.jdbc.Driver",
    #                                      dbtable="device_diary_queue",
    #                                      user="work", password="workwork").load()
    # df = ctx.read.format("jdbc").options(url="jdbc:mysql://rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com:3306/doris_prod",
    #                                      driver="com.mysql.jdbc.Driver",
    #                                      dbtable="device_diary_queue",
    #                                      user="doris", password="o5gbA27hXHHm").load()

    # Read a small sample through the JDBC source. The subquery must be
    # parenthesized and aliased ("tmp") because Spark wraps the dbtable value
    # in a "SELECT * FROM <dbtable>" query.
    jdbcDf = ctx.read.format("jdbc").options(url="jdbc:mysql://192.168.15.12:4000",
                                             driver="com.mysql.jdbc.Driver",
                                             dbtable="(select device_id from data_feed_click limit 8) tmp",
                                             user="root", password="3SYz54LS9#^9sBvC").load()
    jdbcDf.show(6)
    # Stop the context so __main__ below can create its own; PySpark allows
    # only one active SparkContext per process.
    sc.stop()

    # The same read via the DataFrameReader.jdbc shorthand:
    # url = "jdbc:mysql://10.66.157.22:4000/jerry_prod"
    # table = "data_feed_click"
    # properties = {"user": "root", "password": "3SYz54LS9#^9sBvC"}
    # df = sqlContext.read.jdbc(url, table, properties)
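# A runnable version of the read.jdbc shorthand sketched in the comments at
# the end of fetch_data -- a minimal sketch reusing the endpoint and
# credentials from those comments. The function name and the reuse of an
# externally created SQLContext are illustrative assumptions, not part of
# the original change.
def fetch_data_jdbc_shorthand(ctx):
    url = "jdbc:mysql://10.66.157.22:4000/jerry_prod"
    table = "data_feed_click"
    properties = {"user": "root", "password": "3SYz54LS9#^9sBvC"}
    # DataFrameReader.jdbc builds the same JDBC scan as format("jdbc") with
    # options(), but takes the connection settings as a properties dict.
    df = ctx.read.jdbc(url=url, table=table, properties=properties)
    df.show(6)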
@@ -117,7 +134,18 @@ def fetch_data(start_date, end_date):
if __name__ == "__main__":
    fetch_data("2018-11-11", "2018-11-12")
    sc = SparkContext(
        conf=SparkConf().setAppName("mnist_streaming").set("spark.streaming.kafka.maxRatePerPartition", 100))
    ctx = SQLContext(sc)
    # Sample the production diary queue through the same JDBC pattern.
    jdbcDf = ctx.read.format("jdbc").options(url="jdbc:mysql://rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com:3306/doris_prod",
                                             driver="com.mysql.jdbc.Driver",
                                             # dbtable="((select device_id,cid_id,time,device_type,city_id,1 as clicked from jerry_test.data_feed_click where cid_id in (select id from eagle.src_mimas_prod_api_diary where doctor_id is not null and content_level >3.5) and cid_type = 'diary' and stat_date = '2018-08-12') union (select device_id,cid_id,time,device_type,city_id,0 as clicked from jerry_test.data_feed_exposure where cid_id in (select id from eagle.src_mimas_prod_api_diary where doctor_id is not null and content_level >3.5) and cid_type = 'diary' and stat_date = '2018-08-12')) tmp",user="root",
                                             dbtable="(select device_id from device_diary_queue limit 8) tmp",
                                             user="doris",
                                             password="o5gbA27hXHHm"
                                             ).load()
    jdbcDf.show(6)
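    # Note: com.mysql.jdbc.Driver is not bundled with Spark; the MySQL
    # connector jar has to be shipped with the job, e.g. (jar path and
    # version are assumptions, not from the original commit):
    #   spark-submit --jars /path/to/mysql-connector-java-5.1.47.jar <this_script>.py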