Commit 10ae88e2 authored by 张彦钊

调试pyspark

parent d0595be5
...@@ -4,24 +4,36 @@ import time ...@@ -4,24 +4,36 @@ import time
import pymysql import pymysql
from pyspark.sql import SparkSession from pyspark.sql import SparkSession
from pyspark.sql import SQLContext from pyspark.sql import SQLContext
from pyspark import SparkConf,SparkContext
def fetch_data(start_date, end_date): def fetch_data(start_date, end_date):
sc = SparkSession.builder.appName("Python Spark SQL basic example") \ # sc = SparkSession.builder.appName("Python Spark SQL basic example") \
.config('spark.some.config,option0', 'some-value') \ # .config('spark.some.config,option0', 'some-value') \
.getOrCreate() # .getOrCreate()
sc = SparkContext(conf=SparkConf().setAppName("mnist_streaming"))
ctx = SQLContext(sc) ctx = SQLContext(sc)
df = ctx.read.format("jdbc").options(url="jdbc:mysql://rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com:3306/doris_prod", # jdbcDf = ctx.read.format("jdbc").options(url="jdbc:mysql://192.168.15.12:4000",
driver="com.mysql.jdbc.Driver",
dbtable="device diary_queue",
user="doris", password="o5gbA27hXHHm").load()
# df = ctx.read.format("jdbc").options(url="jdbc:mysql://192.168.15.12:4000/jerry_prod",
# driver="com.mysql.jdbc.Driver", # driver="com.mysql.jdbc.Driver",
# dbtable = "data_feed_click", # # dbtable="((select device_id,cid_id,time,device_type,city_id,1 as clicked from jerry_test.data_feed_click where cid_id in (select id from eagle.src_mimas_prod_api_diary where doctor_id is not null and content_level >3.5) and cid_type = 'diary' and stat_date = '2018-08-12') union (select device_id,cid_id,time,device_type,city_id,0 as clicked from jerry_test.data_feed_exposure where cid_id in (select id from eagle.src_mimas_prod_api_diary where doctor_id is not null and content_level >3.5) and cid_type = 'diary' and stat_date = '2018-08-12')) tmp",user="root",
# user="root",password = "3SYz54LS9#^9sBvC").load() # dbtable="(select id as diary_id,doctor_id from eagle.src_mimas_prod_api_diary where doctor_id is not null and content_level >3.5 and datediff(current_date,created_time)<90) tmp",
df.show(6) # user="root",
# password="").load()
# df = ctx.read.format("jdbc").options(url="jdbc:mysql://rdsmaqevmuzj6jy.mysql.rds.aliyuncs.com:3306/doris_test",
# driver="com.mysql.jdbc.Driver",
# dbtable="device diary_queue",
# user="work", password="workwork").load()
# df = ctx.read.format("jdbc").options(url="jdbc:mysql://rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com:3306/doris_prod",
# driver="com.mysql.jdbc.Driver",
# dbtable="device diary_queue",
# user="doris", password="o5gbA27hXHHm").load()
jdbcDf = ctx.read.format("jdbc").options(url="jdbc:mysql://192.168.15.12:4000",
driver="com.mysql.jdbc.Driver",
dbtable = "(select device_id from data_feed_click limit 8) tmp",
user="root",password = "3SYz54LS9#^9sBvC").load()
jdbcDf.show(6)
# url = "jdbc:mysql://10.66.157.22:4000/jerry_prod" # url = "jdbc:mysql://10.66.157.22:4000/jerry_prod"
# table = "data_feed_click" # table = "data_feed_click"
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment