Commit 4de7681c authored by 张彦钊

修改测试文件

parent 5acbce0e
from __future__ import print_function
from utils import con_sql
import datetime
import time
import pymysql
from pyspark.sql import SparkSession
from pyspark.sql import SQLContext
def fetch_data(start_date, end_date):
# Reads click records from the `data_feed_click` table through Spark and
# prints samples of what was fetched.
#
# Parameters:
#   start_date, end_date -- interpolated into the SQL text below as the
#                           inclusive lower/upper bounds on `stat_date`.
#
# NOTE(review): this listing looks like a diff view whose +/- markers and
# indentation were stripped, so removed and added lines appear interleaved
# (two session builders, two different ways of producing `df`). Confirm
# against the real file which lines belong to the current revision. The
# view is also truncated after the last print (a "......" marker follows),
# so any return value is not visible from here.
#
# NOTE(review): as written, the trailing backslash after
# `.appName("get_data")` continues directly into the `sc = ...` assignment,
# which is not valid Python. The config key 'spark.some.config,option0'
# contains a comma -- presumably a typo for a dotted key; verify.
spark = SparkSession \
.builder \
.appName("get_data") \
sc = SparkSession.builder.appName("Python Spark SQL basic example") \
.config('spark.some.config,option0', 'some-value') \
.getOrCreate()
# Select cid, device_id and stat_date for rows whose stat_date lies in
# [start_date, end_date].
# NOTE(review): the dates are spliced in with str.format rather than bound
# as parameters; confirm callers only pass trusted date strings.
sql = "select cid,device_id,stat_date from data_feed_click " \
"where stat_date >= '{0}' and stat_date <= '{1}'".format(start_date, end_date)
# Run the query and drop rows containing nulls.
# NOTE(review): `.distinct` is referenced without call parentheses, so `df`
# would be bound to the method itself and the `.select` on the next line
# would fail -- likely meant `.distinct()`; confirm.
df = spark.sql(sql).na.drop().distinct
# Collect the device_id column to the driver as a list of rows.
device_id = df.select("device_id").collect()
# NOTE(review): `sc` above is the result of SparkSession...getOrCreate()
# (a SparkSession); SQLContext is normally built from a SparkContext --
# verify this is what was intended.
ctx = SQLContext(sc)
# Rebind `df` to the whole data_feed_click table loaded over JDBC (MySQL
# driver); the date filter built above is not applied to this read.
# NOTE(review): the database host and root credentials are hard-coded in
# source; consider moving them to configuration / a secret store.
df = ctx.read.format("jdbc").options(url="jdbc:mysql://192.168.15.12:4000/jerry_prod",
driver="com.mysql.jdbc.Driver",
dbtable = "data_feed_click",
user="root",password = "3SYz54LS9#^9sBvC").load()
# Print the first 6 rows of the JDBC-loaded table.
df.show(6)
# Alternative JDBC read (different host), left commented out:
# url = "jdbc:mysql://10.66.157.22:4000/jerry_prod"
# table = "data_feed_click"
# properties = {"user": "root", "password": "3SYz54LS9#^9sBvC"}
# df = sqlContext.read.jdbc(url, table, properties)
# The message below reads: "Successfully fetched data from the click table".
print("成功获取点击表里的数据")
# Show the first two collected device_id rows as a quick sanity check.
print(device_id[0:2])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment