from pyspark.sql import SQLContext
from pyspark.context import SparkContext
from pyspark.conf import SparkConf
import datetime

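
# Hedged sketch (not called below): the two JDBC reads in get_data() share
# every connection option except the database name and the query, so a
# helper like this would keep the credentials and driver settings in one place.
def read_mysql(ctx, db, query):
    # Wrap the query as a derived table so MySQL executes it and Spark
    # reads only the result set.
    return ctx.read.format("jdbc").options(
        url="jdbc:mysql://10.66.157.22:4000/" + db,
        driver="com.mysql.jdbc.Driver",
        dbtable="(" + query + ") temp",
        user="root",
        password="3SYz54LS9#^9sBvC").load()
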
def get_data():
    # getOrCreate() reuses a running SparkContext instead of constructing a
    # second one, which would raise an error.
    sc = SparkContext.getOrCreate(conf=SparkConf().setAppName("esmm"))
    sc.setLogLevel("WARN")
    ctx = SQLContext(sc)
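    # Note: on Spark 2.x+ SparkSession.builder.appName("esmm").getOrCreate()
    # is the recommended entry point; SQLContext still works and is used here.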
    # end_date = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    # start_date = (datetime.date.today() - datetime.timedelta(days=day)).strftime("%Y-%m-%d")
    dbtable = "(select * from esmm_data)temp"
    esmm_data = ctx.read.format("jdbc").options(url="jdbc:mysql://10.66.157.22:4000/jerry_prod",
                                             driver="com.mysql.jdbc.Driver",
                                             dbtable=dbtable,
                                             user="root",
                                             password="3SYz54LS9#^9sBvC").load().na.drop()
    esmm_data.show(6)
    # Record each column's distinct-value count, e.g. to size embedding
    # vocabularies for the categorical features.
    column_number = {}
    for i in esmm_data.columns:
        column_number[i] = esmm_data.select(i).distinct().count()

    # DataFrame has no .map() in PySpark; any row-level transform would go
    # through esmm_data.rdd.map(...) or column expressions (withColumn).
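
    # Hedged alternative (not part of the original script): a single
    # aggregate pass with approx_count_distinct avoids launching one Spark
    # job per column when approximate cardinalities are sufficient.
    # from pyspark.sql.functions import approx_count_distinct
    # column_number = esmm_data.agg(
    #     *[approx_count_distinct(c).alias(c) for c in esmm_data.columns]
    # ).first().asDict()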
    dbtable = "(select * from home_tab_click)temp"
    tab_click = ctx.read.format("jdbc").options(url="jdbc:mysql://10.66.157.22:4000/eagle",
                                                driver="com.mysql.jdbc.Driver",
                                                dbtable=dbtable,
                                                user="root",
                                                password="3SYz54LS9#^9sBvC").load()
    tab_click.show(6)
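
    # Hedged option (not used here): large tables can be read in parallel by
    # adding JDBC partitioning options to .options(...), assuming a numeric
    # id column exists to split on:
    #   partitionColumn="id", lowerBound="1", upperBound="1000000",
    #   numPartitions="8"
    # Without these, Spark fetches the whole table on a single partition.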
    # Joining on the column name (rather than an equality expression) keeps a
    # single device_id column in the joined result:
    # esmm_data = esmm_data.join(tab_click, on="device_id", how="inner")
    # esmm_data.show(6)


if __name__ == '__main__':
    get_data()