# test.py 3.16 KB
# -*- coding: utf-8 -*-
from pyspark.sql import HiveContext
from pyspark.context import SparkContext
from pyspark.conf import SparkConf
# import pytispark.pytispark as pti
# from pyspark.sql import SQLContext
from pyspark.sql import SparkSession
# from py4j.java_gateway import java_import
# import pytispark.pytispark as pti
# import pandas as pd
#
# def con_sql(db,sql):
#     cursor = db.cursor()
#     try:
#         cursor.execute(sql)
#         result = cursor.fetchall()
#         df = pd.DataFrame(list(result))
#     except Exception:
#         print("发生异常", Exception)
#         df = pd.DataFrame()
#     finally:
#         db.close()
#     return df


def test():
    """Run a Hive smoke test: register UDFs and print a few user IDs.

    Builds (or reuses) a Hive-enabled SparkSession, switches to the
    ``online`` database, adds two UDF jars from HDFS, registers the
    ``json_map`` and ``is_json`` temporary functions, and prints up to six
    ``user_id`` values from ``online.tl_hdfs_maidian_view`` for the
    hard-coded partition ``20190412``.

    Returns:
        None. The query result is only printed via ``DataFrame.show``.
    """
    conf = SparkConf().setAppName("My App").set("spark.io.compression.codec", "lzf")

    # A single builder call replaces the previous SparkContext + HiveContext +
    # two SparkSession constructions. HiveContext is deprecated and its result
    # was never used; the second builder call (appName "hello test") ran after
    # the SparkContext already existed, so the running application kept the
    # name "My App" -- that effective name is preserved here.
    # getOrCreate() also makes the function safe to call more than once in the
    # same process, whereas constructing SparkContext directly raises when a
    # context is already active.
    spark = SparkSession.builder.config(conf=conf).enableHiveSupport().getOrCreate()

    # TiSpark database mapping is currently disabled:
    # ti = pti.TiContext(spark)
    # ti.tidbMapDatabase("jerry_test")

    spark.sql("use online")

    # Make the UDF jars available to the session, then register the functions.
    spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar")
    spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar")
    spark.sql("CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'")
    spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'")

    # Optional Hive/MapReduce settings, currently disabled:
    # spark.sql("SET mapreduce.job.queuename=data")
    # spark.sql("SET mapred.input.dir.recursive=true")
    # spark.sql("SET hive.mapred.supports.subdirectories=true")
    sql = "select user_id from online.tl_hdfs_maidian_view where partition_date = '20190412' limit 10"
    spark.sql(sql).show(6)


# def esmm_pre():
#     yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
#     print(yesterday)
#
#     spark = SparkSession.builder.enableHiveSupport().getOrCreate()
#     # gw = SparkContext._gateway
#     #
#     # # Import TiExtensions
#     # java_import(gw.jvm, "org.apache.spark.sql.TiContext")
#
#     # Inject TiExtensions, and get a TiContext
#     # ti = gw.jvm.TiExtensions.getInstance(spark._jsparkSession).getOrCreateTiContext(spark._jsparkSession)
#     ti = pti.TiContext(spark)
#
#     ti.tidbMapDatabase("jerry_test")
#
#     # sql("use tpch_test")
#     spark.sql("select count(*) from esmm_pre_data").show(6)
#
#     # conf = SparkConf().setAppName("esmm_pre").set("spark.io.compression.codec", "lzf")
#
#     spark.sql("""
#     select concat(tmp1.device_id,",",tmp1.city_id) as device_city, tmp1.merge_queue from (select device_id,if(city_id='world','worldwide',city_id) city_id,similarity_cid as merge_queue from nd_device_cid_similarity_matrix
# union select device_id,if(city_id='world','worldwide',city_id) city_id,native_queue as merge_queue from ffm_diary_queue
# union select device_id,city_id,search_queue as merge_queue from search_queue) as tmp1 where tmp1.device_id in (select distinct device_id from data_feed_click where stat_date='{}'
#     """.format(yesterday)).show(6)


# Script entry point: run the smoke test only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    test()