Commit ad95f086 authored by 张彦钊's avatar 张彦钊

change test file

parent 8e1bd859
...@@ -3,6 +3,7 @@ from pyspark.context import SparkContext ...@@ -3,6 +3,7 @@ from pyspark.context import SparkContext
from pyspark.conf import SparkConf from pyspark.conf import SparkConf
import pytispark.pytispark as pti import pytispark.pytispark as pti
from pyspark.sql import SparkSession from pyspark.sql import SparkSession
import numpy as np
def test(): def test():
...@@ -27,6 +28,11 @@ def test(): ...@@ -27,6 +28,11 @@ def test():
spark.sql(sql).show(6) spark.sql(sql).show(6)
def some_function(x):
    """Demo transform: return sin(x)**2 + 2.

    Exists only to give ``rdd.map`` in the driver something to compute;
    delegates the trig to NumPy so it also works element-wise on arrays.
    """
    sine_squared = np.sin(x) ** 2
    return sine_squared + 2
if __name__ == '__main__': if __name__ == '__main__':
sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \ sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
.set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \ .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
...@@ -37,16 +43,15 @@ if __name__ == '__main__': ...@@ -37,16 +43,15 @@ if __name__ == '__main__':
.set("spark.driver.maxResultSize", "8g") .set("spark.driver.maxResultSize", "8g")
spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate() spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()
ti = pti.TiContext(spark) # ti = pti.TiContext(spark)
ti.tidbMapDatabase("jerry_test") # ti.tidbMapDatabase("jerry_test")
spark.sparkContext.setLogLevel("WARN") # spark.sparkContext.setLogLevel("WARN")
sql = "select stat_date,cid_id,y,ccity_name from esmm_train_data limit 60" # sql = "select stat_date,cid_id,y,ccity_name from esmm_train_data limit 60"
spark.sql(sql).show(6) # spark.sql(sql).show(6)
# BUG FIX: SparkSession has no .parallelize() — parallelizing a local
# collection must go through the underlying SparkContext.
sample = (spark.sparkContext.parallelize(range(1000))
          .map(some_function)
          .take(10))
# .take() returns a plain Python list (not an RDD), hence the rename.
print(sample)

# Register the Brickhouse and in-house UDF jars, then expose the two UDFs
# that downstream queries rely on (json_map / is_json).
spark.sql("use online")
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar")
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar")
spark.sql("CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'")
spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'")

# Smoke-check: pull a handful of user ids from one partition of the click log.
sql = "select user_id from online.tl_hdfs_maidian_view where partition_date = '20190412' limit 10"
spark.sql(sql).show(6)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment