Commit 0aae1e2c authored by 张彦钊's avatar 张彦钊

change test file

parent 22de0c8b
......@@ -6,6 +6,7 @@ import pytispark.pytispark as pti
from pyspark.sql import SparkSession
import datetime
import pandas as pd
import subprocess
def app_list_func(x,l):
......@@ -126,6 +127,19 @@ def con_sql(db,sql):
db.close()
return df
def get_filename(dir_in):
    """Return full HDFS URIs of all part-files under *dir_in*.

    Runs ``hdfs dfs -ls`` through a shell pipeline, keeps only entries whose
    basename starts with ``part`` (e.g. ``part-r-00000``), and prefixes each
    path with the namenode address.

    :param dir_in: HDFS directory to list, e.g. ``"/strategy/esmm/tr"``.
    :return: list of ``"hdfs://host:port/<path>"`` strings (possibly empty).
    """
    pre_path = "hdfs://172.16.32.4:8020"
    # NOTE(review): dir_in is interpolated into a shell command and shell=True
    # is required for the awk pipe — only call this with trusted paths.
    args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    s_output, s_err = proc.communicate()
    part_paths = []
    for raw in s_output.split():
        # Fix: decode the bytes path instead of slicing str(b'...')[2:-1],
        # which relied on the repr format of a bytes object.
        path = raw.decode("utf-8")
        # Fix: the original tested split("/")[4][:4] == "part", which breaks
        # for any path not exactly four levels deep; filter on the basename.
        if path.rsplit("/", 1)[-1].startswith("part"):
            part_paths.append(pre_path + path)
    return part_paths
if __name__ == '__main__':
    # Ad-hoc smoke test: pull a few device ids from the TiDB test database,
    # then list the ESMM train/validation part-files on HDFS.
    # NOTE(review): credentials are hard-coded here — move them to config.
    db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        sql = "select device_id from esmm_train_data limit 10"
        cursor.execute(sql)
        result = cursor.fetchall()
        print(result)
        # De-duplicate the device ids; set() does not preserve row order.
        a = list(set([i[0] for i in result]))
        print(a)
    finally:
        # Fix: the cursor and connection were never closed in the original.
        cursor.close()
        db.close()

    tr_files = get_filename("/strategy/esmm/tr")
    print(tr_files)
    va_files = get_filename("/strategy/esmm/va")
    print("test")
    print(va_files)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment