Commit 0aae1e2c authored by 张彦钊's avatar 张彦钊

change test file

parent 22de0c8b
......@@ -6,6 +6,7 @@ import pytispark.pytispark as pti
from pyspark.sql import SparkSession
import datetime
import pandas as pd
import subprocess
def app_list_func(x,l):
......@@ -126,6 +127,19 @@ def con_sql(db,sql):
db.close()
return df
def get_filename(dir_in):
    """Return full HDFS URIs of all part-files under *dir_in*.

    Runs ``hdfs dfs -ls`` through a shell pipeline, keeps only entries whose
    basename starts with ``part`` (e.g. ``part-r-00000``), and prefixes each
    path with the namenode address.

    :param dir_in: HDFS directory to list, e.g. ``"/strategy/esmm/tr"``.
    :return: list of ``"hdfs://host:port/<path>"`` strings (possibly empty).
    """
    pre_path = "hdfs://172.16.32.4:8020"
    # NOTE(review): dir_in is interpolated into a shell command and shell=True
    # is required for the awk pipe — only call this with trusted paths.
    args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
    s_output, s_err = proc.communicate()
    part_paths = []
    for raw in s_output.split():
        # Fix: decode the bytes path instead of slicing str(b'...')[2:-1],
        # which relied on the repr format of a bytes object.
        path = raw.decode("utf-8")
        # Fix: the original tested split("/")[4][:4] == "part", which breaks
        # for any path not exactly four levels deep; filter on the basename.
        if path.rsplit("/", 1)[-1].startswith("part"):
            part_paths.append(pre_path + path)
    return part_paths
if __name__ == '__main__':
    # Ad-hoc smoke test: pull a few device ids from the TiDB test database,
    # then list the ESMM train/validation part-files on HDFS.
    # NOTE(review): credentials are hard-coded here — move them to config.
    db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                         passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    try:
        cursor = db.cursor()
        sql = "select device_id from esmm_train_data limit 10"
        cursor.execute(sql)
        result = cursor.fetchall()
        print(result)
        # De-duplicate the device ids; set() does not preserve row order.
        a = list(set([i[0] for i in result]))
        print(a)
    finally:
        # Fix: the cursor and connection were never closed in the original.
        cursor.close()
        db.close()

    tr_files = get_filename("/strategy/esmm/tr")
    print(tr_files)
    va_files = get_filename("/strategy/esmm/va")
    print("test")
    print(va_files)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment