Commit 25ff77d7 authored by 张彦钊

change test file

parent ae8c8c0f
...@@ -42,7 +42,7 @@ def feature_engineer(): ...@@ -42,7 +42,7 @@ def feature_engineer():
validate_date = con_sql(db, sql)[0].values.tolist()[0] validate_date = con_sql(db, sql)[0].values.tolist()[0]
print("validate_date:" + validate_date) print("validate_date:" + validate_date)
temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d") temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
start = (temp - datetime.timedelta(days=2)).strftime("%Y-%m-%d") start = (temp - datetime.timedelta(days=300)).strftime("%Y-%m-%d")
print(start) print(start)
sql = "select e.y,e.z,e.stat_date,e.ucity_id,feat.level2_ids,e.ccity_name,u.device_type,u.manufacturer," \ sql = "select e.y,e.z,e.stat_date,e.ucity_id,feat.level2_ids,e.ccity_name,u.device_type,u.manufacturer," \
......
...@@ -150,21 +150,6 @@ if __name__ == '__main__': ...@@ -150,21 +150,6 @@ if __name__ == '__main__':
# [path + "tr/part-r-00000"] # [path + "tr/part-r-00000"]
import subprocess import subprocess
pre_path = "hdfs://172.16.32.4:8020"
dir_in = "/strategy/esmm/tr"
args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
s_output, s_err = proc.communicate()
all_dart_dirs = s_output.split()
print(all_dart_dirs)
a = []
for i in all_dart_dirs:
b = str(i).split("/")[4]
if b[:4] == "part":
tmp = pre_path+str(i)[2:-1]
a.append(tmp)
print(a)
......
...@@ -316,17 +316,13 @@ def main(_): ...@@ -316,17 +316,13 @@ def main(_):
print('ctr_task_wgt ', FLAGS.ctr_task_wgt) print('ctr_task_wgt ', FLAGS.ctr_task_wgt)
#------init Envs------ #------init Envs------
tr_files = [path+"tr/part-r-00000"] # tr_files = [path+"tr/part-r-00000"]
va_files = [path+"va/part-r-00000"] # va_files = [path+"va/part-r-00000"]
te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir] # te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
# tr_files = glob.glob("%s/tr/*tfrecord" % FLAGS.data_dir) tr_files = get_filename("/strategy/esmm/tr")
# random.shuffle(tr_files) va_files = get_filename("/strategy/esmm/va")
# print("tr_files:", tr_files) te_files = ["%s/part-r-00000" % FLAGS.hdfs_dir]
# va_files = glob.glob("%s/va/*tfrecord" % FLAGS.data_dir)
# print("va_files:", va_files)
# te_files = glob.glob("%s/*tfrecord" % FLAGS.data_dir)
# print("te_files:", te_files)
if FLAGS.clear_existing_model: if FLAGS.clear_existing_model:
try: try:
...@@ -374,14 +370,18 @@ def main(_): ...@@ -374,14 +370,18 @@ def main(_):
print("Not Implemented, Do It Yourself!") print("Not Implemented, Do It Yourself!")
def get_filename(dir_in): def get_filename(dir_in):
dir_in = "/strategy/esmm/tr" pre_path = "hdfs://172.16.32.4:8020"
args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'" args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True) proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE, shell=True)
s_output, s_err = proc.communicate() s_output, s_err = proc.communicate()
a = s_output.split() all_dart_dirs = s_output.split()
a. print(all_dart_dirs)
a = []
for i in all_dart_dirs:
b = str(i).split("/")[4]
if b[:4] == "part":
tmp = pre_path + str(i)[2:-1]
a.append(tmp)
return a return a
if __name__ == "__main__": if __name__ == "__main__":
......
Markdown is supported
0% Try again or attach a new file
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment