"""Export vocabulary files from Redis and merged sample files from HDFS.

Usage: run with an optional single argument naming the output directory;
without it, ``/data/files/wideAndDeep/`` is used.  The script writes one
``<field>_vocab.csv`` per vocabulary key listed under ``strategy:all:vocab``
and then fetches ``train_samples.csv`` / ``eval_samples.csv`` with
``hdfs dfs -getmerge``.
"""
import ast
import json
import os
import subprocess
import sys

import redis

DEFAULT_SAVE_DIR = '/data/files/wideAndDeep/'

# Redis list whose first element holds a Python list literal of vocabulary keys.
VOCAB_INDEX_KEY = "strategy:all:vocab"

# (HDFS source directory, local file name) pairs fetched with `-getmerge`.
HDFS_SAMPLES = (
    ("/strategy/train_samples", "train_samples.csv"),
    ("/strategy/eval_samples", "eval_samples.csv"),
)


def getRedisConn():
    """Return a pooled Redis client for db 0 on 172.16.50.145:6379.

    ``decode_responses`` is not set, so redis-py hands values back as bytes
    under Python 3; callers decode them before parsing.
    """
    # NOTE(review): the password is hard-coded in source; consider moving it
    # to configuration or the environment.
    # A test instance was previously used at 172.18.51.10:6379 (db 0, no
    # password, decode_responses=True).
    pool = redis.ConnectionPool(
        host="172.16.50.145",
        password="XfkMCCdWDIU%ls$h",
        port=6379,
        db=0,
    )
    return redis.Redis(connection_pool=pool)


def _first_entry(conn, key):
    """Return the first element of Redis list *key*.

    Raises:
        IndexError: if the list is empty or the key does not exist.
    """
    entries = conn.lrange(key, 0, -1)
    if not entries:
        raise IndexError("Redis list {!r} is empty or missing".format(key))
    return entries[0]


def _parse_literal(raw):
    """Parse *raw* (bytes or str) as a Python literal and return the value.

    SECURITY: this payload comes from Redis and used to be passed to
    ``eval``, which would execute arbitrary expressions stored there.
    ``ast.literal_eval`` accepts only literals (lists, strings, numbers, ...),
    so a payload that is not a plain literal now raises instead of running.
    """
    if isinstance(raw, bytes):
        # literal_eval does not accept bytes; decode the way source is read.
        raw = raw.decode("utf-8")
    return ast.literal_eval(raw)


def export_vocab_files(conn, save_dir):
    """Write one ``<field>_vocab.csv`` into *save_dir* per vocabulary key.

    The key list is read from ``VOCAB_INDEX_KEY``.  Each key is split on
    ``":"`` and its second component names the output file.  Empty strings
    are dropped and the remaining entries are written one per line.
    """
    raw_index = _first_entry(conn, VOCAB_INDEX_KEY)
    print("vocab_keys: ", raw_index)

    for vocab_key in _parse_literal(raw_index):
        print('vocab_key: ', vocab_key)
        field = vocab_key.split(":")[1]
        filename = field + "_vocab.csv"
        print('filename: ', filename)

        # Read from Redis before opening the file so a failed or empty read
        # does not leave a freshly truncated vocab file behind.
        entries = _parse_literal(_first_entry(conn, vocab_key))
        texts = [text for text in entries if text != '']
        print('texts: ', len(texts))

        # Explicit encoding keeps the output independent of the locale.
        with open(os.path.join(save_dir, filename), 'w', encoding='utf-8') as f:
            f.write('\n'.join(texts))


def download_hdfs_samples(save_dir):
    """Merge each HDFS sample directory into a single file under *save_dir*.

    Every entry of ``HDFS_SAMPLES`` is attempted even if an earlier one
    fails; failures are reported on stderr rather than raised.
    """
    for hdfs_path, local_name in HDFS_SAMPLES:
        # os.path.join works whether or not save_dir ends with a slash; the
        # previous string concatenation produced e.g. "/outtrain_samples.csv".
        target = os.path.join(save_dir, local_name)
        # Argument list (no shell) so spaces or metacharacters in save_dir
        # cannot split or inject into the command.
        command = ["hdfs", "dfs", "-getmerge", hdfs_path, target]
        try:
            returncode = subprocess.run(command).returncode
        except OSError as err:
            # Typically the `hdfs` executable is missing from PATH.
            print("failed to run {}: {}".format(" ".join(command), err),
                  file=sys.stderr)
            continue
        if returncode != 0:
            print("{} exited with status {}".format(" ".join(command), returncode),
                  file=sys.stderr)


if len(sys.argv) == 2:
    save_dir = sys.argv[1]
else:
    save_dir = DEFAULT_SAVE_DIR
print('save_dir: ', save_dir)

if not os.path.exists(save_dir):
    print('mkdir save_dir: ', save_dir)
    # exist_ok guards against the directory appearing between check and create.
    os.makedirs(save_dir, exist_ok=True)

conn = getRedisConn()
export_vocab_files(conn, save_dir)
download_hdfs_samples(save_dir)