Commit acc7b49e authored by 宋柯

模型调试

parent d01699ac
......@@ -435,6 +435,7 @@ def dataVocabToRedis(dataVocab):
def saveVocab(key, vocab):
    """Replace the Redis list stored at ``key`` with one serialized vocabulary.

    Any existing value at ``key`` is deleted first, so the list holds exactly
    one element afterwards: the ``str()`` form of ``vocab``.

    Args:
        key: Redis key to (re)write.
        vocab: Vocabulary to store; serialized with ``str()`` before the push.
    """
    conn = getRedisConn()
    conn.delete(key)
    # Serialize explicitly: newer redis-py releases reject list values instead
    # of implicitly converting them to their string form.
    conn.lpush(key, str(vocab))
    # Expire the key that was just written (24 hours). The previous code set
    # the TTL on FEATURE_VOCAB_KEY, leaving ``key`` without any expiry.
    conn.expire(key, 60 * 60 * 24)
......@@ -1030,7 +1031,7 @@ if __name__ == '__main__':
for multi_categoty_field in multi_categoty_fields:
    # For each comma-separated column: split it into individual values, keep
    # the distinct ones, and store them through saveVocab under
    # "strategy:<field>:vocab". The key is also recorded in vocab_redis_keys.
    #
    # NOTE: the CSV export to output_file is currently disabled; only the
    # saveVocab path is active.
    output_file = "file:///home/gmuser/" + multi_categoty_field + "_vocab"
    exploded_expr = "explode(split({multi_categoty_field},',')) as {multi_categoty_field}".format(
        multi_categoty_field=multi_categoty_field
    )
    value_col = F.col(multi_categoty_field)
    # Single query only: an earlier identical query without the empty-string
    # filter was collected and then immediately overwritten, costing an extra
    # Spark job for a result that was never used.
    multi_categoty_field_rows = (
        train_samples.selectExpr(exploded_expr)
        .where(value_col != '-1')  # drop the '-1' placeholder value
        .where(value_col != '')    # drop empty strings produced by split
        .distinct()
        .collect()
    )
    vocab_redis_keys.append("strategy:" + multi_categoty_field + ":vocab")
    saveVocab(
        vocab_redis_keys[-1],
        [row[multi_categoty_field] for row in multi_categoty_field_rows],
    )
......
import ast
import json
import os
import sys

import redis
def getRedisConn(host="172.18.51.10", port=6379, db=0, password=None):
    """Create a Redis client whose responses are decoded to ``str``.

    The defaults reproduce the previously hard-coded connection, which the
    original code marked as the test instance.

    Args:
        host: Redis server host.
        port: Redis server port.
        db: Redis database index.
        password: Optional password; ``None`` connects without one.

    Returns:
        A ``redis.Redis`` client created with ``decode_responses=True``.
    """
    # Alternative connections used to live here as commented-out code that
    # included a plaintext password. Pass host/password as arguments (sourced
    # from configuration) instead of committing credentials to the repository.
    return redis.Redis(
        host=host,
        port=port,
        db=db,
        password=password,
        decode_responses=True,
    )
# Export vocabularies from Redis into "<field>_vocab.csv" files.
#
# Usage: python <this script> <save_dir>
#
# The first element of the Redis list "strategy:all:vocab" is parsed as a
# Python literal and iterated as the vocabulary keys. For each key, the first
# element of that key's list is parsed the same way and written one entry per
# line to <save_dir>/<field>_vocab.csv, where <field> is the second
# ":"-separated part of the key.
if len(sys.argv) < 2:
    sys.exit("usage: {} <save_dir>".format(sys.argv[0]))
save_dir = sys.argv[1]
print('save_dir: ', save_dir)
conn = getRedisConn()
vocab_keys = conn.lrange("strategy:all:vocab", 0, -1)
if not vocab_keys:
    sys.exit('redis list "strategy:all:vocab" is empty or missing')
print("vocab_keys: ", vocab_keys[0])
# ast.literal_eval only accepts Python literals, so data read from Redis can
# no longer execute arbitrary code the way eval() would.
vocab_keys = ast.literal_eval(vocab_keys[0])
for vocab_key in vocab_keys:
    print('vocab_key: ', vocab_key)
    field = vocab_key.split(":")[1]
    filename = field + "_vocab.csv"
    print('filename: ', filename)
    # Fetch and parse before opening the output file so that a missing or
    # malformed key does not leave an empty/truncated file behind.
    texts = conn.lrange(vocab_key, 0, -1)
    if not texts:
        sys.exit('redis list "{}" is empty or missing'.format(vocab_key))
    texts = ast.literal_eval(texts[0])
    print('texts: ', len(texts))
    # Write as UTF-8 explicitly so the output does not depend on the locale.
    with open(os.path.join(save_dir, filename), 'w', encoding='utf-8') as f:
        f.write('\n'.join(texts))
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment