Commit 4fa0a50f authored by Your Name

test

parent ced9f2c4
 import tensorflow as tf
 import pymysql
-from pyspark.conf import SparkConf
-import pytispark.pytispark as pti
-from pyspark.sql import SparkSession
 import datetime
 import pandas as pd
 from datetime import date, timedelta
 import time
-from pyspark import StorageLevel
-from pyspark.sql import Row
 import os
 import sys
 from sqlalchemy import create_engine
@@ -30,7 +25,8 @@ def input_fn(filenames, batch_size=32, num_epochs=1, perform_shuffle=False):
         "tag5_list": tf.VarLenFeature(tf.int64),
         "tag6_list": tf.VarLenFeature(tf.int64),
         "tag7_list": tf.VarLenFeature(tf.int64),
-        "number": tf.VarLenFeature(tf.int64),
+        "search_tag2_list": tf.VarLenFeature(tf.int64),
+        "search_tag3_list": tf.VarLenFeature(tf.int64),
         "uid": tf.VarLenFeature(tf.string),
         "city": tf.VarLenFeature(tf.string),
         "cid_id": tf.VarLenFeature(tf.string)
@@ -88,7 +84,8 @@ def model_fn(features, labels, mode, params):
     tag5_list = features['tag5_list']
     tag6_list = features['tag6_list']
     tag7_list = features['tag7_list']
-    number = features['number']
+    search_tag2_list = features['search_tag2_list']
+    search_tag3_list = features['search_tag3_list']
     uid = features['uid']
     city = features['city']
     cid_id = features['cid_id']
@@ -107,12 +104,14 @@ def model_fn(features, labels, mode, params):
     tag5 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag5_list, sp_weights=None, combiner="sum")
     tag6 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag6_list, sp_weights=None, combiner="sum")
     tag7 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=tag7_list, sp_weights=None, combiner="sum")
+    search_tag2 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=search_tag2_list, sp_weights=None, combiner="sum")
+    search_tag3 = tf.nn.embedding_lookup_sparse(Feat_Emb, sp_ids=search_tag3_list, sp_weights=None, combiner="sum")
     # x_concat = tf.reshape(embedding_id,shape=[-1, common_dims])  # None * (F * K)
     x_concat = tf.concat([tf.reshape(embedding_id, shape=[-1, common_dims]), app_id, level2, level3, tag1,
-                          tag2, tag3, tag4, tag5, tag6, tag7], axis=1)
+                          tag2, tag3, tag4, tag5, tag6, tag7,search_tag2,search_tag3], axis=1)
-    sample_id = tf.sparse.to_dense(number)
     uid = tf.sparse.to_dense(uid,default_value="")
     city = tf.sparse.to_dense(city,default_value="")
     cid_id = tf.sparse.to_dense(cid_id,default_value="")
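To see what combiner="sum" produces here, a small self-contained toy; the embedding table values and ids are invented for the example, whereas Feat_Emb in the real code is a trained variable:

import tensorflow as tf

emb = tf.constant([[1., 0.], [0., 1.], [2., 2.]])   # 3 ids, embedding dim 2
# Row 0 holds ids {0, 2}, row 1 holds id {1}.
sp = tf.SparseTensor(indices=[[0, 0], [0, 1], [1, 0]],
                     values=tf.constant([0, 2, 1], dtype=tf.int64),
                     dense_shape=[2, 2])
# combiner="sum" adds up the embedding rows of each example's ids, so a
# variable-length tag list collapses to one fixed-size vector.
out = tf.nn.embedding_lookup_sparse(emb, sp_ids=sp, sp_weights=None, combiner="sum")
with tf.Session() as sess:
    print(sess.run(out))   # [[3. 2.] [0. 1.]]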
@@ -149,7 +148,7 @@ def model_fn(features, labels, mode, params):
     pcvr = tf.sigmoid(y_cvr)
     pctcvr = pctr*pcvr
-    predictions = {"pcvr": pcvr, "pctr": pctr, "pctcvr": pctcvr, "sample_id": sample_id, "uid": uid, "city": city, "cid_id": cid_id}
+    predictions = {"pctcvr": pctcvr, "uid": uid, "city": city, "cid_id": cid_id}
     export_outputs = {tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: tf.estimator.export.PredictOutput(predictions)}
     # Provide an estimator spec for `ModeKeys.PREDICT`
     if mode == tf.estimator.ModeKeys.PREDICT:
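For context, pctr, pcvr and pctcvr follow the ESMM factorization over the impression space: p(click and convert | impression) = p(click | impression) * p(convert | click), which is exactly pctcvr = pctr*pcvr above. A two-line numeric sanity check with made-up values:

pctr, pcvr = 0.10, 0.05   # example probabilities, not model outputs
pctcvr = pctr * pcvr      # joint click-and-convert probability: 0.005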
@@ -176,25 +175,11 @@ def main(te_file):
                                     log_step_count_steps=100, save_summary_steps=100)
     Estimator = tf.estimator.Estimator(model_fn=model_fn, model_dir=model_dir, params=model_params, config=config)
-    preds = Estimator.predict(input_fn=lambda: input_fn(te_file, num_epochs=1, batch_size=10000), predict_keys=["pctcvr","pctr","pcvr","sample_id","uid","city","cid_id"])
+    preds = Estimator.predict(input_fn=lambda: input_fn(te_file, num_epochs=1, batch_size=10000), predict_keys=["pctcvr","uid","city","cid_id"])
-    # with open("/home/gmuser/esmm/nearby/pred.txt", "w") as fo:
-    #     for prob in preds:
-    #         fo.write("%f\t%f\t%f\n" % (prob['pctr'], prob['pcvr'], prob['pctcvr']))
-    # ctcvr = []
-    str_result = ""
+    result = []
     for prob in preds:
-        # ctcvr.append((prob["sample_id"][0],prob['pctcvr']))
-        str_result = str_result + str(prob["sample_id"][0]) + ":"+ str(prob["uid"][0]) + ":" + str(prob["city"][0]) + ":" + str(prob["cid_id"][0]) + ":" + str(prob['pctcvr']) + ";"
+        result.append([str(prob["uid"][0]), str(prob["city"][0]), str(prob["cid_id"][0]), str(prob['pctcvr'])])
-        # str_result = list(prob.keys())
-    # return str_result
-    return str_result[:-1]
+    return result
-    # indices = []
-    # for prob in preds:
-    #     indices.append([prob['pctr'], prob['pcvr'], prob['pctcvr']])
-    # return indices
 def trans(x):
     return str(x)[2:-1] if str(x)[0] == 'b' else x
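trans() exists because Estimator.predict returns string features as Python bytes, and str() on a bytes value keeps the b'...' wrapper. Note that this heuristic misfires on ordinary strings that happen to start with 'b' (trans("beijing") returns "ijin"). A type-based variant, shown here as a hypothetical alternative rather than part of this commit, avoids that:

def trans_safe(x):
    # Decode only real bytes objects instead of pattern-matching str(x).
    return x.decode("utf-8") if isinstance(x, bytes) else x

print(trans_safe(b'beijing'))   # beijing
print(trans_safe('beijing'))    # beijing (trans() would return 'ijin')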
@@ -208,22 +193,49 @@ def set_join(lst):
 if __name__ == "__main__":
-    if sys.argv[1] == "nearby":
+    if sys.argv[1] == "native":
         b = time.time()
+        print("infer native task")
         path = "hdfs://172.16.32.4:8020/strategy/esmm/"
         # df = spark.read.format("tfrecords").load(path+"test_native/part-r-00000")
         # df.show()
+        te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_native/part-r-00000"]
+        print("dist predict native")
+        print("elapsed time (seconds):")
+        print((time.time()-b))
+    if sys.argv[1] == "nearby":
+        print("infer nearby task")
+        b = time.time()
+        path = "hdfs://172.16.32.4:8020/strategy/esmm/"
+        # df = spark.read.format("tfrecords").load(path+"test_nearby/part-r-00000")
+        # df.show()
         te_files = ["hdfs://172.16.32.4:8020/strategy/esmm/test_nearby/part-r-00000"]
-        print("-" * 100)
-        indices = main(te_files)
-        print(indices[0])
+        result = main(te_files)
+        df = pd.DataFrame(result, columns=["uid", "city", "cid_id", "pctcvr"])
+        print(df.head(10))
+        host = '172.16.40.158'
+        port = 4000
+        user = 'root'
+        password = '3SYz54LS9#^9sBvC'
+        db = 'jerry_test'
+        charset = 'utf8'
+        print("elapsed time (min):")
+        print((time.time()-b)/60)
+        print("elapsed time (seconds):")
+        print((time.time()-b))
+    else:
+        print("hello")
\ No newline at end of file
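The host/port/user/password/db/charset fields are assigned but never used in this hunk, which suggests the DataFrame is meant to be written back to the TiDB/MySQL instance in a follow-up change. A hedged sketch of that step using the sqlalchemy import already at the top of the file; the table name esmm_pred_result is an assumption, and quote_plus is needed because the password contains URL-special characters:

from urllib.parse import quote_plus
from sqlalchemy import create_engine

# Assumption: persist predictions via the connection fields defined above.
engine = create_engine("mysql+pymysql://{}:{}@{}:{}/{}?charset={}".format(
    user, quote_plus(password), host, port, db, charset))
df.to_sql("esmm_pred_result", con=engine, if_exists="append", index=False)  # hypothetical table name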