Commit 8029d31e authored by 张彦钊

change test file

parent 3324aab1
...@@ -358,6 +358,36 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map): ...@@ -358,6 +358,36 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
.repartition(100).write.format("tfrecords").save(path=path + "test_nearby/", mode="overwrite") .repartition(100).write.format("tfrecords").save(path=path + "test_nearby/", mode="overwrite")
print("nearby tfrecord done") print("nearby tfrecord done")
def doris():
    """Print row counts for three ad-hoc queries joining ESMM data with ``search_doris``.

    Reads the latest ``stat_date`` from ``esmm_train_data_dwell`` through a
    pymysql connection, derives a start date three days earlier, and then runs
    three Spark SQL queries, printing ``count()`` for each:

    1. training rows since the start date, left-joined with ``search_doris``;
    2. the same training rows without the join;
    3. ``esmm_pre_data`` rows left-joined with ``search_doris`` for the latest date.

    Nothing is returned; all results go to stdout.
    """
    # NOTE(review): connection credentials are hard-coded in source — consider
    # loading them from configuration instead.
    conn = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    # presumably con_sql returns a pandas DataFrame whose column 0 holds the
    # single max(stat_date) value — verify against con_sql's definition.
    latest = con_sql(conn, "select max(stat_date) from esmm_train_data_dwell")
    validate_date = latest[0].values.tolist()[0]
    print("validate_date:" + validate_date)

    # The window opens three days before the most recent stat_date.
    window_open = datetime.datetime.strptime(validate_date, "%Y-%m-%d") - datetime.timedelta(days=3)
    start = window_open.strftime("%Y-%m-%d")
    print(start)

    train_joined_query = (
        "select e.y,e.z,e.stat_date,e.ucity_id,doris.search_tag2,doris.search_tag3 "
        "from jerry_test.esmm_train_data_dwell e "
        "left join jerry_test.search_doris doris on e.device_id = doris.device_id "
        "and e.stat_date = doris.get_date "
        "where e.stat_date >= '{}'"
    ).format(start)
    train_plain_query = (
        "select y,z,stat_date,ucity_id from jerry_test.esmm_train_data_dwell "
        "where stat_date >= '{}'"
    ).format(start)
    predict_joined_query = (
        "select e.y,e.z,e.label,e.ucity_id,doris.search_tag2,doris.search_tag3 "
        "from jerry_test.esmm_pre_data e "
        "left join jerry_test.search_doris doris on e.device_id = doris.device_id "
        "and e.stat_date = doris.get_date "
        "where doris.get_date = '{}'"
    ).format(validate_date)

    # Run the queries in the original order and report each row count.
    for query in (train_joined_query, train_plain_query, predict_joined_query):
        print(spark.sql(query).count())
if __name__ == '__main__': if __name__ == '__main__':
sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \ sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
...@@ -376,8 +406,9 @@ if __name__ == '__main__': ...@@ -376,8 +406,9 @@ if __name__ == '__main__':
path = "hdfs:///strategy/esmm/" path = "hdfs:///strategy/esmm/"
local_path = "/home/gmuser/esmm/" local_path = "/home/gmuser/esmm/"
validate_date, value_map, app_list_map, leve2_map, leve3_map = feature_engineer() # validate_date, value_map, app_list_map, leve2_map, leve3_map = feature_engineer()
get_predict(validate_date, value_map, app_list_map, leve2_map, leve3_map) # get_predict(validate_date, value_map, app_list_map, leve2_map, leve3_map)
doris()
spark.stop() spark.stop()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment