Commit c7664788 authored by 张彦钊

change test file

parent 6b6c865e
@@ -212,9 +212,11 @@ def feature_engineer():
app_list_func(x[5], leve3_map), app_list_func(x[6], leve2_map), app_list_func(x[7], leve2_map),
app_list_func(x[8], leve2_map), app_list_func(x[9], leve2_map), app_list_func(x[10], leve2_map),
app_list_func(x[11], leve2_map), app_list_func(x[12], leve2_map),
- [value_map[x[0]], value_map[x[13]], value_map[x[14]], value_map[x[15]], value_map[x[16]],
-  value_map[x[17]], value_map[x[18]], value_map[x[19]], value_map[x[20]], value_map[x[21]],
-  value_map[x[22]], value_map[x[23]], value_map[x[24]], value_map[x[25]], value_map[x[26]]]))\
+ [value_map.get(x[0], 1), value_map.get(x[13], 2), value_map.get(x[14], 3), value_map.get(x[15], 4),
+  value_map.get(x[16], 5), value_map.get(x[17], 6), value_map.get(x[18], 7), value_map.get(x[19], 8),
+  value_map.get(x[20], 9), value_map.get(x[21], 10),
+  value_map.get(x[22], 11), value_map.get(x[23], 12), value_map.get(x[24], 13),
+  value_map.get(x[25], 14), value_map.get(x[26], 15)]))\
.zipWithIndex().map(lambda x:(x[0][0],x[0][1],x[0][2],x[0][3],x[0][4],x[0][5],x[0][6],x[0][7],x[0][8],
x[0][9],x[0][10],x[0][11],x[0][12],x[0][13],
x[1]))
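
The hunk above swaps direct value_map indexing for value_map.get with a distinct per-position fallback id (1 through 15), so a row containing a value that was unseen when value_map was built no longer raises KeyError. A minimal sketch of the difference; the names and sample values are illustrative, not from this repository:

# A minimal sketch (not repository code) of why the hunk above replaces
# value_map[...] with value_map.get(..., default).
value_map = {"beijing": 101, "shanghai": 102}   # assumed: raw value -> integer id
row = ["shanghai", "unknown_city"]              # assumed sample feature values

# Old behaviour: direct indexing raises KeyError for values that were not
# present when value_map was built.
try:
    ids = [value_map[v] for v in row]
except KeyError as err:
    print("unseen value:", err)

# New behaviour: each column position falls back to its own placeholder id,
# mirroring the per-column defaults 1-15 in the diff above.
ids = [value_map.get(v, i + 1) for i, v in enumerate(row)]
print(ids)  # [102, 2]
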
@@ -362,39 +362,6 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
.repartition(1).write.format("tfrecords").save(path=path + "nearby/", mode="overwrite")
print("nearby tfrecord done")
- def doris():
-     db = pymysql.connect(host='172.16.40.158', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
-     sql = "select max(stat_date) from esmm_train_data_dwell"
-     validate_date = con_sql(db, sql)[0].values.tolist()[0]
-     print("validate_date:" + validate_date)
-     temp = datetime.datetime.strptime(validate_date, "%Y-%m-%d")
-     start = (temp - datetime.timedelta(days=3)).strftime("%Y-%m-%d")
-     print(start)
-     # sql = "select e.y,e.z,e.stat_date,e.ucity_id,doris.search_tag2,doris.search_tag3 " \
-     #       "from jerry_test.esmm_train_data_dwell e " \
-     #       "left join jerry_test.search_doris doris on e.device_id = doris.device_id " \
-     #       "and e.stat_date = doris.get_date " \
-     #       "where e.stat_date >= '{}'".format(start)
-     #
-     # df = spark.sql(sql)
-     # print(df.count())
-     # sql = "select y,z,stat_date,ucity_id from jerry_test.esmm_train_data_dwell " \
-     #       "where stat_date >= '{}'".format(start)
-     #
-     # df = spark.sql(sql)
-     # print(df.count())
-     sql = "select e.y,e.z,e.label,e.ucity_id,doris.search_tag2,doris.search_tag3 " \
-           "from jerry_test.esmm_pre_data e " \
-           "left join jerry_test.search_doris doris on e.device_id = doris.device_id " \
-           "and e.stat_date = doris.get_date"
-     df = spark.sql(sql)
-     print(df.count())
-     sql = "select y,z from jerry_test.esmm_pre_data"
-     df = spark.sql(sql)
-     print(df.count())
if __name__ == '__main__':
sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
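
The removed doris() above relies on a con_sql helper that is defined elsewhere in this file and not shown in the diff. A hypothetical sketch of how it is assumed to behave, together with the same three-day window arithmetic; the helper body and the example date are assumptions, not the repository's code:

import datetime

import pandas as pd


def con_sql(db, sql):
    # Hypothetical helper: run a query on a DB-API connection and return the
    # result as a pandas DataFrame, which is why doris() reads
    # con_sql(db, sql)[0].values.tolist()[0] to get the first column's value.
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    cursor.close()
    return pd.DataFrame(list(result))


# Same date arithmetic as the removed function: take the newest stat_date and
# step back three days to get the window start.
validate_date = "2019-06-01"                                   # assumed example value
start = (datetime.datetime.strptime(validate_date, "%Y-%m-%d")
         - datetime.timedelta(days=3)).strftime("%Y-%m-%d")
print(start)                                                   # 2019-05-29
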
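The rest of the __main__ block is collapsed after the SparkConf line above. A minimal sketch, assuming it builds a Hive-enabled SparkSession from that conf and writes query results with the "tfrecords" data source seen at the top of this hunk (provided by the spark-tensorflow-connector jar, which is assumed to be on the classpath); the query is taken from the removed doris() and the output path is illustrative:

from pyspark import SparkConf
from pyspark.sql import SparkSession

# Build a Hive-enabled session from the same conf shown in the diff above.
sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true")
spark = SparkSession.builder.config(conf=sparkConf).enableHiveSupport().getOrCreate()

# Pull the prediction set and dump it as TFRecords for the ESMM model.
df = spark.sql("select y, z from jerry_test.esmm_pre_data")
df.repartition(1).write.format("tfrecords") \
    .save(path="/tmp/esmm_pre/native/", mode="overwrite")   # illustrative path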