Commit 881af7c0 authored by 张彦钊

修复字典取值异常问题

parent 3113c918
......@@ -212,9 +212,11 @@ def feature_engineer():
app_list_func(x[5], leve3_map), app_list_func(x[6], leve2_map), app_list_func(x[7], leve2_map),
app_list_func(x[8], leve2_map), app_list_func(x[9], leve2_map), app_list_func(x[10], leve2_map),
app_list_func(x[11], leve2_map), app_list_func(x[12], leve2_map),
[value_map[x[0]], value_map[x[13]], value_map[x[14]], value_map[x[15]], value_map[x[16]],
value_map[x[17]], value_map[x[18]], value_map[x[19]], value_map[x[20]], value_map[x[21]],
value_map[x[22]], value_map[x[23]], value_map[x[24]], value_map[x[25]], value_map[x[26]]]))
[value_map.get(x[0],1), value_map.get(x[13],2), value_map.get(x[14],3), value_map.get(x[15],4),
value_map.get(x[16],5),value_map.get(x[17],6), value_map.get(x[18],7), value_map.get(x[19],8),
value_map.get(x[20],9), value_map.get(x[21],10),
value_map.get(x[22],11), value_map.get(x[23],12), value_map.get(x[24],13),
value_map.get(x[25],14), value_map.get(x[26],15)]))
rdd.persist(storageLevel= StorageLevel.MEMORY_ONLY_SER)
......
......@@ -322,8 +322,8 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
.map(lambda x: (x[1],x[2],x[6],x[7],x[8],x[9],x[10],x[11],
x[12],x[13],x[14],x[15],x[16],x[17],x[3],x[4],x[5]))) \
.toDF("y","z","app_list", "level2_list", "level3_list","tag1_list", "tag2_list", "tag3_list", "tag4_list",
"tag5_list", "tag6_list", "tag7_list", "ids","number","city","uid","cid_id").repartition(100).write.format("tfrecords") \
.save(path=path+"test_native/", mode="overwrite")
"tag5_list", "tag6_list", "tag7_list", "ids","number","city","uid","cid_id")\
.repartition(100).write.format("tfrecords").save(path=path+"test_native/", mode="overwrite")
print("native tfrecord done")
h = time.time()
print((h-f)/60)
......@@ -334,12 +334,11 @@ def get_predict(date,value_map,app_list_map,leve2_map,leve3_map):
# nearby_pre.toPandas().to_csv(local_path + "nearby.csv", header=True)
spark.createDataFrame(rdd.filter(lambda x: x[0] == 1)
.map(
lambda x: (x[1], x[2], x[6], x[7], x[8], x[9], x[10], x[11], x[12],
x[13], x[14], x[15], x[16],x[17],x[3],x[4],x[5]))) \
.map(lambda x: (x[1], x[2], x[6], x[7], x[8], x[9], x[10], x[11],
x[12],x[13], x[14], x[15], x[16],x[17],x[3],x[4],x[5]))) \
.toDF("y", "z", "app_list", "level2_list", "level3_list", "tag1_list", "tag2_list", "tag3_list", "tag4_list",
"tag5_list", "tag6_list", "tag7_list", "ids","number","city","uid","cid_id").repartition(100).write.format("tfrecords") \
.save(path=path + "test_nearby/", mode="overwrite")
"tag5_list", "tag6_list", "tag7_list", "ids","number","city","uid","cid_id")\
.repartition(100).write.format("tfrecords").save(path=path + "test_nearby/", mode="overwrite")
print("nearby tfrecord done")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment