Commit 73a98c5e authored by 宋柯

模型调试

parent 51667ba2
......@@ -954,6 +954,7 @@ if __name__ == '__main__':
.drop("timestamp")
# | -- ITEM_CATEGORY_card_id: string(nullable=false)
# | -- partition_date: string(nullable=true)
# | -- USER_CATEGORY_device_id: string(nullable=false)
# | -- USER_CATEGORY_os: string(nullable=false)
# | -- USER_CATEGORY_user_city_id: string(nullable=false)
......@@ -980,10 +981,10 @@ if __name__ == '__main__':
# | -- ITEM_CATEGORY_hospital_city_tag_id: string(nullable=false)
# | -- ITEM_CATEGORY_hospital_type: string(nullable=false)
# | -- ITEM_CATEGORY_hospital_is_high_quality: string(nullable=false)
# | -- ITEM_CATEGORY_second_demands: string(nullable=false)
# | -- ITEM_CATEGORY_second_solutions: string(nullable=false)
# | -- ITEM_CATEGORY_second_positions: string(nullable=false)
# | -- ITEM_CATEGORY_projects: string(nullable=false)
# | -- ITEM_MULTI_CATEGORY_second_demands: string(nullable=false)
# | -- ITEM_MULTI_CATEGORY_second_solutions: string(nullable=false)
# | -- ITEM_MULTI_CATEGORY_second_positions: string(nullable=false)
# | -- ITEM_MULTI_CATEGORY_projects: string(nullable=false)
# | -- ITEM_NUMERIC_sku_price: double(nullable=false)
#
fields = [field.name for field in samples.schema.fields]
......@@ -1015,7 +1016,7 @@ if __name__ == '__main__':
write_time_start = time.time()
for categoty_field in categoty_fields:
output_file = "file:///home/gmuser/" + categoty_field + "_vocab"
train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file)
train_samples.select(categoty_field).where(F.col(categoty_field) != '-1').where(F.col(categoty_field) != '').distinct().write.mode("overwrite").options(header="false").csv(output_file)
for multi_categoty_field in multi_categoty_fields:
output_file = "file:///home/gmuser/" + multi_categoty_field + "_vocab"
train_samples.selectExpr("explode(split({multi_categoty_field},','))".format(multi_categoty_field = multi_categoty_field)).where(F.col(multi_categoty_field) != '-1').distinct().write.mode("overwrite").options(header="false").csv(output_file)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment