Commit e2d3099b authored by 宋柯

模型上线

parent a31fe430
import tensorflow.compat.v1 as tf
# Emit TensorFlow log messages at INFO level and above.
tf.logging.set_verbosity(tf.logging.INFO)
# Directory prefix; the script concatenates a sample file name onto it
# (the trailing slash is required because plain string '+' is used).
DATA_DIR = '/data/files/wideAndDeep/trainData/'
def input_fn(csv_path, epoch, shuffle, batch_size):
    """Build a padded-batch ``tf.data`` pipeline from a '|'-delimited text file.

    Every line is split on '|' (empty fields are kept, so column positions
    stay fixed) and converted into a ``(features, label)`` pair:

    * ``*_CATEGORY_*`` features are the raw string of a single column.
    * ``*_MULTI_CATEGORY_*`` features are a single column split again on
      ',', which yields a variable-length string vector.
    * ``*_NUMERIC_*`` features are a single column parsed with
      ``string_to_number``.
    * The label is column 5 parsed with ``string_to_number``.

    Column 1 of the input is not read.

    Args:
        csv_path: File name(s) handed to ``tf.data.TextLineDataset``.
        epoch: Repeat count handed to ``Dataset.repeat``.
        shuffle: When truthy, parsed examples are shuffled through a
            2048-element buffer before they are batched.
        batch_size: Number of examples per padded batch.

    Returns:
        A dataset yielding ``(features, label)`` batches. Within a batch the
        multi-valued features are padded with ``'-1'`` up to the longest row.
    """
    # Single source of truth for the file layout: feature name -> column index.
    # The parser, the padded shapes and the padding values are all derived
    # from these three tables so they cannot drift apart.
    category_columns = {
        'ITEM_CATEGORY_card_id': 0,
        'USER_CATEGORY_device_id': 2,
        'USER_CATEGORY_os': 3,
        'USER_CATEGORY_user_city_id': 4,
        'ITEM_CATEGORY_service_type': 19,
        'ITEM_CATEGORY_merchant_id': 20,
        'ITEM_CATEGORY_doctor_type': 21,
        'ITEM_CATEGORY_doctor_id': 22,
        'ITEM_CATEGORY_doctor_famous': 23,
        'ITEM_CATEGORY_hospital_id': 24,
        'ITEM_CATEGORY_hospital_city_tag_id': 25,
        'ITEM_CATEGORY_hospital_type': 26,
        'ITEM_CATEGORY_hospital_is_high_quality': 27,
    }
    multi_category_columns = {
        'USER_MULTI_CATEGORY_second_solutions': 6,
        'USER_MULTI_CATEGORY_second_demands': 7,
        'USER_MULTI_CATEGORY_second_positions': 8,
        'USER_MULTI_CATEGORY_projects': 9,
        'ITEM_MULTI_CATEGORY_second_demands': 28,
        'ITEM_MULTI_CATEGORY_second_solutions': 29,
        'ITEM_MULTI_CATEGORY_second_positions': 30,
        'ITEM_MULTI_CATEGORY_projects': 31,
    }
    numeric_columns = {
        'ITEM_NUMERIC_click_count_sum': 10,
        'ITEM_NUMERIC_click_count_avg': 11,
        'ITEM_NUMERIC_click_count_stddev': 12,
        'ITEM_NUMERIC_exp_count_sum': 13,
        'ITEM_NUMERIC_exp_count_avg': 14,
        'ITEM_NUMERIC_exp_count_stddev': 15,
        'ITEM_NUMERIC_discount': 16,
        'ITEM_NUMERIC_case_count': 17,
        'ITEM_NUMERIC_sales_count': 18,
        'ITEM_NUMERIC_sku_price': 32,
    }
    label_column = 5

    def parse_line(line_tensor):
        """Turn one raw text line into a ``(features, label)`` pair."""
        # skip_empty=False keeps empty fields so the indices above stay valid.
        splits = tf.string_split([line_tensor], delimiter='|', skip_empty=False).values
        features = {name: splits[index] for name, index in category_columns.items()}
        for name, index in multi_category_columns.items():
            features[name] = tf.string_split([splits[index]], delimiter=',').values
        for name, index in numeric_columns.items():
            features[name] = tf.string_to_number(splits[index])
        return features, tf.string_to_number(splits[label_column])

    # Scalars need no padding (shape ()); multi-valued features are padded
    # along their only axis ([-1] = pad to the longest row in the batch).
    feature_shapes = {}
    feature_padding = {}
    for name in category_columns:
        feature_shapes[name] = ()
        feature_padding[name] = '-1'
    for name in multi_category_columns:
        feature_shapes[name] = [-1]
        feature_padding[name] = '-1'
    for name in numeric_columns:
        feature_shapes[name] = ()
        feature_padding[name] = 0.0
    padded_shapes = (feature_shapes, ())
    padding_values = (feature_padding, 0.0)

    dataset = tf.data.TextLineDataset(csv_path)
    # Cache the parsed examples so later epochs skip file reading and parsing.
    dataset = dataset.map(parse_line, num_parallel_calls=8).cache()
    if shuffle:
        # Shuffle individual examples *before* batching. Shuffling after
        # padded_batch would only reorder whole batches: every batch would
        # contain the same examples in every epoch, and the shuffle buffer
        # would hold up to 2048 complete batches in memory.
        dataset = dataset.shuffle(2048)
    dataset = dataset.padded_batch(batch_size, padded_shapes, padding_values=padding_values)
    return dataset.prefetch(512 * 100).repeat(epoch)
import time

# Evaluation samples: a single epoch, no shuffling, 32768 examples per batch.
dataset = input_fn(DATA_DIR + 'eval_samples.csv', 1, False, 2 ** 15)


def _time_full_pass(iterator):
    """Drain ``iterator`` and print the elapsed wall-clock seconds.

    Only ``tf.errors.OutOfRangeError`` (the end-of-data signal) ends the
    pass. Any other exception, such as a parse failure or a missing file,
    propagates instead of being reported as a successful timing.
    """
    # NOTE(review): this assumes eager execution, where get_next() actually
    # pulls an element. In graph mode it would only add an op to the graph
    # and never raise, so confirm how the script is run.
    start = time.time()
    try:
        while True:
            iterator.get_next()
    except tf.errors.OutOfRangeError:
        pass
    print(time.time() - start)


# The pipeline is iterated twice so the two printed timings can be compared;
# the dataset contains a cache() stage.
iter1 = dataset.make_one_shot_iterator()
_time_full_pass(iter1)
iter2 = dataset.make_one_shot_iterator()
_time_full_pass(iter2)
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment