Commit 732bf4b4 authored by 赵威

tag number for tractate

parent a522c51d
...@@ -93,6 +93,13 @@ TRACTATE_COLUMNS = [ ...@@ -93,6 +93,13 @@ TRACTATE_COLUMNS = [
"first_positions", "first_positions",
"second_positions", "second_positions",
"projects", "projects",
"first_demands_num",
"second_demands_num",
"first_solutions_num",
"second_solutions_num",
"first_positions_num",
"second_positions_num",
"projects_num",
] ]
INT_COLUMNS = [ INT_COLUMNS = [
...@@ -147,6 +154,13 @@ INT_COLUMNS = [ ...@@ -147,6 +154,13 @@ INT_COLUMNS = [
"sixty_browse_user_num", "sixty_browse_user_num",
"ninety_browse_user_num", "ninety_browse_user_num",
"history_browse_user_num", "history_browse_user_num",
"first_demands_num",
"second_demands_num",
"first_solutions_num",
"second_solutions_num",
"first_positions_num",
"second_positions_num",
"projects_num",
] ]
FLOAT_COLUMNS = [ FLOAT_COLUMNS = [
"one_ctr", "one_ctr",
...@@ -210,6 +224,7 @@ def get_tractate_dict_from_redis(): ...@@ -210,6 +224,7 @@ def get_tractate_dict_from_redis():
"second_positions", "projects" "second_positions", "projects"
]: ]:
tmp[col_name] = elem.split(",") tmp[col_name] = elem.split(",")
tmp[col_name + "_num"] = len(tmp[col_name])
elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply"]: elif col_name in ["is_pure_author", "is_have_pure_reply", "is_have_reply"]:
if elem == "true": if elem == "true":
tmp[col_name] = 1 tmp[col_name] = 1
...@@ -240,6 +255,14 @@ def tractate_feature_engineering(tractate_df): ...@@ -240,6 +255,14 @@ def tractate_feature_engineering(tractate_df):
df["second_positions"] = df["second_positions"].apply(lambda d: d if isinstance(d, list) else []) df["second_positions"] = df["second_positions"].apply(lambda d: d if isinstance(d, list) else [])
df["projects"] = df["projects"].apply(lambda d: d if isinstance(d, list) else []) df["projects"] = df["projects"].apply(lambda d: d if isinstance(d, list) else [])
df["first_demands_num"] = df["first_demands"].apply(lambda d: len(d))
df["second_demands_num"] = df["second_demands"].apply(lambda d: len(d))
df["first_solutions_num"] = df["first_solutions"].apply(lambda d: len(d))
df["second_solutions_num"] = df["second_solutions"].apply(lambda d: len(d))
df["first_positions_num"] = df["first_positions"].apply(lambda d: len(d))
df["second_positions_num"] = df["second_positions"].apply(lambda d: len(d))
df["projects_num"] = df["projects"].apply(lambda d: len(d))
df["is_pure_author"] = df["is_pure_author"].astype(int) df["is_pure_author"] = df["is_pure_author"].astype(int)
df["is_have_pure_reply"] = df["is_have_pure_reply"].astype(int) df["is_have_pure_reply"] = df["is_have_pure_reply"].astype(int)
df["is_have_reply"] = df["is_have_reply"].astype(int) df["is_have_reply"] = df["is_have_reply"].astype(int)
......
...@@ -58,7 +58,8 @@ def main(): ...@@ -58,7 +58,8 @@ def main():
estimator_config = tf.estimator.RunConfig(session_config=session_config) estimator_config = tf.estimator.RunConfig(session_config=session_config)
model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config) model = tf.estimator.Estimator(model_fn=esmm_model_fn, params=params, model_dir=model_path, config=estimator_config)
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=50000) # TODO 50000
train_spec = tf.estimator.TrainSpec(input_fn=lambda: esmm_input_fn(train_df, shuffle=True), max_steps=20000)
eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False)) eval_spec = tf.estimator.EvalSpec(input_fn=lambda: esmm_input_fn(val_df, shuffle=False))
res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec) res = tf.estimator.train_and_evaluate(model, train_spec, eval_spec)
print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@") print("@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@")
...@@ -70,7 +71,8 @@ def main(): ...@@ -70,7 +71,8 @@ def main():
model_export_path = str(Path("/data/files/models/tractate/").expanduser()) model_export_path = str(Path("/data/files/models/tractate/").expanduser())
save_path = model_export(model, all_features, model_export_path) save_path = model_export(model, all_features, model_export_path)
print("save to: " + save_path) print("save to: " + save_path)
set_essm_model_save_path("tractate", save_path) # TODO save model
# set_essm_model_save_path("tractate", save_path)
print("============================================================") print("============================================================")
# # save_path = str(Path("~/data/models/tractate/1596089465").expanduser()) # local # # save_path = str(Path("~/data/models/tractate/1596089465").expanduser()) # local
...@@ -84,9 +86,9 @@ def main(): ...@@ -84,9 +86,9 @@ def main():
device_ids = list(device_dict.keys())[:20] device_ids = list(device_dict.keys())[:20]
tractate_ids = list(tractate_dict.keys()) tractate_ids = list(tractate_dict.keys())
# print(len(device_dict), len(tractate_dict), "\n") # TODO printer
# print(device_dict[device_ids[0]], "\n") # print(device_dict[device_ids[0]], "\n")
# print(tractate_dict[tractate_ids[0]], "\n") print(tractate_dict[tractate_ids[0]], "\n")
for i in range(5): for i in range(5):
time_1 = timeit.default_timer() time_1 = timeit.default_timer()
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment