Commit 16ec9503 authored by 赵威

add column names

parent 13456735
...@@ -18,13 +18,13 @@ if __name__ == "__main__": ...@@ -18,13 +18,13 @@ if __name__ == "__main__":
days = 5 # TODO days 30 days = 5 # TODO days 30
start, end = get_ndays_before_no_minus(days), get_ndays_before_no_minus(1) start, end = get_ndays_before_no_minus(days), get_ndays_before_no_minus(1)
click_df = get_click_data(spark, card_type, start, end) # click_df = get_click_data(spark, card_type, start, end)
save_df_to_csv(click_df, "personas_tractate_click.csv") # save_df_to_csv(click_df, "personas_tractate_click.csv")
print(click_df.shape) # print(click_df.shape)
exposure_df = get_exposure_data(spark, card_type, start, end) # exposure_df = get_exposure_data(spark, card_type, start, end)
save_df_to_csv(exposure_df, "personas_tractate_exposure.csv") # save_df_to_csv(exposure_df, "personas_tractate_exposure.csv")
print(exposure_df.shape) # print(exposure_df.shape)
device_feature_df = get_device_tags(spark) device_feature_df = get_device_tags(spark)
save_df_to_csv(device_feature_df, "personas_device_feature.csv") save_df_to_csv(device_feature_df, "personas_device_feature.csv")
......
...@@ -4,6 +4,7 @@ import sys ...@@ -4,6 +4,7 @@ import sys
sys.path.append(os.path.realpath(".")) sys.path.append(os.path.realpath("."))
import pandas as pd import pandas as pd
from gensim.models import Word2Vec, word2vec
from utils.defs import nth_element from utils.defs import nth_element
from utils.files import get_df from utils.files import get_df
...@@ -17,23 +18,23 @@ TRACTATE_COLUMNS = [ ...@@ -17,23 +18,23 @@ TRACTATE_COLUMNS = [
def device_tractae_fe(): def device_tractae_fe():
click_df = get_df("personas_tractate_click.csv") pass
exposure_df = get_df("personas_tractate_exposure.csv")
device_fe_df = get_df("personas_device_feature.csv")
tractate_tags_df = get_df("personas_tractate_tags.csv")
print(click_df.shape)
print(exposure_df.shape)
print(device_fe_df.shape)
print(tractate_tags_df.shape)
if __name__ == "__main__": if __name__ == "__main__":
click_df = get_df("personas_tractate_click.csv") device_fe_df = get_df("personas_device_feature.csv",
exposure_df = get_df("personas_tractate_exposure.csv") columns=[
device_fe_df = get_df("personas_device_feature.csv") "cl_id",
tractate_tags_df = get_df("personas_tractate_tags.csv") "first_demands",
print(click_df.shape) "first_solutions",
print(exposure_df.shape) "first_positions",
print(device_fe_df.shape) "second_demands",
print(tractate_tags_df.shape) "second_solutions",
"second_positions",
"projects",
"business_tags",
])
print(device_fe_df.head(3))
tractate_tags_df = get_df("personas_tractate_tags.csv", columns=["tractate_id", "business_tags"])
print(tractate_tags_df.head(3)) print(tractate_tags_df.head(3))
...@@ -32,8 +32,9 @@ def save_dict_to_csv(d, file): ...@@ -32,8 +32,9 @@ def save_dict_to_csv(d, file):
f.write("{}|{}\n".format(k, ",".join([str(x) for x in v]))) f.write("{}|{}\n".format(k, ",".join([str(x) for x in v])))
def get_df(file): def get_df(file, sep="|", columns=[]):
full_path = os.path.join(DATA_PATH, file) full_path = os.path.join(DATA_PATH, file)
print(full_path) print(full_path)
df = pd.read_csv(full_path, sep="|") df = pd.read_csv(full_path, sep="|", names=columns)
print(df.shape)
return df return df
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment