Commit 781e81ef authored by 赵威

try to get data

parent 6412f7e3
@@ -2,10 +2,28 @@ import os
import sys
sys.path.append(os.path.realpath("."))
import pandas as pd
from utils.date import get_ndays_before, get_ndays_before_no_minus
from utils.es import es_scan
from utils.files import save_df_to_csv
from utils.spark import (get_click_data, get_device_tags, get_exposure_data, get_spark)
if __name__ == "__main__":
    print(get_ndays_before(10))
    spark = get_spark("personas_vector_data")

    card_type = "user_post"
    days = 5  # TODO: widen the window to 30 days
    start, end = get_ndays_before_no_minus(days), get_ndays_before_no_minus(1)

    # Click and exposure logs for this card type over the date window.
    click_df = get_click_data(spark, card_type, start, end)
    # save_df_to_csv(click_df, "tractate_click.csv")
    print(click_df.shape)

    exposure_df = get_exposure_data(spark, card_type, start, end)
    # save_df_to_csv(exposure_df, "tractate_exposure.csv")
    print(exposure_df.shape)

    # Per-device tag features.
    device_feature_df = get_device_tags(spark)
    # save_df_to_csv(device_feature_df, "device_feature.csv")
    print(device_feature_df.shape)

# spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/personas_vector/get_data.py
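Once the window is widened, persisting all three frames under ./_data is just a matter of uncommenting the save_df_to_csv calls above; uncommented, they read:

save_df_to_csv(click_df, "tractate_click.csv")
save_df_to_csv(exposure_df, "tractate_exposure.csv")
save_df_to_csv(device_feature_df, "device_feature.csv")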
import os


def remove_file(path):
    # Best-effort delete: ignore a missing file, log anything else.
    try:
        os.remove(path)
    except Exception as e:
        print(e)


def save_df_to_csv(df, file):
    print(df.head(3))
    base_dir = os.getcwd()
    data_dir = os.path.join(base_dir, "_data")
    os.makedirs(data_dir, exist_ok=True)  # ensure ./_data exists on a fresh checkout
    full_path = os.path.join(data_dir, file)
    remove_file(full_path)
    df.to_csv(full_path, sep="|", index=False)
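A minimal usage sketch for save_df_to_csv (the frame contents and the example.csv name are illustrative, not part of the commit); note that reading the file back requires the same "|" separator the writer used:

import pandas as pd

from utils.files import save_df_to_csv

df = pd.DataFrame({"device_id": ["d1", "d2"], "click": [1, 0]})
save_df_to_csv(df, "example.csv")
print(pd.read_csv("_data/example.csv", sep="|"))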