Commit 72c037c7 authored by litaolemo

update

parent b796b6de
source /srv/envs/esmm/bin/activate
/opt/spark/bin/spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 2 --num-executors 100 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/meta_base_code/task/search_strategy_d.py
/opt/spark/bin/spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 4 --num-executors 120 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/meta_base_code/task/core_indicators_monitoring.py
/opt/spark/bin/spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 2g --executor-cores 4 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/meta_base_code/task/core_indicators_monitoring.py
/opt/spark/bin/spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 4 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/meta_base_code/task/advertisement_strategy_d.py
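These submissions differ only in executor sizing and the task script; everything else (queue, jars, parallelism) is shared. A minimal, hypothetical Python wrapper for driving them is sketched below — the helper function and its defaults are illustrative, while the paths and flag values are copied from the commands above:

import subprocess

def submit(task_script, executor_memory="1g", executor_cores=2, num_executors=100):
    # Hypothetical convenience wrapper; not part of this repo.
    jars = ",".join([
        "/srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar",
        "/srv/apps/spark-connector_2.11-1.9.0-rc2.jar",
        "/srv/apps/mysql-connector-java-5.1.38.jar",
    ])
    cmd = [
        "/opt/spark/bin/spark-submit",
        "--master", "yarn", "--deploy-mode", "client",
        "--queue", "root.strategy",
        "--driver-memory", "16g",
        "--executor-memory", executor_memory,
        "--executor-cores", str(executor_cores),
        "--num-executors", str(num_executors),
        "--conf", "spark.default.parallelism=100",
        "--jars", jars,
        task_script,
    ]
    subprocess.run(cmd, check=True)

submit("/srv/apps/meta_base_code/task/search_strategy_d.py")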
@@ -11,6 +11,8 @@ import json
import pymysql
import xlwt, datetime
import redis
from meta_base_code.utils.func_from_redis_get_portrait import user_portrait_scan_info
from meta_base_code.utils.func_from_es_get_article import get_es_article_num
# from pyhive import hive
from maintenance.func_send_email_with_file import send_file_email
from typing import Dict, List
@@ -38,101 +40,19 @@ def con_sql(sql):
    return result
startTime = time.time()
sparkConf = SparkConf()
sparkConf.set("spark.sql.crossJoin.enabled", True)
sparkConf.set("spark.debug.maxToStringFields", "100")
sparkConf.set("spark.tispark.plan.allow_index_double_read", False)
sparkConf.set("spark.tispark.plan.allow_index_read", True)
sparkConf.set("spark.hive.mapred.supports.subdirectories", True)
sparkConf.set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", True)
sparkConf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
sparkConf.set("mapreduce.output.fileoutputformat.compress", False)
sparkConf.set("mapreduce.map.output.compress", False)
sparkConf.set("prod.gold.jdbcuri",
"jdbc:mysql://172.16.30.136/doris_prod?user=doris&password=o5gbA27hXHHm&rewriteBatchedStatements=true")
sparkConf.set("prod.mimas.jdbcuri",
"jdbc:mysql://172.16.30.138/mimas_prod?user=mimas&password=GJL3UJe1Ck9ggL6aKnZCq4cRvM&rewriteBatchedStatements=true")
sparkConf.set("prod.gaia.jdbcuri",
"jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
"jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
"jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("search_tractate_ctr")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .enableHiveSupport().getOrCreate())
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/brickhouse-0.7.1-SNAPSHOT.jar")
spark.sql("ADD JAR hdfs:///user/hive/share/lib/udf/hive-udf-1.0-SNAPSHOT.jar")
spark.sql("CREATE TEMPORARY FUNCTION json_map AS 'brickhouse.udf.json.JsonMapUDF'")
spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJsonFormatCheck'")
spark.sql("CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'")
# NOTE: redis_client2 and get_user_portrait_tag3_from_redis are assumed to be
# defined elsewhere in this module / its imports; they are not part of this hunk.
def user_portrait_scan_info():
    return_dict = {}
    try:
        round = 0
        all_count = 0
        empty_count = 0
        just_projects_count = 0
        keys = "doris:user_portrait:tag3:device_id:*"
        cur = 0
        while True:
            # SCAN with (cursor, match, count). The original code issued one scan
            # before the loop and silently dropped that first batch of results;
            # this loop processes every batch, including the first and the last.
            cur, results = redis_client2.scan(cur, keys, 3000)
            round += 1
            print("round: " + str(round))
            for key in results:
                if isinstance(key, bytes):  # redis-py may return bytes keys
                    key = key.decode()
                device_id = key.split(":")[-1]
                all_count += 1
                # print(key)
                # if user_portrait_is_empty(device_id):
                #     print(device_id)
                #     empty_count += 1
                # if user_portrait_just_projects(device_id):
                #     print(device_id)
                #     just_projects_count += 1
                #     user_portrait_get_empty_candidates(device_id)
                try:
                    res_dic = get_user_portrait_tag3_from_redis(device_id)
                    # print(res_dic)
                    for data_type in res_dic:
                        for tag in res_dic[data_type]:
                            if tag == "眼窝":  # debug trace for this one tag ("eye socket")
                                print(return_dict.get(tag))
                            # Count occurrences of each tag, remembering its data_type.
                            if return_dict.get(tag):
                                return_dict[tag] = (data_type, return_dict[tag][1] + 1)
                            else:
                                return_dict[tag] = (data_type, 1)
                except Exception:
                    continue
            if cur == 0:  # SCAN returns cursor 0 once the keyspace is exhausted
                break
        # for data_list in res_dic:
        #     for data in data_list:
        return return_dict
    except Exception as e:
        print(e)
        return {}
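For reference, a minimal (hypothetical) way to inspect the counter this scan builds — the sorting and the top-10 cut are illustrative, not part of the pipeline:

counts = user_portrait_scan_info()  # {tag: (data_type, occurrence_count)}
top_tags = sorted(counts.items(), key=lambda kv: kv[1][1], reverse=True)[:10]
for tag, (data_type, n) in top_tags:
    print(tag, data_type, n)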
task_list = []
task_days = 3
for t in range(1, task_days):
    day_num = 0 - t
    now = (datetime.datetime.now() + datetime.timedelta(days=day_num))
    last_30_day_str = (now + datetime.timedelta(days=-30)).strftime("%Y%m%d")
    today_str = now.strftime("%Y%m%d")
    yesterday_str = (now + datetime.timedelta(days=-1)).strftime("%Y%m%d")
    one_week_age_str = (now + datetime.timedelta(days=-7)).strftime("%Y%m%d")
    user_portrait_scan = user_portrait_scan_info()
    # Each res is expected to be a dict carrying "second_demands" / "projects"
    # lists, i.e. the user_portrait_scan_info imported from
    # meta_base_code.utils.func_from_redis_get_portrait above (the local helper
    # of the same name returns a {tag: (data_type, count)} dict instead).
    for res in user_portrait_scan:
        second_demands = []
        projects = []
        if res.get("second_demands"):
            second_demands = res.get("second_demands")
            count_res = get_es_article_num({"second_demands": second_demands}, allow_tag=["second_demands"])
            print(count_res)
        if res.get("projects"):
            projects = res.get("projects")
            count_res = get_es_article_num({"tags_v3": projects}, allow_tag=["tags_v3"])
            print(count_res)
        # for active_type in res_dict:
        #     for device_os_type in res_dict[active_type]:
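get_es_article_num is imported from meta_base_code.utils.func_from_es_get_article and its real body is not in this diff. Purely as a reading aid, a hypothetical stand-in for the call shape it appears to implement — the client wiring, index name, and query structure are all assumptions:

from elasticsearch import Elasticsearch  # assumed client; not shown in this diff

def get_es_article_num_sketch(tag_dict, allow_tag=(), es_host="http://localhost:9200"):
    # Hypothetical stand-in for get_es_article_num: count articles whose tag
    # fields match any of the supplied tag values.
    es = Elasticsearch(es_host)
    should = [
        {"terms": {field: values}}
        for field, values in tag_dict.items()
        if field in allow_tag
    ]
    body = {"query": {"bool": {"should": should, "minimum_should_match": 1}}}
    return es.count(index="articles", body=body)["count"]  # index name is a guess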
@@ -46,6 +46,7 @@ def get_user_portrait_tag3_from_redis(device_id, limit_score=0):
        }
    return {}

def user_portrait_scan_info():
    return_dict = {}
    try: