Commit dd8422f9 authored by litaolemo

update

parent 2daf5720
......@@ -3,6 +3,7 @@
# @File : func_from_es_get_article.py
# @email : litao@igengmei.com
# @author : litao
import pymysql
from elasticsearch import Elasticsearch
exists_es_dic = {}
es = Elasticsearch([
......@@ -14,6 +15,26 @@ es = Elasticsearch([
'port': 9200,
}])
def con_sql_jerry_prod(sql):
    """Run ``sql`` against the ``jerry_prod`` database and return all rows.

    Args:
        sql: SQL statement handed verbatim to ``cursor.execute``. Nothing is
            escaped or parameterised here, so callers must not build it from
            untrusted input.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.

    Raises:
        Any exception raised by ``pymysql`` while connecting, executing or
        fetching. The connection is closed before the exception propagates.
    """
    # NOTE(review): host and credentials are hard-coded in the source file;
    # consider loading them from configuration or the environment instead.
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    try:
        # The cursor context manager closes the cursor on exit.
        with db.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        # Release the connection even when execute/fetchall raises.
        db.close()
def con_sql_doris_prod(sql):
    """Run ``sql`` against the ``doris_prod`` database and return all rows.

    Args:
        sql: SQL statement handed verbatim to ``cursor.execute``. Nothing is
            escaped or parameterised here, so callers must not build it from
            untrusted input.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.

    Raises:
        Any exception raised by ``pymysql`` while connecting, executing or
        fetching. The connection is closed before the exception propagates.
    """
    # NOTE(review): host and credentials are hard-coded in the source file;
    # consider loading them from configuration or the environment instead.
    db = pymysql.connect(host='172.16.30.136', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_prod')
    try:
        # The cursor context manager closes the cursor on exit.
        with db.cursor() as cursor:
            cursor.execute(sql)
            return cursor.fetchall()
    finally:
        # Release the connection even when execute/fetchall raises.
        db.close()
def get_device_num_from_es(word):
......@@ -162,11 +183,19 @@ def get_device_num_from_es(word):
tractate_content_num = results["hits"]["total"]
return tractate_content_num
def get_tractate_tags_from_es(doc_id):
    """Look up one tractate document in Elasticsearch by its id.

    Args:
        doc_id: Document id queried in the ``gm-dbmw-tractate-read`` index
            (doc type ``tractate``).

    Returns:
        The response of ``es.get_source`` for that id, returned unmodified.
    """
    # NOTE(review): ``size`` and ``timeout`` look like search-API options
    # carried over from a query call; confirm that the installed
    # elasticsearch client accepts them on ``get_source``.
    results = es.get_source(
        index='gm-dbmw-tractate-read',
        doc_type='tractate',
        timeout='10s',
        size=0,
        id=doc_id
    )
    return results
def get_es_article_num(tag_dict, allow_tag=["first_demands", "second_demands", "first_solutions", "second_solutions","positions", "second_positions", "tags_v3"]):
article_dict = {
"first_demands": [],
"second_demands": [],
......@@ -291,3 +320,25 @@ def get_es_article_num(tag_dict, allow_tag=["first_demands", "second_demands", "
article_dict[tag_type].append(data_dic)
return article_dict
def get_user_post_from_mysql():
    """Count tag occurrences across the ``user_post`` cards selected below.

    Card ids are read from ``strategy_content_exposure_index`` using the
    filters in the SQL statement. Each id is looked up through
    ``get_tractate_tags_from_es`` and the values found under
    ``second_demands`` and ``tags_v3`` are tallied.

    Returns:
        A tuple ``(second_demands_count_dict, tags_v3_count_dict)``; each is
        a plain dict mapping a value to the number of cards it appeared on.

    Raises:
        KeyError: if a looked-up document has no ``second_demands`` or
            ``tags_v3`` entry.
    """
    second_demands_count_dict = {}
    tags_v3_count_dict = {}
    sql = """
    select card_id from strategy_content_exposure_index where card_content_type="user_post" and preciseexposure_num>=50 and ctr>=0.05 and avg_page_stay>=20;
    """
    sql_res = con_sql_doris_prod(sql)
    for row in sql_res:
        # A fetched row is a sequence even for a one-column select; pass the
        # bare id to Elasticsearch rather than the whole row.
        card_id = row[0] if isinstance(row, (tuple, list)) else row
        es_res = get_tractate_tags_from_es(card_id)
        # Accept both a response that nests the fields under "_source" and
        # one that carries them at the top level.
        source = es_res.get("_source", es_res)
        for position in source["second_demands"]:
            second_demands_count_dict[position] = second_demands_count_dict.get(position, 0) + 1
        for tag in source["tags_v3"]:
            tags_v3_count_dict[tag] = tags_v3_count_dict.get(tag, 0) + 1
    return second_demands_count_dict, tags_v3_count_dict
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment