Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
宋柯
meta_base_code
Commits
dd8422f9
Commit
dd8422f9
authored
Sep 17, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
2daf5720
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
54 additions
and
3 deletions
+54
-3
func_from_es_get_article.py
utils/func_from_es_get_article.py
+54
-3
No files found.
utils/func_from_es_get_article.py
View file @
dd8422f9
...
...
@@ -3,6 +3,7 @@
# @File : func_from_es_get_article.py
# @email : litao@igengmei.com
# @author : litao
import
pymysql
from
elasticsearch
import
Elasticsearch
exists_es_dic
=
{}
es
=
Elasticsearch
([
...
...
@@ -14,6 +15,26 @@ es = Elasticsearch([
'port'
:
9200
,
}])
def con_sql_jerry_prod(sql):
    """Run ``sql`` against the ``jerry_prod`` database and return every row.

    Args:
        sql: SQL statement, passed unchanged to ``cursor.execute``.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.

    Raises:
        Any exception raised by pymysql while connecting, executing or
        fetching is propagated; the cursor and connection are closed first.
    """
    # Fetch data from a database table.
    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration or environment variables.
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user',
                         passwd='aqpuBLYzEV7tML5RPsN1pntUzFy', db='jerry_prod')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        db.close()
def con_sql_doris_prod(sql):
    """Run ``sql`` against the ``doris_prod`` database and return every row.

    Args:
        sql: SQL statement, passed unchanged to ``cursor.execute``.

    Returns:
        The result of ``cursor.fetchall()`` for the statement.

    Raises:
        Any exception raised by pymysql while connecting, executing or
        fetching is propagated; the cursor and connection are closed first.
    """
    # Fetch data from a database table.
    # NOTE(review): host and credentials are hard-coded in source; consider
    # moving them to configuration or environment variables.
    db = pymysql.connect(host='172.16.30.136', port=3306, user='doris',
                         passwd='o5gbA27hXHHm', db='doris_prod')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql)
            return cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        db.close()
def
get_device_num_from_es
(
word
):
...
...
@@ -162,11 +183,19 @@ def get_device_num_from_es(word):
tractate_content_num
=
results
[
"hits"
][
"total"
]
return
tractate_content_num
def get_tractate_tags_from_es(doc_id):
    """Look up one tractate document by id in Elasticsearch.

    Args:
        doc_id: Document id, forwarded as ``id`` to ``es.get_source``.

    Returns:
        Whatever ``es.get_source`` returns for the
        ``gm-dbmw-tractate-read`` index and ``tractate`` doc type.
    """
    # tractate
    # NOTE(review): ``timeout`` and ``size`` are forwarded exactly as in the
    # original call; confirm the installed client accepts them for get_source.
    return es.get_source(
        index='gm-dbmw-tractate-read',
        doc_type='tractate',
        timeout='10s',
        size=0,
        id=doc_id,
    )
def
get_es_article_num
(
tag_dict
,
allow_tag
=
[
"first_demands"
,
"second_demands"
,
"first_solutions"
,
"second_solutions"
,
"positions"
,
"second_positions"
,
"tags_v3"
]):
article_dict
=
{
"first_demands"
:
[],
"second_demands"
:
[],
...
...
@@ -291,3 +320,25 @@ def get_es_article_num(tag_dict, allow_tag=["first_demands", "second_demands", "
article_dict
[
tag_type
]
.
append
(
data_dic
)
return
article_dict
def get_user_post_from_mysql():
    """Count tag occurrences across ``user_post`` cards selected from MySQL.

    Card ids are read from ``strategy_content_exposure_index`` through
    ``con_sql_doris_prod`` using the thresholds in the query below. Each id is
    then looked up with ``get_tractate_tags_from_es`` and the values found
    under ``second_demands`` and ``tags_v3`` are tallied.

    Returns:
        A ``(second_demands_count_dict, tags_v3_count_dict)`` tuple of plain
        dicts mapping each value to the number of times it was seen.
    """
    from collections import Counter

    second_demands_counts = Counter()
    tags_v3_counts = Counter()
    sql = """
    select card_id from strategy_content_exposure_index where card_content_type="user_post" and preciseexposure_num>=50 and ctr>=0.05 and avg_page_stay>=20;
    """
    for row in con_sql_doris_prod(sql):
        # fetchall() yields one row per record; the single selected column
        # (card_id) is its first element, so unpack it rather than passing
        # the whole row to Elasticsearch as the document id.
        card_id = row[0]
        es_res = get_tractate_tags_from_es(card_id)
        # Accept both a response wrapped in "_source" and a bare source
        # document (the shape documented for the get_source API).
        source = es_res.get("_source", es_res)
        # A document without one of the tag fields contributes nothing
        # instead of aborting the whole aggregation.
        second_demands_counts.update(source.get("second_demands") or ())
        tags_v3_counts.update(source.get("tags_v3") or ())
    return dict(second_demands_counts), dict(tags_v3_counts)
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment