Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
宋柯
meta_base_code
Commits
5f8dd1e7
Commit
5f8dd1e7
authored
Sep 10, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
5666e58f
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
25 additions
and
24 deletions
+25
-24
portary_div_exposure.py
utils/portary_div_exposure.py
+25
-24
No files found.
utils/portary_div_exposure.py
View file @
5f8dd1e7
...
...
@@ -6,7 +6,6 @@
import
datetime
import
json
import
traceback
import
redis
import
pymysql
from
elasticsearch
import
Elasticsearch
...
...
@@ -61,9 +60,9 @@ def user_portrait_scan_info():
for
data_type
in
res_dic
:
for
tag
in
res_dic
[
data_type
]:
if
return_dict
.
get
(
tag
):
return_dict
[
tag
]
=
(
data_type
,
return_dict
[
tag
][
1
]
+
1
)
return_dict
[
tag
]
=
(
data_type
,
return_dict
[
tag
][
1
]
+
1
)
else
:
return_dict
[
tag
]
=
(
data_type
,
1
)
return_dict
[
tag
]
=
(
data_type
,
1
)
except
:
continue
# for data_list in res_dic:
...
...
@@ -162,16 +161,16 @@ def get_device_num_from_es(word):
timeout
=
'10s'
,
size
=
0
,
body
=
{
"aggs"
:
{
"NAME"
:
{
"nested"
:
{
"path"
:
"projects"
},
"aggs"
:
{
"NAME1"
:
{
"terms"
:
{
"field"
:
"projects.name"
,
"size"
:
10000
}
}
"NAME"
:
{
"nested"
:
{
"path"
:
"projects"
},
"aggs"
:
{
"NAME1"
:
{
"terms"
:
{
"field"
:
"projects.name"
,
"size"
:
10000
}
}
}
}
}
}
)
tractate_content_num
=
results
[
"hits"
][
"total"
]
return
tractate_content_num
...
...
@@ -232,7 +231,7 @@ def get_es_article_num(tag_dict):
)
answer_content_num
=
results
[
"hits"
][
"total"
]
except
:
print
(
"answer has no
%
s"
%
tag_type
)
print
(
"answer has no
%
s"
%
tag_type
)
answer_content_num
=
0
body
=
{
...
...
@@ -330,9 +329,9 @@ def from_id_get_tag(card_id_dict):
index
=
""
doc_type
=
""
query_count
=
{
"diary"
:{},
"answer"
:{},
"tractate"
:{}
"diary"
:
{},
"answer"
:
{},
"tractate"
:
{}
}
for
card_type
in
card_id_dict
:
if
card_type
==
"diary"
:
...
...
@@ -345,26 +344,27 @@ def from_id_get_tag(card_id_dict):
index
=
'gm-dbmw-tractate-read'
doc_type
=
'tractate'
for
card_id
in
card_id_dict
[
card_type
]:
res
=
es
.
get_source
(
index
,
doc_type
,
card_id
)
res
=
es
.
get_source
(
index
,
doc_type
,
card_id
)
# print(res)
first_demands
=
res
.
get
(
"first_demands"
)
if
res
.
get
(
"first_demands"
)
else
[]
second_demands
=
res
.
get
(
"second_demands"
)
if
res
.
get
(
"second_demands"
)
else
[]
first_solutions
=
res
.
get
(
"first_solutions"
)
if
res
.
get
(
"first_solutions"
)
else
[]
second_solutions
=
res
.
get
(
"second_solutions"
)
if
res
.
get
(
"second_solutions"
)
else
[]
first_positions
=
res
.
get
(
"first_positions"
)
if
res
.
get
(
"
first_
positions"
)
else
[]
first_positions
=
res
.
get
(
"first_positions"
)
if
res
.
get
(
"positions"
)
else
[]
second_positions
=
res
.
get
(
"second_positions"
)
if
res
.
get
(
"second_positions"
)
else
[]
projects
=
res
.
get
(
"projects"
)
if
res
.
get
(
"
projects
"
)
else
[]
projects
=
res
.
get
(
"projects"
)
if
res
.
get
(
"
tags_v3
"
)
else
[]
word_count_list
=
first_demands
+
second_demands
+
first_solutions
+
second_solutions
+
first_positions
+
second_positions
+
projects
for
word
in
word_count_list
:
if
word
in
query_count
[
doc_type
]:
query_count
[
doc_type
][
word
]
=
(
doc_type
,
query_count
[
doc_type
][
word
][
1
]
+
1
)
query_count
[
doc_type
][
word
]
=
(
doc_type
,
query_count
[
doc_type
][
word
][
1
]
+
1
)
else
:
query_count
[
doc_type
][
word
]
=
(
doc_type
,
1
)
query_count
[
doc_type
][
word
]
=
(
doc_type
,
1
)
return
query_count
def
save_data_to_csv
(
user_portrait_dict
,
word_count_exposure
):
all_data
=
[(
"user_portrait"
,
"tag_type"
,
"user_portrait_count"
,
"diary_exposure"
,
"answer_exposure"
,
"tractate_exposure"
)]
def
save_data_to_csv
(
user_portrait_dict
,
word_count_exposure
):
all_data
=
[
(
"user_portrait"
,
"tag_type"
,
"user_portrait_count"
,
"diary_exposure"
,
"answer_exposure"
,
"tractate_exposure"
)]
for
tag
in
user_portrait_dict
:
data_type
=
""
data_count
=
""
...
...
@@ -380,9 +380,8 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure):
answer_exposure
=
word_count_exposure
[
"answer"
]
.
get
(
tag
)
if
word_count_exposure
[
"tractate"
]
.
get
(
tag
):
tractate_exposure
=
word_count_exposure
[
"tractate"
]
.
get
(
tag
)
all_data
.
append
((
data_type
,
data_count
,
diary_exposure
[
1
],
answer_exposure
[
1
],
tractate_exposure
[
1
]))
print
(
tag
,
all_data
[
-
1
])
all_data
.
append
((
tag
,
data_type
,
data_count
,
diary_exposure
[
1
],
answer_exposure
[
1
],
tractate_exposure
[
1
]))
print
(
tag
,
all_data
[
-
1
])
data
=
pd
.
DataFrame
(
all_data
)
s
=
datetime
.
datetime
.
now
()
...
...
@@ -391,6 +390,7 @@ def save_data_to_csv(user_portrait_dict,word_count_exposure):
# columns=columns
)
def
parse_data
():
demands_num
=
{}
# 获取画像数
...
...
@@ -407,7 +407,8 @@ def parse_data():
# 获取曝光id对应的标签
word_count_exposure
=
from_id_get_tag
(
card_id_dict
)
print
(
word_count_exposure
)
save_data_to_csv
(
user_portrait_dict
,
word_count_exposure
)
save_data_to_csv
(
user_portrait_dict
,
word_count_exposure
)
if
__name__
==
"__main__"
:
parse_data
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment