Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
宋柯
meta_base_code
Commits
187154b5
Commit
187154b5
authored
Sep 10, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
c06279c6
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
53 additions
and
23 deletions
+53
-23
portary_div_exposure.py
utils/portary_div_exposure.py
+53
-23
No files found.
utils/portary_div_exposure.py
View file @
187154b5
...
@@ -29,7 +29,7 @@ es = Elasticsearch([
...
@@ -29,7 +29,7 @@ es = Elasticsearch([
def
user_portrait_scan_info
():
def
user_portrait_scan_info
():
re
s
_dict
=
{}
re
turn
_dict
=
{}
try
:
try
:
round
=
0
round
=
0
all_count
=
0
all_count
=
0
...
@@ -45,7 +45,7 @@ def user_portrait_scan_info():
...
@@ -45,7 +45,7 @@ def user_portrait_scan_info():
key
=
key
key
=
key
device_id
=
key
.
split
(
":"
)[
-
1
]
device_id
=
key
.
split
(
":"
)[
-
1
]
all_count
+=
1
all_count
+=
1
print
(
key
)
#
print(key)
# if user_portrait_is_empty(device_id):
# if user_portrait_is_empty(device_id):
# print(device_id)
# print(device_id)
# empty_count += 1
# empty_count += 1
...
@@ -55,17 +55,24 @@ def user_portrait_scan_info():
...
@@ -55,17 +55,24 @@ def user_portrait_scan_info():
# just_projects_count += 1
# just_projects_count += 1
# user_portrait_get_empty_candidates(device_id)
# user_portrait_get_empty_candidates(device_id)
res_dic
=
get_user_portrait_tag3_from_redis
(
device_id
)
try
:
print
(
res_dic
)
res_dic
=
get_user_portrait_tag3_from_redis
(
device_id
)
print
(
res_dic
)
for
data_type
in
res_dic
:
for
tag
in
res_dic
[
data_type
]:
if
return_dict
.
get
(
tag
):
return_dict
[
tag
]
=
(
data_type
,
return_dict
[
tag
][
1
]
+
1
)
else
:
return_dict
[
tag
]
=
(
data_type
,
1
)
except
:
continue
# for data_list in res_dic:
# for data_list in res_dic:
# for data in data_list:
# for data in data_list:
return
return_dict
print
(
"all count: "
+
str
(
all_count
))
print
(
"empty portrait: "
+
str
(
empty_count
))
print
(
"just projects portrait: "
+
str
(
just_projects_count
))
except
Exception
as
e
:
except
Exception
as
e
:
print
(
e
)
print
(
e
)
return
{}
def
get_user_portrait_tag3_redis_key
(
device_id
):
def
get_user_portrait_tag3_redis_key
(
device_id
):
...
@@ -441,7 +448,11 @@ def get_data_by_mysql(host, port, user, passwd, db, sql):
...
@@ -441,7 +448,11 @@ def get_data_by_mysql(host, port, user, passwd, db, sql):
def
from_id_get_tag
(
card_id_dict
):
def
from_id_get_tag
(
card_id_dict
):
index
=
""
index
=
""
doc_type
=
""
doc_type
=
""
query_count
=
{}
query_count
=
{
"diary"
:{},
"answer"
:{},
"tractate"
:{}
}
for
card_type
in
card_id_dict
:
for
card_type
in
card_id_dict
:
if
card_type
==
"diary"
:
if
card_type
==
"diary"
:
index
=
'gm-dbmw-diary-read'
index
=
'gm-dbmw-diary-read'
...
@@ -464,27 +475,46 @@ def from_id_get_tag(card_id_dict):
...
@@ -464,27 +475,46 @@ def from_id_get_tag(card_id_dict):
projects
=
res
.
get
(
"projects"
)
if
res
.
get
(
"projects"
)
else
[]
projects
=
res
.
get
(
"projects"
)
if
res
.
get
(
"projects"
)
else
[]
word_count_list
=
first_demands
+
second_demands
+
first_solutions
+
second_solutions
+
first_positions
+
second_positions
+
projects
word_count_list
=
first_demands
+
second_demands
+
first_solutions
+
second_solutions
+
first_positions
+
second_positions
+
projects
for
word
in
word_count_list
:
for
word
in
word_count_list
:
if
word
in
query_count
:
if
word
in
query_count
[
doc_type
]
:
query_count
[
word
]
+=
1
query_count
[
doc_type
][
word
]
=
(
doc_type
,
query_count
[
doc_type
][
word
][
1
]
+
1
)
else
:
else
:
query_count
[
word
]
=
0
query_count
[
doc_type
][
word
]
=
(
doc_type
,
1
)
return
query_count
return
query_count
# def save_data_to_csv(all_tags, word_count_exposure):
def
save_data_to_csv
(
all_tags
,
user_portrait_dict
,
word_count_exposure
):
# all_data = []
all_data
=
[]
#
# data = pd.DataFrame(all_dic)
for
tag
in
all_tags
:
# s = datetime.datetime.now()
data_type
=
""
# ss = str(s)[0:19].replace(' ', '-').replace(':', '-')
data_count
=
""
# data.to_csv('%s%sall_s2.csv' % (d, monthly_doc_type_name), encoding='gb18030',
diary_exposure
=
0
# # columns=columns
answer_exposure
=
0
# )
tractate_exposure
=
0
user_portrait
=
user_portrait_dict
.
get
(
tag
)
if
user_portrait
:
data_type
,
data_count
=
user_portrait
if
word_count_exposure
[
"diary"
]
.
get
(
"tag"
):
diary_exposure
=
word_count_exposure
[
"diary"
]
.
get
(
"tag"
)
if
word_count_exposure
[
"answer"
]
.
get
(
"tag"
):
answer_exposure
=
word_count_exposure
[
"answer"
]
.
get
(
"tag"
)
if
word_count_exposure
[
"tractate"
]
.
get
(
"tag"
):
tractate_exposure
=
word_count_exposure
[
"tractate"
]
.
get
(
"tag"
)
all_data
.
append
((
data_type
,
data_count
,
diary_exposure
,
answer_exposure
,
tractate_exposure
))
print
(
all_data
[
-
1
])
# data = pd.DataFrame(all_data)
# s = datetime.datetime.now()
# ss = str(s)[0:19].replace(' ', '-').replace(':', '-')
# data.to_csv('%s%sall_s2.csv' % (d, monthly_doc_type_name), encoding='gb18030',
# # columns=columns
# )
def
parse_data
():
def
parse_data
():
demands_num
=
{}
demands_num
=
{}
# 获取画像数
# 获取画像数
user_portrait_scan_info
()
user_portrait_
dict
=
user_portrait_
scan_info
()
# 获取全部标签
# 获取全部标签
all_tags
=
get_channel_tags_info
()
all_tags
=
get_channel_tags_info
()
print
(
all_tags
)
print
(
all_tags
)
...
@@ -496,7 +526,7 @@ def parse_data():
...
@@ -496,7 +526,7 @@ def parse_data():
# 获取曝光id对应的标签
# 获取曝光id对应的标签
word_count_exposure
=
from_id_get_tag
(
card_id_dict
)
word_count_exposure
=
from_id_get_tag
(
card_id_dict
)
print
(
word_count_exposure
)
print
(
word_count_exposure
)
# save_data_to_csv(all_tags,
word_count_exposure)
save_data_to_csv
(
all_tags
,
user_portrait_dict
,
word_count_exposure
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
parse_data
()
parse_data
()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment