Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
宋柯
meta_base_code
Commits
ed0f7ce2
Commit
ed0f7ce2
authored
Nov 25, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
51b7a690
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
15 additions
and
1 deletion
+15
-1
new_user_project_protratit.py
new_user_analysis/new_user_project_protratit.py
+15
-1
No files found.
new_user_analysis/new_user_project_protratit.py
View file @
ed0f7ce2
...
...
@@ -23,7 +23,7 @@ import time
from
pyspark
import
SparkConf
from
pyspark.sql
import
SparkSession
,
DataFrame
from
meta_base_code.utils.func_from_redis_get_portrait
import
*
import
pandas
as
pd
# from pyspark.sql.functions import lit
# import pytispark.pytispark as pti
...
...
@@ -105,10 +105,13 @@ es_query_tractate = {"query": {
}
}
}
# Raw hits yielded by the scan below are appended here by the loop that follows.
tractate_list = []

# Keyed by content keyword in the loop that follows
# (presumably occurrence counts, mirroring answer_dict -- TODO confirm).
tractate_dict = {}

# Scan the tractate read index with es_query_tractate serialised to JSON.
# NOTE(review): `scan` looks like elasticsearch.helpers.scan -- confirm against
# the file's imports; '3m' is passed through as its `scroll` argument.
tractate_res_scan = scan(
    client=es,
    index="gm-dbmw-tractate-read",
    doc_type="tractate",
    query=json.dumps(es_query_tractate),
    scroll='3m',
)
for
tractate_json
in
tractate_res_scan
:
# print(tractate_json)
tractate_list
.
append
(
tractate_json
)
_id
=
tractate_json
[
"_id"
]
content_keyword_list
=
tractate_json
[
"_source"
][
"content_keyword"
]
for
content_keyword
in
content_keyword_list
:
...
...
@@ -128,9 +131,16 @@ es_query_answer = {"query": {
}
}
}
# Export every collected tractate hit to CSV and mail the file out.
#
# The CSV is written to the exact path that is attached to the email.
# It used to be written to the relative name "tractate_list.csv", which only
# matched the attachment path when the script was started from
# /srv/apps/meta_base_code; from any other working directory the mail step
# picked up a missing or stale file.
tractate_csv_path = "/srv/apps/meta_base_code/tractate_list.csv"

res = pd.DataFrame(tractate_list)
# gb18030 is kept as the output encoding, unchanged from the original export.
res.to_csv(tractate_csv_path, encoding="gb18030")

send_file_email(
    "",
    '',
    sender="litao@igengmei.com",
    email_group=["litao@igengmei.com"],
    email_msg_body_str="test",
    title_str="test",
    cc_group=["litao@igengmei.com"],
    file=tractate_csv_path,
)
# Raw hits yielded by the scan below are appended here by the loop that follows.
answer_list = []

# Occurrence counter per content keyword, filled in by the loop that follows.
answer_dict = {}

# Scan the answer read index with es_query_answer serialised to JSON.
# NOTE(review): `scan` looks like elasticsearch.helpers.scan -- confirm against
# the file's imports; '3m' is passed through as its `scroll` argument.
answer_res_scan = scan(
    client=es,
    index="gm-dbmw-answer-read",
    doc_type="answer",
    query=json.dumps(es_query_answer),
    scroll='3m',
)
for
answer_json
in
answer_res_scan
:
answer_list
.
append
(
answer_json
)
_id
=
answer_json
[
"_id"
]
content_keyword_list
=
answer_json
[
"_source"
][
"content_keyword"
]
for
content_keyword
in
content_keyword_list
:
...
...
@@ -138,6 +148,10 @@ for answer_json in answer_res_scan:
answer_dict
[
content_keyword
]
+=
1
else
:
answer_dict
[
content_keyword
]
=
1
# Export every collected answer hit to CSV and mail the file out.
#
# The CSV is written to the exact path that is attached to the email.
# It used to be written to the relative name "answer_list.csv", which only
# matched the attachment path when the script was started from
# /srv/apps/meta_base_code; from any other working directory the mail step
# picked up a missing or stale file.
# NOTE(review): assumed to be top-level code that runs once after the answer
# loop has finished -- confirm the indentation against the real file.
answer_csv_path = "/srv/apps/meta_base_code/answer_list.csv"

res = pd.DataFrame(answer_list)
# gb18030 is kept as the output encoding, unchanged from the original export.
res.to_csv(answer_csv_path, encoding="gb18030")

send_file_email(
    "",
    '',
    sender="litao@igengmei.com",
    email_group=["litao@igengmei.com"],
    email_msg_body_str="test",
    title_str="test",
    cc_group=["litao@igengmei.com"],
    file=answer_csv_path,
)
for
t
in
range
(
2
,
task_days
):
day_num
=
0
-
t
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment