Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
黎涛
meta_base_code
Commits
70e16469
Commit
70e16469
authored
Nov 24, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
a08b8172
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
33 additions
and
32 deletions
+33
-32
new_user_project_protratit.py
new_user_analysis/new_user_project_protratit.py
+33
-32
No files found.
new_user_analysis/new_user_project_protratit.py
View file @
70e16469
...
...
@@ -14,10 +14,10 @@ import redis
# from pyhive import hive
from
maintenance.func_send_email_with_file
import
send_file_email
from
typing
import
Dict
,
List
#
from elasticsearch_7 import Elasticsearch
#
from elasticsearch_7.helpers import scan
from
elasticsearch
import
Elasticsearch
from
elasticsearch.helpers
import
scan
from
elasticsearch_7
import
Elasticsearch
from
elasticsearch_7.helpers
import
scan
#
from elasticsearch import Elasticsearch
#
from elasticsearch.helpers import scan
import
sys
import
time
from
pyspark
import
SparkConf
...
...
@@ -92,22 +92,23 @@ task_list = []
task_days
=
3
es_query_tractate
=
{
"query"
:
{
"bool"
:
{
"must"
:
[
{
"term"
:
{
"is_online"
:
True
}},
{
"range"
:
{
"content_level"
:
{
"gte"
:
3
}}},
{
"terms"
:
{
"operators_add_tags"
:
[
3315
]}}
"bool"
:
{
"must"
:
[
{
"term"
:
{
"is_online"
:
True
}},
{
"range"
:
{
"content_level"
:
{
"gte"
:
3
}}},
{
"terms"
:
{
"operators_add_tags"
:
[
3315
]}}
],
"must_not"
:
[{
"term"
:
{
"status"
:
4
}},
{
"term"
:
{
"show_by_index"
:
2
}}
]
}
}
}
tractate_res_scan
=
scan
(
client
=
es
,
query
=
es_query_tractate
,
index
=
"gm-dbmw-tractate-read"
,
doc_type
=
"tractate"
)
],
"must_not"
:
[{
"term"
:
{
"status"
:
4
}},
{
"term"
:
{
"show_by_index"
:
2
}}
]
}
}
}
tractate_res_scan
=
scan
(
client
=
es
,
query
=
es_query_tractate
,
index
=
"gm-dbmw-tractate-read"
,
doc_type
=
"tractate"
)
tractate_dict
=
{}
for
tractate_json
in
tractate_res_scan
:
print
(
tractate_json
)
_id
=
tractate_json
[
"_id"
]
content_keyword_list
=
tractate_json
[
"_source"
][
"content_keyword"
]
for
content_keyword
in
content_keyword_list
:
...
...
@@ -117,17 +118,17 @@ for tractate_json in tractate_res_scan:
tractate_dict
[
content_keyword
]
=
1
es_query_answer
=
{
"query"
:
{
"bool"
:
{
"must"
:
[
{
"term"
:
{
"is_online"
:
True
}},
{
"range"
:
{
"content_level"
:
{
"gte"
:
3
}}},
{
"terms"
:
{
"operators_add_tags"
:
[
3315
]}},
{
"range"
:
{
"content_length"
:
{
"gte"
:
30
}}}
]
}
}
}
answer_res_scan
=
scan
(
client
=
es
,
query
=
es_query_tractate
,
index
=
"gm-dbmw-answer-read"
,
doc_type
=
"answer"
)
"bool"
:
{
"must"
:
[
{
"term"
:
{
"is_online"
:
True
}},
{
"range"
:
{
"content_level"
:
{
"gte"
:
3
}}},
{
"terms"
:
{
"operators_add_tags"
:
[
3315
]}},
{
"range"
:
{
"content_length"
:
{
"gte"
:
30
}}}
]
}
}
}
answer_res_scan
=
scan
(
client
=
es
,
query
=
es_query_tractate
,
index
=
"gm-dbmw-answer-read"
,
doc_type
=
"answer"
)
answer_dict
=
{}
for
answer_json
in
answer_res_scan
:
_id
=
answer_json
[
"_id"
]
...
...
@@ -138,7 +139,6 @@ for answer_json in answer_res_scan:
else
:
answer_dict
[
content_keyword
]
=
1
for
t
in
range
(
2
,
task_days
):
day_num
=
0
-
t
now
=
(
datetime
.
datetime
.
now
()
+
datetime
.
timedelta
(
days
=
day_num
))
...
...
@@ -283,14 +283,15 @@ WHERE spam_pv.device_id IS NULL
partition_date
=
today_str
pid
=
hashlib
.
md5
((
partition_date
+
protratit_type
)
.
encode
(
"utf8"
))
.
hexdigest
()
action_count
=
portrait_dict
[
"projects"
][
protratit_type
]
answer_count
=
answer_dict
.
get
(
protratit_type
,
0
)
tractate_count
=
tractate_dict
.
get
(
protratit_type
,
0
)
answer_count
=
answer_dict
.
get
(
protratit_type
,
0
)
tractate_count
=
tractate_dict
.
get
(
protratit_type
,
0
)
total_count
=
answer_count
+
tractate_count
instert_sql
=
"""replace into new_user_project_count(
partition_day,pid,protratit_count,protratit_type,answer_count,tractate_count,total_count) VALUES('{partition_day}','{pid}',{protratit_count},'{protratit_type}',{answer_count},{tractate_count},{total_count});"""
.
format
(
partition_day
=
today_str
,
pid
=
pid
,
protratit_count
=
action_count
,
protratit_type
=
protratit_type
,
answer_count
=
answer_count
,
tractate_count
=
tractate_count
,
total_count
=
total_count
,
protratit_type
=
protratit_type
,
answer_count
=
answer_count
,
tractate_count
=
tractate_count
,
total_count
=
total_count
)
print
(
instert_sql
)
# cursor.execute("set names 'UTF8'")
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment