Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
M
meta_base_code
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
黎涛
meta_base_code
Commits
60424f4a
Commit
60424f4a
authored
Nov 26, 2020
by
litaolemo
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update
parent
b0fa04ad
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
30 additions
and
19 deletions
+30
-19
new_user_project_analysis.py
new_user_analysis/new_user_project_analysis.py
+13
-12
func_get_uesr_event.py
utils/func_get_uesr_event.py
+17
-7
No files found.
new_user_analysis/new_user_project_analysis.py
View file @
60424f4a
...
...
@@ -88,25 +88,19 @@ spark.sql("CREATE TEMPORARY FUNCTION is_json AS 'com.gmei.hive.common.udf.UDFJso
spark
.
sql
(
"CREATE TEMPORARY FUNCTION arrayMerge AS 'com.gmei.hive.common.udf.UDFArryMerge'"
)
task_list
=
[]
task_days
=
2
tractate_list
=
[]
# res = pd.DataFrame(answer_list)
# res.to_csv("answer_list.csv",encoding="gb18030")
# send_file_email("", '', sender="litao@igengmei.com", email_group=["litao@igengmei.com"], email_msg_body_str="test",
# title_str="test", cc_group=["litao@igengmei.com"], file="/srv/apps/meta_base_code/answer_list.csv")
for
t
in
range
(
1
,
task_days
):
task_days
=
3
for
t
in
range
(
2
,
task_days
):
day_num
=
0
-
t
now
=
(
datetime
.
datetime
.
now
()
+
datetime
.
timedelta
(
days
=
day_num
))
last_30_day_str
=
(
now
+
datetime
.
timedelta
(
days
=-
30
))
.
strftime
(
"
%
Y
%
m
%
d"
)
tomorrow_str
=
(
datetime
.
datetime
.
now
()
+
datetime
.
timedelta
(
days
=
day_num
+
1
))
.
strftime
(
"
%
Y
%
m
%
d"
)
today_timestamp
=
int
(
now
.
timestamp
())
today_str
=
now
.
strftime
(
"
%
Y
%
m
%
d"
)
today_str_format
=
now
.
strftime
(
"
%
Y-
%
m-
%
d"
)
yesterday_str
=
(
now
+
datetime
.
timedelta
(
days
=-
1
))
.
strftime
(
"
%
Y
%
m
%
d"
)
...
...
@@ -149,12 +143,12 @@ WHERE dev.device_id is null and t2.device_id is not null
count_not_has_portratit
=
0
event_dict
=
{}
event_dict_reverse
=
{}
for
count_user_count
,
res
in
enumerate
(
sql_res
):
# print(count, res)
temp_count
=
0
try
:
for
event_cn
,
projects
in
get_user_event_from_mysql
(
res
.
device_id
):
for
event_cn
,
projects
in
get_user_event_from_mysql
(
res
.
device_id
,
today_timestamp
):
project_list
=
projects
.
split
(
","
)
for
project
in
project_list
:
if
project
not
in
event_dict
:
...
...
@@ -164,6 +158,13 @@ WHERE dev.device_id is null and t2.device_id is not null
else
:
event_dict
[
project
][
event_cn
]
=
1
# Count how often each project occurs under each event name
# (event_dict_reverse maps event_cn -> {project: count}).
# The inner dict has to be created when the *event* key is missing. The
# previous test checked `project` against the outer dict, which is keyed by
# event_cn, so the per-event counts were reset to {} on nearly every
# iteration, and a KeyError was possible when the test happened to pass.
project_counts = event_dict_reverse.setdefault(event_cn, {})
project_counts[project] = project_counts.get(project, 0) + 1
except
Exception
as
e
:
print
(
"error "
,
e
)
...
...
@@ -175,7 +176,7 @@ WHERE dev.device_id is null and t2.device_id is not null
# print(portrait_dict)
# print(count_user_count + 1, count_not_has_portratit)
# print("-------------------------------")
print
(
"event_dict"
,
event_dict
)
print
(
"event_dict"
,
today_str
,
event_dict_reverse
)
# for protratit_type in portrait_dict["projects"]:
# partition_date = today_str
# pid = hashlib.md5((partition_date + protratit_type).encode("utf8")).hexdigest()
...
...
utils/func_get_uesr_event.py
View file @
60424f4a
...
...
@@ -20,10 +20,19 @@ def con_sql(sql):
return
result
# Earlier one-argument form of get_user_event_from_mysql; a two-argument
# definition with the same name follows it in this diff view.
# Generator: runs a single query against kafka_tag3_log for rows whose cl_id
# equals `keyword`, ordered by date descending, and yields one
# (event_cn, projects) pair per returned row.
# NOTE(review): `keyword` is interpolated straight into the SQL text;
# presumably it is a trusted device id -- confirm against the caller.
def
get_user_event_from_mysql
(
keyword
):
# Build the SQL text with the client id substituted into the WHERE clause.
sql_str
=
"""select cl_id, from_unixtime(log_time) as date, event_cn, projects from kafka_tag3_log
where cl_id = '
%
s' order by date desc"""
%
keyword
# con_sql is defined earlier in this file; its result is iterated row by row.
sql_result
=
con_sql
(
sql_str
)
# Each row is unpacked into four fields; only event_cn and projects are yielded.
for
data
in
sql_result
:
clid
,
date
,
event_cn
,
projects
=
data
yield
event_cn
,
projects
def get_user_event_from_mysql(keyword, timestamp=None):
    """Yield ``(event_cn, projects)`` pairs logged for one client id.

    Rows are read from the ``kafka_tag3_log`` table through ``con_sql``.

    Args:
        keyword: Value matched against the ``cl_id`` column.
        timestamp: Optional lower bound for ``log_time``. When it is given
            and non-zero, only rows with
            ``timestamp < log_time < timestamp + 86400`` are returned
            (a one-day window if the values are Unix seconds). When it is
            omitted or falsy, every row for the client id is returned.

    Yields:
        ``(event_cn, projects)`` tuples, in the order the query returns them.
    """
    # NOTE(review): the statement is assembled by string interpolation because
    # con_sql() takes a finished SQL string. If `keyword` can ever come from
    # an untrusted source, switch to a parameterized query.
    sql_str = (
        "select cl_id, from_unixtime(log_time) as date, event_cn, projects "
        "from kafka_tag3_log where cl_id = '%s'" % keyword
    )
    if timestamp:
        tomorrow_timestamp = timestamp + 86400
        # Both bounds must filter on `log_time`; the lower bound previously
        # referenced a misspelled `long_time` column.
        sql_str += " and log_time < %s and log_time > %s" % (
            tomorrow_timestamp,
            timestamp,
        )

    # Each row is (cl_id, date, event_cn, projects); callers only need the
    # last two fields.
    for _cl_id, _date, event_cn, projects in con_sql(sql_str):
        yield event_cn, projects
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment