ffm-baseline · Commit 3707dc43

Commit 3707dc43, authored Jun 12, 2020 by 赵威
Parent: 0c03c1a3

    try run

Showing 2 changed files with 18 additions and 17 deletions:

    eda/smart_rank/tag3_update_user_portrait_offline.py  (+17, -16)
    eda/smart_rank/tool.py                                (+1, -1)
eda/smart_rank/tag3_update_user_portrait_offline.py

...
@@ -170,15 +170,14 @@ def update_tag3_user_portrait(cl_id):
 def consume_kafka():
-    sql = "select distinct cl_id from kafka_tag3_log where log_time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day))"
-    db, cursor = get_jerry_test()
-    cursor.execute(sql)
-    device_ids_lst = [i[0] for i in cursor.fetchall()]
-    db.close()
-    cursor.close()
-    # device_ids_lst = ["androidid_a25a1129c0b38f7b"]
+    # TODO comment
+    # sql = "select distinct cl_id from kafka_tag3_log where log_time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day))"
+    # db, cursor = get_jerry_test()
+    # cursor.execute(sql)
+    # device_ids_lst = [i[0] for i in cursor.fetchall()]
+    # db.close()
+    # cursor.close()
+    device_ids_lst = ["androidid_a25a1129c0b38f7b"]
     sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
         .set("spark.hadoop.mapreduce.input.fileinputformat.input.dir.recursive", "true") \
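For context, the block being commented out is the job's production input path: it pulls the distinct device ids (cl_id) with tag3 log activity in the last 30 days out of MySQL, and this commit swaps that list for a single hardcoded test device. A minimal sketch of that path, assuming get_jerry_test() returns a (connection, cursor) pair over pymysql — its internals are not part of this diff, and the connection parameters below are placeholders, not the real config:

import pymysql

def get_jerry_test():
    # Placeholder credentials; the real helper lives elsewhere in this repo.
    db = pymysql.connect(host="localhost", user="root", password="", database="jerry_test")
    return db, db.cursor()

def fetch_recent_device_ids():
    # Distinct devices that produced tag3 log events in the last 30 days.
    sql = ("select distinct cl_id from kafka_tag3_log "
           "where log_time > UNIX_TIMESTAMP(DATE_SUB(NOW(), INTERVAL 30 day))")
    db, cursor = get_jerry_test()
    cursor.execute(sql)
    device_ids_lst = [i[0] for i in cursor.fetchall()]
    cursor.close()
    db.close()
    return device_ids_lst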
...
@@ -196,9 +195,15 @@ def consume_kafka():
         spark.sparkContext.addPyFile("/srv/apps/ffm-baseline_git/eda/smart_rank/tool.py")
         device_ids_lst_rdd = spark.sparkContext.parallelize(device_ids_lst, numSlices=1000)
-        result = device_ids_lst_rdd.repartition(100).map(lambda x: update_tag3_user_portrait(x))
-        # result.foreach(print)
-        result.collect()
+        # result = device_ids_lst_rdd.repartition(100).map(lambda x: update_tag3_user_portrait(x))
+        # # result.foreach(print)
+        # result.collect()
+        result2 = device_ids_lst_rdd.repartition(100).map(lambda x: update_tag3_user_portrait_by_event(x))
+        # result2.foreach(print)
+        result2.collect()
         spark.stop()
     except Exception as e:
         send_email("tag3_update_user_portrait_offline", "tag3_update_user_portrait_offline", e)
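The pattern in this hunk is the usual RDD fan-out: parallelize the id list, map the per-device update over it, and force execution with an action, since map() alone is lazy. A minimal, locally runnable sketch of the same pattern, where process_device stands in for update_tag3_user_portrait_by_event:

from pyspark import SparkConf
from pyspark.sql import SparkSession

def process_device(cl_id):
    # Stand-in for the real per-device portrait update.
    return (cl_id, "ok")

if __name__ == "__main__":
    sparkConf = SparkConf().set("spark.master", "local[2]")
    spark = SparkSession.builder.config(conf=sparkConf).getOrCreate()
    device_ids_lst = ["androidid_a25a1129c0b38f7b"]
    rdd = spark.sparkContext.parallelize(device_ids_lst, numSlices=1000)
    # Nothing runs until an action is invoked; collect() triggers the job
    # and pulls the mapped values back to the driver.
    print(rdd.repartition(100).map(lambda x: process_device(x)).collect())
    spark.stop()

Since the per-device updates write to a store as a side effect, the commented-out foreach variant would avoid shipping return values back to the driver; collect() on a large device list is only sensible when the mapped function returns little or nothing.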
...
@@ -206,11 +211,7 @@ def consume_kafka():
 if __name__ == "__main__":
     start = datetime.datetime.now()
-    consume_kafka()
+    # TODO
+    # consume_kafka()
+    update_tag3_user_portrait_by_event("androidid_a25a1129c0b38f7b")
     end = datetime.datetime.now()
     print(end - start)
     print("done")
eda/smart_rank/tool.py

...
@@ -452,7 +452,7 @@ def write_user_portrait(cl_id, first_solutions, second_solutions, first_demands,
 # `first_positions` text NOT NULL,
 # `second_positions` text NOT NULL,
 # `projects` text NOT NULL,
-# `event` text NOT NULL,
+# `event_cn` text NOT NULL,
 # PRIMARY KEY(`id`)
 # )
 def write_user_portrait_by_event(cl_id, first_solutions, second_solutions, first_demands, second_demands, first_positions,
 ...
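The one-line change renames the `event` column to `event_cn` in the commented-out schema that documents the table write_user_portrait_by_event() targets. Pieced together from the visible fragment, the destination table would look roughly like the following; the table name, the id column, and every column hidden by the collapsed context are assumptions:

# Hypothetical reconstruction; only the columns visible in this hunk are
# certain, and `user_portrait_by_event` is a guessed table name.
CREATE_USER_PORTRAIT_BY_EVENT = """
CREATE TABLE `user_portrait_by_event` (
  `id` int(11) NOT NULL AUTO_INCREMENT,   -- assumed
  `first_positions` text NOT NULL,
  `second_positions` text NOT NULL,
  `projects` text NOT NULL,
  `event_cn` text NOT NULL,
  PRIMARY KEY (`id`)
)
"""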