Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
strategy_embedding
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
strategy_embedding
Commits
3b6fa0b2
Commit
3b6fa0b2
authored
Nov 05, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use pandas
parent
7ff8f681
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
10 additions
and
9 deletions
+10
-9
get_tractate_data.py
dssm/get_tractate_data.py
+10
-9
No files found.
dssm/get_tractate_data.py
View file @
3b6fa0b2
...
...
@@ -166,12 +166,12 @@ def get_click_data(spark, card_type, start, end):
WHERE (spam_pv.device_id IS NULL or spam_pv.device_id ='')
and (dev.device_id is null or dev.device_id ='')
"""
.
format
(
start
,
end
,
card_type
,
reg
,
start
,
end
,
end
,
start
,
end
,
end
,
end
,
end
,
end
)
return
spark
.
sql
(
sql
)
return
spark
.
sql
(
sql
)
.
toPandas
()
def
get_exposure_data
(
spark
,
card_type
,
start
,
end
):
reg
=
r"""^\\d+$"""
return
spark
.
sql
(
"""
sql
=
"""
SELECT DISTINCT
t1.cl_id device_id,
cast(card_id AS int) card_id,
...
...
@@ -260,7 +260,8 @@ def get_exposure_data(spark, card_type, start, end):
,'promotion_qianka','promotion_xiaoyu','promotion_dianru','promotion_malioaso','promotion_malioaso-shequ'
,'promotion_shike','promotion_julang_jl03','promotion_zuimei')
AND t2.first_channel_source_type not LIKE 'promotion
\\
_jf
\\
_
%
'
"""
.
format
(
reg
,
card_type
,
end
,
start
,
end
,
end
,
end
,
end
,
end
,
start
,
end
))
"""
.
format
(
reg
,
card_type
,
end
,
start
,
end
,
end
,
end
,
end
,
end
,
start
,
end
)
return
spark
.
sql
(
sql
)
.
toPandas
()
def
get_card_feature_df
(
spark
,
card_type
,
yesterday
):
...
...
@@ -373,7 +374,7 @@ def get_card_feature_df(spark, card_type, yesterday):
and card_content_type = '{}'
and card_id rlike '{}'
"""
.
format
(
yesterday
,
card_type
,
reg
)
return
spark
.
sql
(
sql
)
return
spark
.
sql
(
sql
)
.
toPandas
()
def
get_device_tags
(
spark
):
...
...
@@ -382,7 +383,7 @@ def get_device_tags(spark):
FROM user_tag3_portrait
WHERE date = '{}'
"""
.
format
(
get_ndays_before
(
1
))
return
spark
.
sql
(
sql
)
return
spark
.
sql
(
sql
)
.
toPandas
()
if
__name__
==
"__main__"
:
...
...
@@ -391,15 +392,15 @@ if __name__ == "__main__":
# TODO days 30
start
,
end
=
get_ndays_before_no_minus
(
5
),
get_ndays_before_no_minus
(
1
)
click_df
=
get_click_data
(
spark
,
card_type
,
start
,
end
)
click_df
.
show
(
5
,
False
)
print
(
click_df
.
head
(
3
)
)
exposure_df
=
get_exposure_data
(
spark
,
card_type
,
start
,
end
)
exposure_df
.
show
(
5
,
False
)
print
(
exposure_df
.
head
(
3
)
)
tractate_feature_df
=
get_card_feature_df
(
spark
,
card_type
,
end
)
tractate_feature_df
.
show
(
5
,
False
)
print
(
tractate_feature_df
.
head
(
3
)
)
device_feature_df
=
get_device_tags
(
spark
)
device_feature_df
.
show
(
5
,
False
)
print
(
device_feature_df
.
head
(
3
)
)
# spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/dssm/get_tractate_data.py
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment