Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
strategy_embedding
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
strategy_embedding
Commits
223dcbc2
Commit
223dcbc2
authored
Nov 05, 2020
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
time logger
parent
0f9626e6
Show whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
7 additions
and
5 deletions
+7
-5
get_tractate_data.py
dssm/get_tractate_data.py
+7
-5
No files found.
dssm/get_tractate_data.py
View file @
223dcbc2
...
...
@@ -34,7 +34,8 @@ def get_spark(app_name=""):
"spark.sql.extensions"
,
"org.apache.spark.sql.TiExtensions"
)
.
config
(
"spark.tispark.pd.addresses"
,
"172.16.40.170:2379"
)
.
appName
(
app_name
)
.
enableHiveSupport
()
.
getOrCreate
()
# sc = spark.sparkContext
sc
=
spark
.
sparkContext
sc
.
setLogLevel
(
"WARN"
)
# sc.addPyFile("/srv/apps/strategy_embedding/utils/date.py")
ti
=
pti
.
TiContext
(
spark
)
ti
.
tidbMapDatabase
(
"jerry_test"
)
...
...
@@ -237,12 +238,13 @@ def get_exposure_data(spark, card_type, start, end):
if
__name__
==
"__main__"
:
spark
=
get_spark
(
"dssm_tractate_data"
)
card_tye
=
"user_post"
start
,
end
=
get_ndays_before_no_minus
(
180
),
get_ndays_before_no_minus
(
1
)
click_df
=
get_click_data
(
spark
,
card_tye
,
start
,
end
)
card_type
=
"user_post"
# TODO days 30
start
,
end
=
get_ndays_before_no_minus
(
5
),
get_ndays_before_no_minus
(
1
)
click_df
=
get_click_data
(
spark
,
card_type
,
start
,
end
)
click_df
.
show
(
5
,
False
)
exposure_df
=
get_exposure_data
(
spark
,
card_tye
,
start
,
end
)
exposure_df
=
get_exposure_data
(
spark
,
card_ty
p
e
,
start
,
end
)
exposure_df
.
show
(
5
,
False
)
# spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/dssm/get_tractate_data.py
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment