Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
S
strategy_embedding
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
rank
strategy_embedding
Commits
56c47d31
Commit
56c47d31
authored
4 years ago
by
赵威
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
get tractate data
parent
a1c23e8d
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
7 additions
and
7 deletions
+7
-7
spark.py
utils/spark.py
+1
-0
tests.py
word_vector/tests.py
+0
-7
word_to_vec.py
word_vector/word_to_vec.py
+6
-0
No files found.
utils/spark.py
View file @
56c47d31
...
...
@@ -18,6 +18,7 @@ def get_spark(app_name=""):
"spark.sql.extensions"
,
"org.apache.spark.sql.TiExtensions"
)
.
config
(
"spark.tispark.pd.addresses"
,
"172.16.40.170:2379"
)
.
appName
(
app_name
)
.
enableHiveSupport
()
.
getOrCreate
())
spark
.
addPyFile
(
"/srv/apps/strategy_embedding/utils/date.py"
)
ti
=
pti
.
TiContext
(
spark
)
ti
.
tidbMapDatabase
(
"jerry_test"
)
return
spark
...
...
This diff is collapsed.
Click to expand it.
word_vector/tests.py
View file @
56c47d31
# from django.test import TestCase
from
utils.date
import
get_ndays_before_no_minus
from
utils.spark
import
get_spark
,
get_tracate_click_data
# create your tests here.
if
__name__
==
"__main__"
:
spark
=
get_spark
(
"test"
)
click_data
=
get_tracate_click_data
(
spark
,
get_ndays_before_no_minus
(
2
),
get_ndays_before_no_minus
(
1
))
click_data
.
show
(
5
,
False
)
# /opt/spark/bin/spark-submit --master yarn --deploy-mode client --queue root.strategy --driver-memory 16g --executor-memory 1g --executor-cores 1 --num-executors 70 --conf spark.default.parallelism=100 --conf spark.storage.memoryFraction=0.5 --conf spark.shuffle.memoryFraction=0.3 --conf spark.locality.wait=0 --jars /srv/apps/tispark-core-2.1-SNAPSHOT-jar-with-dependencies.jar,/srv/apps/spark-connector_2.11-1.9.0-rc2.jar,/srv/apps/mysql-connector-java-5.1.38.jar /srv/apps/strategy_embedding/word_vector/tests.py
This diff is collapsed.
Click to expand it.
word_vector/word_to_vec.py
View file @
56c47d31
...
...
@@ -4,8 +4,10 @@ import time
from
gensim.models
import
Word2Vec
,
word2vec
from
gm_rpcd.all
import
bind
from
utils.date
import
get_ndays_before_no_minus
from
utils.db
import
get_device_click_tractate_ids
from
utils.es
import
es_scan
from
utils.spark
import
get_spark
,
get_tracate_click_data
base_dir
=
os
.
getcwd
()
print
(
"base_dir: "
+
base_dir
)
...
...
@@ -94,4 +96,8 @@ if __name__ == "__main__":
for
i
in
[
"双眼皮"
,
"隆鼻"
]:
print
(
word_similarity
(
i
))
spark
=
get_spark
(
"test"
)
click_data
=
get_tracate_click_data
(
spark
,
get_ndays_before_no_minus
(
2
),
get_ndays_before_no_minus
(
1
))
click_data
.
show
(
5
,
False
)
print
(
"total cost: {:.2f}mins"
.
format
((
time
.
time
()
-
begin_time
)
/
60
))
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment