Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
0aae1e2c
Commit
0aae1e2c
authored
6 years ago
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
change test file
parent
22de0c8b
master
gyz
mr/beta/bug22
offic
rtt
updatedb
zhao
zhao22
No related merge requests found
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
22 additions
and
10 deletions
+22
-10
multi.py
tensnsorflow/multi.py
+22
-10
No files found.
tensnsorflow/multi.py
View file @
0aae1e2c
...
...
@@ -6,6 +6,7 @@ import pytispark.pytispark as pti
from
pyspark.sql
import
SparkSession
import
datetime
import
pandas
as
pd
import
subprocess
def
app_list_func
(
x
,
l
):
...
...
@@ -126,6 +127,19 @@ def con_sql(db,sql):
db
.
close
()
return
df
def get_filename(dir_in):
    """List the HDFS part-files under *dir_in*.

    Runs ``hdfs dfs -ls`` through a shell, keeps entries whose basename
    starts with ``part`` (e.g. ``part-r-00000``), and returns them as
    full ``hdfs://`` URIs.

    :param dir_in: HDFS directory path, e.g. ``"/strategy/esmm/tr"``.
    :return: list of ``"hdfs://172.16.32.4:8020<path>"`` strings; empty
        when the listing produces no output (e.g. the command fails).
    """
    pre_path = "hdfs://172.16.32.4:8020"
    # NOTE(review): shell=True with string concatenation is injection-prone
    # if dir_in ever comes from user input; callers here pass constants.
    args = "hdfs dfs -ls " + dir_in + " | awk '{print $8}'"
    proc = subprocess.Popen(args, stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE, shell=True)
    s_output, s_err = proc.communicate()
    a = []
    for raw in s_output.split():
        # Fix: decode the bytes properly instead of slicing the repr
        # produced by str(b'...') (the old str(i)[2:-1] hack).
        path = raw.decode("utf-8", "replace")
        # Fix: take the basename rather than the hard-coded split index
        # [4], which only worked for directories exactly 4 levels deep.
        basename = path.rstrip("/").split("/")[-1]
        if basename.startswith("part"):
            a.append(pre_path + path)
    return a
if
__name__
==
'__main__'
:
# sparkConf = SparkConf().set("spark.hive.mapred.supports.subdirectories", "true") \
...
...
@@ -148,7 +162,6 @@ if __name__ == '__main__':
# get_predict(validate_date, value_map, app_list_map)
# [path + "tr/part-r-00000"]
import
subprocess
# spark = SparkSession.builder.getOrCreate()
# b = [("a", 1), ("a", 1), ("b", 3), ("a", 2)]
...
...
@@ -157,15 +170,14 @@ if __name__ == '__main__':
# df.show()
# df.createOrReplaceTempView("df")
# t = spark.sql("select id from df").map()
# print(t)
# Smoke-test block: pull a few device ids from TiDB, then list the HDFS
# part-files for the train/validation sets via get_filename().
# NOTE(review): credentials are hard-coded in source — move to config/env.
db = pymysql.connect(host='172.16.40.158', port=4000, user='root',
                     passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select device_id from esmm_train_data limit 10"
cursor = db.cursor()
cursor.execute(sql)
# fetchall() returns a tuple of 1-tuples, one (device_id,) per row.
result = cursor.fetchall()
print(result)
# De-duplicate the device ids; set() drops duplicates but loses order.
a = list(set([i[0] for i in result]))
print(a)
# NOTE(review): cursor/connection are never closed — acceptable for a
# one-off script, but a `finally: db.close()` would be safer.
tr_files = get_filename("/strategy/esmm/tr")
print(tr_files)
va_files = get_filename("/strategy/esmm/va")
print("test")
print(va_files)
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment