Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
38d63dc0
Commit
38d63dc0
authored
May 05, 2019
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
修改测试文件
parent
8662e492
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
9 additions
and
4 deletions
+9
-4
multi.py
tensnsorflow/multi.py
+6
-4
record.py
tensnsorflow/record.py
+3
-0
No files found.
tensnsorflow/multi.py
View file @
38d63dc0
...
@@ -197,14 +197,16 @@ def con_sql(db,sql):
...
@@ -197,14 +197,16 @@ def con_sql(db,sql):
def
test
():
def
test
():
sql
=
"select stat_date,cid_id,y,ccity_name from esmm_train_data limit 60"
rdd
=
spark
.
sql
(
sql
)
.
select
(
"stat_date"
,
"cid_id"
,
"y"
,
"ccity_name"
)
.
rdd
spark
.
createDataFrame
(
rdd
)
.
show
(
6
)
from
hdfs
import
InsecureClient
from
hdfs
import
InsecureClient
from
hdfs.ext.dataframe
import
read_dataframe
from
hdfs.ext.dataframe
import
read_dataframe
client
=
InsecureClient
(
'http://nvwa01:50070'
)
client
=
InsecureClient
(
'http://nvwa01:50070'
)
df
=
read_dataframe
(
client
,
"/recommend/native/part-00199-f83757ab-9f64-4a2c-9f27-0b76df51c1c4-c000.avro"
)
df
=
read_dataframe
(
client
,
"/recommend/va/*.avro"
)
print
(
df
.
head
())
print
(
df
.
head
())
print
(
df
.
count
())
#
print(df.count())
# spark.sql("use online")
# spark.sql("use online")
# spark.sql("ADD JAR /srv/apps/brickhouse-0.7.1-SNAPSHOT.jar")
# spark.sql("ADD JAR /srv/apps/brickhouse-0.7.1-SNAPSHOT.jar")
...
...
tensnsorflow/record.py
View file @
38d63dc0
...
@@ -28,6 +28,9 @@ def gen_tfrecords(in_file):
...
@@ -28,6 +28,9 @@ def gen_tfrecords(in_file):
from
hdfs.ext.dataframe
import
read_dataframe
from
hdfs.ext.dataframe
import
read_dataframe
client
=
InsecureClient
(
'http://nvwa01:50070'
)
client
=
InsecureClient
(
'http://nvwa01:50070'
)
df
=
read_dataframe
(
client
,
"/recommend/tr/part-00000-2f0d632b-0c61-4a0b-97d4-54bd5e579c5e-c000.avro"
)
df
=
read_dataframe
(
client
,
"/recommend/tr/part-00000-2f0d632b-0c61-4a0b-97d4-54bd5e579c5e-c000.avro"
)
df
=
df
.
rename
({
"app_list"
,
"level2_ids"
,
"level3_ids"
,
"stat_date"
,
"ucity_id"
,
"ccity_name"
,
"device_type"
,
"manufacturer"
,
"channel"
,
"top"
,
"time"
,
"hospital_id"
,
"treatment_method"
,
"price_min"
,
"price_max"
,
"treatment_time"
,
"maintain_time"
,
"recover_time"
,
"y"
,
"z"
})
for
i
in
range
(
df
.
shape
[
0
]):
for
i
in
range
(
df
.
shape
[
0
]):
feats
=
[
"cid_id"
]
feats
=
[
"cid_id"
]
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment