Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
e916ae5c
Commit
e916ae5c
authored
Jun 03, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' of
http://git.wanmeizhensuo.com/ML/ffm-baseline
parents
9cde0cf4
eecd0fdd
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
11 additions
and
10 deletions
+11
-10
feature_engineering.py
eda/esmm/Model_pipline/feature_engineering.py
+1
-0
submit.sh
eda/esmm/Model_pipline/submit.sh
+5
-2
to_database.py
eda/esmm/Model_pipline/to_database.py
+5
-8
train.py
eda/esmm/Model_pipline/train.py
+0
-0
No files found.
eda/esmm/Model_pipline/feature_engineering.py
View file @
e916ae5c
...
@@ -75,6 +75,7 @@ def con_sql(db,sql):
...
@@ -75,6 +75,7 @@ def con_sql(db,sql):
db
.
close
()
db
.
close
()
return
df
return
df
def
feature_engineer
():
def
feature_engineer
():
apps_number
,
app_list_map
,
level2_number
,
leve2_map
,
level3_number
,
leve3_map
=
get_map
()
apps_number
,
app_list_map
,
level2_number
,
leve2_map
,
level3_number
,
leve3_map
=
get_map
()
unique_values
=
[]
unique_values
=
[]
...
...
eda/esmm/Model_pipline/submit.sh
View file @
e916ae5c
...
@@ -8,7 +8,7 @@ export CLASSPATH="/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/api
...
@@ -8,7 +8,7 @@ export CLASSPATH="/opt/hadoop/etc/hadoop:/opt/hadoop/share/hadoop/common/lib/api
echo
$CLASSPATH
echo
$CLASSPATH
export
LD_LIBRARY_PATH
=
"/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server:/opt/hadoop/lib/native"
export
LD_LIBRARY_PATH
=
"/usr/lib/jvm/java-8-openjdk-amd64/jre/lib/amd64/server:/opt/hadoop/lib/native"
echo
$LD_LIBRARY_PATH
echo
$LD_LIBRARY_PATH
export
PATH
=
$PATH
:/usr/local/hadoop/bin/
echo
"rm model file"
echo
"rm model file"
rm
-r
${
LOCAL_PATH
}
/model_ckpt/DeepCvrMTL/20
*
rm
-r
${
LOCAL_PATH
}
/model_ckpt/DeepCvrMTL/20
*
...
@@ -29,4 +29,6 @@ echo "sort and 2sql"
...
@@ -29,4 +29,6 @@ echo "sort and 2sql"
${
PYTHON_PATH
}
${
MODEL_PATH
}
/to_database.py
>
"/home/gmuser/esmm/log/insert_
$b
.log"
${
PYTHON_PATH
}
${
MODEL_PATH
}
/to_database.py
>
"/home/gmuser/esmm/log/insert_
$b
.log"
echo
"delete files"
echo
"delete files"
rm
/home/gmuser/esmm/
*
.csv
rm
/home/gmuser/esmm/native/
*
rm
/home/gmuser/esmm/nearby/
*
\ No newline at end of file
eda/esmm/Model_pipline/to_database.py
View file @
e916ae5c
...
@@ -19,12 +19,7 @@ def con_sql(sql):
...
@@ -19,12 +19,7 @@ def con_sql(sql):
return
result
return
result
def
nearby_set_join
(
lst
):
def
set_join
(
lst
):
# return ','.join([str(i) for i in list(lst)])
return
','
.
join
([
str
(
i
)
for
i
in
lst
.
unique
()
.
tolist
()])
def
native_set_join
(
lst
):
l
=
lst
.
unique
()
.
tolist
()
l
=
lst
.
unique
()
.
tolist
()
r
=
[
str
(
i
)
for
i
in
l
]
r
=
[
str
(
i
)
for
i
in
l
]
r
=
r
[:
500
]
r
=
r
[:
500
]
...
@@ -39,7 +34,8 @@ def main():
...
@@ -39,7 +34,8 @@ def main():
df1
=
pd
.
read_csv
(
path
+
"/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df1
=
pd
.
read_csv
(
path
+
"/native/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
native_set_join
})
.
reset_index
(
drop
=
False
)
df3
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
\
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
df3
.
columns
=
[
"device_id"
,
"city_id"
,
"native_queue"
]
df3
.
columns
=
[
"device_id"
,
"city_id"
,
"native_queue"
]
print
(
"native_device_count"
,
df3
.
shape
)
print
(
"native_device_count"
,
df3
.
shape
)
...
@@ -50,7 +46,8 @@ def main():
...
@@ -50,7 +46,8 @@ def main():
df1
=
pd
.
read_csv
(
path
+
"/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df1
=
pd
.
read_csv
(
path
+
"/nearby/pred.txt"
,
sep
=
'
\t
'
,
header
=
None
,
names
=
[
"ctr"
,
"cvr"
,
"ctcvr"
])
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df2
[
"ctr"
],
df2
[
"cvr"
],
df2
[
"ctcvr"
]
=
df1
[
"ctr"
],
df1
[
"cvr"
],
df1
[
"ctcvr"
]
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
nearby_set_join
})
.
reset_index
(
drop
=
False
)
df4
=
df2
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
apply
(
lambda
x
:
x
.
sort_values
(
by
=
"ctcvr"
,
ascending
=
False
))
\
.
reset_index
(
drop
=
True
)
.
groupby
(
by
=
[
"uid"
,
"city"
])
.
agg
({
'cid_id'
:
set_join
})
.
reset_index
(
drop
=
False
)
df4
.
columns
=
[
"device_id"
,
"city_id"
,
"nearby_queue"
]
df4
.
columns
=
[
"device_id"
,
"city_id"
,
"nearby_queue"
]
print
(
"nearby_device_count"
,
df4
.
shape
)
print
(
"nearby_device_count"
,
df4
.
shape
)
...
...
eda/esmm/Model_pipline/train.py
View file @
e916ae5c
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment