Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
41328a9b
Commit
41328a9b
authored
Oct 10, 2018
by
王志伟
Browse files
Options
Browse Files
Download
Plain Diff
fix conflict
parents
3b477572
fcf2dcac
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
45 additions
and
36 deletions
+45
-36
ffm_get_data.py
eda/gray_stat/ffm_get_data.py
+26
-11
yesterday.py
eda/gray_stat/yesterday.py
+7
-12
Main.scala
eda/node2vec/src/main/scala/com/gmei/Main.scala
+6
-10
sql_change.py
local/sql_change.py
+6
-3
No files found.
eda/gray_stat/ffm_get_data.py
View file @
41328a9b
...
@@ -12,28 +12,43 @@ def get_yesterday_date():
...
@@ -12,28 +12,43 @@ def get_yesterday_date():
"""
"""
today
=
datetime
.
date
.
today
()
today
=
datetime
.
date
.
today
()
yesterday
=
today
-
datetime
.
timedelta
(
days
=
1
)
yesterday
=
today
-
datetime
.
timedelta
(
days
=
1
)
yesterday
=
yesterday
.
strftime
(
"
%
Y
%
m
%
d"
)
yesterday
=
yesterday
.
strftime
(
"
%
Y-
%
m-
%
d"
)
print
(
yesterday
)
return
yesterday
return
yesterday
#today = datetime.date.today().strftime("%Y%m%d")
#today = datetime.date.today().strftime("%Y%m%d")
#return today
#return today
def
get_data
():
def
get_data
():
conn2db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
conn2db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
cursor
=
conn2db
.
cursor
()
cursor
=
conn2db
.
cursor
()
sql
=
"select device_id
,city_id
from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'"
sql
=
"select device_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'"
cursor
.
execute
(
sql
)
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
result
=
cursor
.
fetchall
()
device_id
=
tuple
(
pd
.
DataFrame
(
list
(
result
))[
0
]
.
values
.
tolist
())
cursor
.
close
()
cursor
.
close
()
return
result
return
device_id
def
result2file
(
data
):
def
ctr
(
date
):
output
=
DIRECTORY_PATH
+
"ffm_get_data_"
+
get_yesterday_date
()
+
".csv"
device_id
=
get_data
()
with
open
(
output
,
"w"
)
as
f
:
sql_click
=
"select count(cid) from data_feed_click "
\
for
i
in
data
:
"where cid_type = 'diary' "
\
line
=
str
(
i
[
0
])
+
","
+
str
(
i
[
1
])
+
"
\n
"
"and stat_date = '{}' and device_id in {};"
.
format
(
date
,
device_id
)
f
.
write
(
line
)
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_prod'
)
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql_click
)
click
=
cursor
.
fetchone
()[
0
]
print
(
"点击数:"
+
str
(
click
))
sql_exp
=
"select count(cid) from data_feed_exposure "
\
"where cid_type = 'diary' and stat_date = '{}' and "
\
"device_id in {}"
.
format
(
date
,
device_id
)
cursor
.
execute
(
sql_exp
)
exp
=
cursor
.
fetchone
()[
0
]
print
(
"曝光数:"
+
str
(
exp
))
if
exp
!=
0
:
print
(
"点击率:"
+
str
(
click
/
exp
))
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
result
=
get_data
()
date
=
get_yesterday_date
()
result2file
(
result
)
ctr
(
date
)
ffm_get_data
.py
→
eda/gray_stat/yesterday
.py
View file @
41328a9b
# -*- coding: UTF-8 -*-
import
pymysql
import
pymysql
import
datetime
import
datetime
import
pandas
as
pd
DIRECTORY_PATH
=
"/data2/ffm/"
DIRECTORY_PATH
=
"/data2/ffm/"
...
@@ -10,29 +12,23 @@ def get_yesterday_date():
...
@@ -10,29 +12,23 @@ def get_yesterday_date():
"""
"""
today
=
datetime
.
date
.
today
()
today
=
datetime
.
date
.
today
()
yesterday
=
today
-
datetime
.
timedelta
(
days
=
1
)
yesterday
=
today
-
datetime
.
timedelta
(
days
=
1
)
yesterday
=
yesterday
.
strftime
(
"
%
Y
%
m
%
d"
)
yesterday
=
yesterday
.
strftime
(
"
%
Y-
%
m-
%
d"
)
print
(
yesterday
)
return
yesterday
return
yesterday
#today = datetime.date.today().strftime("%Y%m%d")
#today = datetime.date.today().strftime("%Y%m%d")
#return today
#return today
def
get_data
():
def
get_data
():
conn2db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
conn2db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
cursor
=
conn2db
.
cursor
()
cursor
=
conn2db
.
cursor
()
sql
=
"select device_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'"
sql
=
"select device_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'"
cursor
.
execute
(
sql
)
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
result
=
cursor
.
fetchall
()
pd
.
DataFrame
(
list
(
result
))
.
to_csv
(
"/data2/ffm/yesterday.csv"
)
cursor
.
close
()
cursor
.
close
()
return
result
def
result2file
(
data
):
output
=
DIRECTORY_PATH
+
"ffm_get_data_"
+
get_yesterday_date
+
".csv"
with
open
(
output
,
"w"
)
as
f
:
for
i
in
data
:
line
=
str
(
i
[
0
])
+
","
+
str
(
i
[
1
])
+
"
\n
"
f
.
write
(
line
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
result
=
get_data
()
get_data
()
result2file
(
result
)
\ No newline at end of file
eda/node2vec/src/main/scala/com/gmei/Main.scala
View file @
41328a9b
...
@@ -96,12 +96,13 @@ object Main {
...
@@ -96,12 +96,13 @@ object Main {
ti
.
tidbMapTable
(
dbName
=
GmeiConfig
.
config
.
getString
(
"tidb.database"
),
tableName
=
"data_feed_click"
)
ti
.
tidbMapTable
(
dbName
=
GmeiConfig
.
config
.
getString
(
"tidb.database"
),
tableName
=
"data_feed_click"
)
// val date8 = GmeiConfig.getMinusNDate(8
)
val
date8
=
GmeiConfig
.
getMinusNDate
(
30
)
val
tidb_inupt
=
sc
.
sql
(
val
tidb_inupt
=
sc
.
sql
(
s
"""
s
"""
|SELECT
|SELECT
| service_id,cid
| service_id,cid
|FROM nd_data_meigou_cid
|FROM nd_data_meigou_cid
|where stat_date > '${date8}'
"""
.
stripMargin
"""
.
stripMargin
)
)
...
@@ -184,29 +185,27 @@ object Main {
...
@@ -184,29 +185,27 @@ object Main {
GmeiConfig
.
writeToJDBCTable
(
similar_result
,
table
=
"nd_cid_similarity_matrix"
,
SaveMode
.
Overwrite
)
GmeiConfig
.
writeToJDBCTable
(
similar_result
,
table
=
"nd_cid_similarity_matrix"
,
SaveMode
.
Overwrite
)
//3. cids queue map to device_id
//3. cids queue map to device_id
ti
.
tidbMapTable
(
dbName
=
GmeiConfig
.
config
.
getString
(
"tidb.database"
),
tableName
=
"nd_cid_similarity_matrix"
)
ti
.
tidbMapTable
(
dbName
=
GmeiConfig
.
config
.
getString
(
"tidb.database"
),
tableName
=
"nd_cid_similarity_matrix"
)
val
device_id
=
sc
.
sql
(
val
device_id
=
sc
.
sql
(
s
"""
s
"""
|select a.device_id device_id,a.city_id city_id ,b.similarity_cid similarity_cid from
|select a.device_id device_id,a.city_id city_id ,b.similarity_cid similarity_cid from
|(select device_id,city_id,first(cid) as cid from data_feed_click
|(select device_id,
first(city_id) as
city_id,first(cid) as cid from data_feed_click
|where cid in (select cid from nd_cid_similarity_matrix)
|where cid in (select cid from nd_cid_similarity_matrix)
|group by device_id
order by time
) a left join
|group by device_id) a left join
|nd_cid_similarity_matrix b
|nd_cid_similarity_matrix b
|on a.cid = b.cid
|on a.cid = b.cid
|where b.similarity_cid is not null
|where b.similarity_cid is not null
"""
.
stripMargin
"""
.
stripMargin
)
).
na
.
fill
(
Map
(
"city_id"
->
"beijing"
))
device_id
.
na
.
fill
(
Map
(
"city_id"
->
"beijing"
))
device_id
.
show
()
device_id
.
show
()
val
device_queue
=
device_id
.
rdd
.
map
{
item
=>
val
device_queue
=
device_id
.
rdd
.
map
{
item
=>
val
parts
=
(
item
.
getAs
[
String
](
fieldName
=
"device_id"
),
item
.
getAs
[
String
](
fieldName
=
"city_id"
),
item
.
getAs
[
String
](
fieldName
=
"similarity_cid"
))
val
parts
=
(
item
.
getAs
[
String
](
fieldName
=
"device_id"
),
item
.
getAs
[
String
](
fieldName
=
"city_id"
),
item
.
getAs
[
String
](
fieldName
=
"similarity_cid"
))
Try
{
Try
{
(
parts
.
_1
,
Try
(
parts
.
_2
.
toString
.
replace
(
"worldwide"
,
"beijing"
)),
Try
(
parts
.
_3
.
toString
.
replace
(
"diary|"
,
""
)).
getOrElse
(
null
))
(
parts
.
_1
,
Try
(
parts
.
_2
.
toString
.
replace
(
"worldwide"
,
"beijing"
))
.
getOrElse
(
null
)
,
Try
(
parts
.
_3
.
toString
.
replace
(
"diary|"
,
""
)).
getOrElse
(
null
))
}.
getOrElse
(
null
)
}.
getOrElse
(
null
)
}.
filter
(
_
!=
null
).
toDF
(
"device_id"
,
"city_id"
,
"similarity_cid"
)
}.
filter
(
_
!=
null
).
toDF
(
"device_id"
,
"city_id"
,
"similarity_cid"
)
...
@@ -221,6 +220,3 @@ object Main {
...
@@ -221,6 +220,3 @@ object Main {
sys
.
exit
(
1
)
sys
.
exit
(
1
)
}
}
}
}
local/sql_change.py
View file @
41328a9b
import
pymysql
import
pymysql
import
pandas
as
pd
import
pandas
as
pd
# 从一个数据库读数据,把读到的数据写到另外一个数据库
def
get_data
():
def
get_data
():
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'eagle'
)
cursor
=
db
.
cursor
()
cursor
=
db
.
cursor
()
sql
=
"select native_queue,nearby_queue,nation_queue,megacity_queue,device_id,city_id from ffm_diary_queue;"
sql
=
"select native_queue,nearby_queue,nation_queue,megacity_queue,device_id,city_id from ffm_diary_queue;"
cursor
.
execute
(
sql
)
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
result
=
cursor
.
fetchall
()
pd
.
DataFrame
(
list
(
result
))
.
to_csv
(
"/home/gmuser/ffm.csv"
,
index
=
None
)
df
=
pd
.
DataFrame
(
list
(
result
))
df
=
pd
.
read_csv
(
"/home/gmuser/ffm.csv"
)
for
i
in
range
(
df
.
shape
[
0
]):
for
i
in
range
(
df
.
shape
[
0
]):
a
=
df
.
loc
[
i
,
:]
.
values
a
=
df
.
loc
[
i
,
:]
.
values
insert
(
a
)
insert
(
a
)
...
@@ -19,6 +18,10 @@ def insert(a):
...
@@ -19,6 +18,10 @@ def insert(a):
db
=
pymysql
.
connect
(
host
=
'rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com'
,
port
=
3306
,
user
=
'doris'
,
db
=
pymysql
.
connect
(
host
=
'rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com'
,
port
=
3306
,
user
=
'doris'
,
passwd
=
'o5gbA27hXHHm'
,
passwd
=
'o5gbA27hXHHm'
,
db
=
'doris_prod'
)
db
=
'doris_prod'
)
# List("AB20292B-5D15-4C44-9429-1C2FF5ED26F6", "802C5FDC-5DC6-42D0-8F6F-2DBE200BB21B",
# "358035085192742", "B2F0665E-4375-4169-8FE3-8A26A1CFE248", "863455037703008",
# "65EC6C14-1AD6-44C2-AED2-C41452284E91", "29548727-8242-4D58-8151-F603F975BB98")
# sql_delete = ""
sql
=
"INSERT INTO device_diary_queue (native_queue, nearby_queue, nation_queue, "
\
sql
=
"INSERT INTO device_diary_queue (native_queue, nearby_queue, nation_queue, "
\
"megacity_queue,device_id,city_id) VALUES ('{}','{}','{}','{}','{}','{}');"
.
format
\
"megacity_queue,device_id,city_id) VALUES ('{}','{}','{}','{}','{}','{}');"
.
format
\
(
a
[
0
],
a
[
1
],
a
[
2
],
a
[
3
],
a
[
4
],
a
[
5
])
(
a
[
0
],
a
[
1
],
a
[
2
],
a
[
3
],
a
[
4
],
a
[
5
])
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment