Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
0027e2d0
Commit
0027e2d0
authored
Nov 22, 2018
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
增加获取城市列表
parent
064892be
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
90 additions
and
66 deletions
+90
-66
ctr.py
ctr.py
+89
-65
feature_engineering.py
tensnsorflow/feature_engineering.py
+1
-1
No files found.
ctr.py
View file @
0027e2d0
...
...
@@ -3,13 +3,9 @@ import pymysql
from
datetime
import
datetime
from
datetime
import
timedelta
def
get_tail8
():
sql
=
"select distinct device_id from data_feed_click
\
where stat_date='{}'
\
and cid_type='{}'
\
and device_id regexp '8$';"
.
format
(
stat_date
,
cid_type
)
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
def
get_city
():
sql
=
"select distinct city_id from data_feed_exposure where stat_date >= '2018-10-01' order by city_id desc"
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_prod'
)
cursor
=
db
.
cursor
()
print
(
"开始获取"
)
cursor
.
execute
(
sql
)
...
...
@@ -17,75 +13,103 @@ def get_tail8():
result
=
cursor
.
fetchall
()
db
.
close
()
user
=
pd
.
DataFrame
(
list
(
result
))[
0
]
.
values
.
tolist
()
user
=
tuple
(
user
)
print
(
"尾号是8的用户个数"
)
print
(
len
(
user
))
return
user
print
(
user
)
def
get_ctr
(
user_tuple
):
sql
=
"select count(device_id) from data_feed_click
\
where stat_date='{}'
\
and cid_type='{}'
\
and device_id in {}"
.
format
(
stat_date
,
cid_type
,
user_tuple
)
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
cursor
=
db
.
cursor
()
print
(
"开始获取"
)
cursor
.
execute
(
sql
)
click
=
cursor
.
fetchall
()[
0
][
0
]
print
(
click
)
sql
=
"select count(device_id) from data_feed_exposure
\
where stat_date='{}'
\
and cid_type='{}'
\
and device_id in {}"
.
format
(
stat_date
,
cid_type
,
user_tuple
)
cursor
=
db
.
cursor
()
print
(
"开始获取"
)
cursor
.
execute
(
sql
)
exp
=
cursor
.
fetchall
()[
0
][
0
]
db
.
close
()
print
(
exp
)
print
(
click
/
exp
)
def
get_tail6
():
df
=
pd
.
read_csv
(
path
+
"{}predictTail6Unique.csv"
.
format
(
stat_date
))
pre_list
=
tuple
(
eval
(
df
.
loc
[
0
,
"list"
]))
print
(
len
(
pre_list
))
print
(
pre_list
[:
2
])
db
=
pymysql
.
connect
(
host
=
'10.66.157.22'
,
port
=
4000
,
user
=
'root'
,
passwd
=
'3SYz54LS9#^9sBvC'
,
db
=
'jerry_test'
)
sql
=
"select distinct device_id from data_feed_click
\
where stat_date='{}'
\
and cid_type='{}'
\
and device_id in {}"
.
format
(
stat_date
,
cid_type
,
pre_list
)
sql
=
"select distinct name from api_tag where tag_type = 4"
db
=
pymysql
.
connect
(
host
=
'rdsfewzdmf0jfjp9un8xj.mysql.rds.aliyuncs.com'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
cursor
=
db
.
cursor
()
print
(
"开始获取"
)
cursor
.
execute
(
sql
)
print
(
"成功获取"
)
result
=
cursor
.
fetchall
()
db
.
close
()
print
(
pd
.
DataFrame
(
list
(
result
))
.
empty
)
user
=
pd
.
DataFrame
(
list
(
result
))[
0
]
.
values
.
tolist
()
user
=
tuple
(
user
)
print
(
"用户个数"
)
print
(
len
(
user
))
return
user
print
(
user
)
# def get_tail8():
# sql = "select distinct device_id from data_feed_click \
# where stat_date='{}' \
# and cid_type='{}' \
# and device_id regexp '8$';".format(stat_date,cid_type)
# db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
# cursor = db.cursor()
# print("开始获取")
# cursor.execute(sql)
# print("成功获取")
# result = cursor.fetchall()
# db.close()
# user = pd.DataFrame(list(result))[0].values.tolist()
# user = tuple(user)
# print("尾号是8的用户个数")
# print(len(user))
# return user
# def get_ctr(user_tuple):
# sql = "select count(device_id) from data_feed_click \
# where stat_date='{}' \
# and cid_type='{}' \
# and device_id in {}".format(stat_date, cid_type, user_tuple)
# db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
# cursor = db.cursor()
# print("开始获取")
# cursor.execute(sql)
# click = cursor.fetchall()[0][0]
# print(click)
#
# sql = "select count(device_id) from data_feed_exposure \
# where stat_date='{}' \
# and cid_type='{}' \
# and device_id in {}".format(stat_date, cid_type, user_tuple)
# cursor = db.cursor()
# print("开始获取")
# cursor.execute(sql)
# exp = cursor.fetchall()[0][0]
# db.close()
# print(exp)
# print(click / exp)
# def get_tail6():
# df = pd.read_csv(path+"{}predictTail6Unique.csv".format(stat_date))
# pre_list = tuple(eval(df.loc[0,"list"]))
# print(len(pre_list))
# print(pre_list[:2])
# db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
# sql = "select distinct device_id from data_feed_click \
# where stat_date='{}' \
# and cid_type='{}' \
# and device_id in {}".format(stat_date,cid_type,pre_list)
# cursor = db.cursor()
# print("开始获取")
# cursor.execute(sql)
# print("成功获取")
# result = cursor.fetchall()
# db.close()
# print(pd.DataFrame(list(result)).empty)
# user = pd.DataFrame(list(result))[0].values.tolist()
# user = tuple(user)
# print("用户个数")
# print(len(user))
# return user
if
__name__
==
"__main__"
:
path
=
"/data/models/"
cid_type
=
"diary"
now
=
datetime
.
now
()
year
=
now
.
year
month
=
now
.
month
day
=
now
.
day
stat_date
=
datetime
(
year
,
month
,
day
)
stat_date
=
(
stat_date
-
timedelta
(
days
=
1
))
.
strftime
(
"
%
Y-
%
m-
%
d"
)
print
(
stat_date
)
tail6
=
get_tail6
()
get_ctr
(
tail6
)
tail8
=
get_tail8
()
get_ctr
(
tail8
)
get_city
()
# path = "/data/models/"
# cid_type = "diary"
# now = datetime.now()
# year = now.year
# month = now.month
# day = now.day
# stat_date = datetime(year, month, day)
# stat_date = (stat_date - timedelta(days=1)).strftime("%Y-%m-%d")
# print(stat_date)
# tail6 = get_tail6()
# get_ctr(tail6)
# tail8 = get_tail8()
# get_ctr(tail8)
...
...
tensnsorflow/feature_engineering.py
View file @
0027e2d0
...
...
@@ -77,7 +77,7 @@ def test():
# click = click.map(lambda x: (x[0], x[1], x[2], 1))
def
hive
():
conf
=
SparkConf
()
.
setAppName
(
"My app"
)
conf
=
SparkConf
()
.
set
Master
(
"spark://10.30.181.88:7077"
)
.
set
AppName
(
"My app"
)
sc
=
SparkContext
(
conf
=
conf
)
sc
.
setLogLevel
(
"WARN"
)
sqlContext
=
HiveContext
(
sc
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment