Commit 0027e2d0 authored by 张彦钊's avatar 张彦钊

增加获取城市列表

parent 064892be
......@@ -3,13 +3,9 @@ import pymysql
from datetime import datetime
from datetime import timedelta
def get_tail8():
sql = "select distinct device_id from data_feed_click \
where stat_date='{}' \
and cid_type='{}' \
and device_id regexp '8$';".format(stat_date,cid_type)
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
def get_city():
sql = "select distinct city_id from data_feed_exposure where stat_date >= '2018-10-01' order by city_id desc"
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
cursor = db.cursor()
print("开始获取")
cursor.execute(sql)
......@@ -17,75 +13,103 @@ def get_tail8():
result = cursor.fetchall()
db.close()
user = pd.DataFrame(list(result))[0].values.tolist()
user = tuple(user)
print("尾号是8的用户个数")
print(len(user))
return user
print(user)
def get_ctr(user_tuple):
sql = "select count(device_id) from data_feed_click \
where stat_date='{}' \
and cid_type='{}' \
and device_id in {}".format(stat_date, cid_type, user_tuple)
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
cursor = db.cursor()
print("开始获取")
cursor.execute(sql)
click = cursor.fetchall()[0][0]
print(click)
sql = "select count(device_id) from data_feed_exposure \
where stat_date='{}' \
and cid_type='{}' \
and device_id in {}".format(stat_date, cid_type, user_tuple)
cursor = db.cursor()
print("开始获取")
cursor.execute(sql)
exp = cursor.fetchall()[0][0]
db.close()
print(exp)
print(click / exp)
def get_tail6():
df = pd.read_csv(path+"{}predictTail6Unique.csv".format(stat_date))
pre_list = tuple(eval(df.loc[0,"list"]))
print(len(pre_list))
print(pre_list[:2])
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
sql = "select distinct device_id from data_feed_click \
where stat_date='{}' \
and cid_type='{}' \
and device_id in {}".format(stat_date,cid_type,pre_list)
sql = "select distinct name from api_tag where tag_type = 4"
db = pymysql.connect(host='rdsfewzdmf0jfjp9un8xj.mysql.rds.aliyuncs.com', port=3306,
user='work', passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
cursor = db.cursor()
print("开始获取")
cursor.execute(sql)
print("成功获取")
result = cursor.fetchall()
db.close()
print(pd.DataFrame(list(result)).empty)
user = pd.DataFrame(list(result))[0].values.tolist()
user = tuple(user)
print("用户个数")
print(len(user))
return user
print(user)
# def get_tail8():
# sql = "select distinct device_id from data_feed_click \
# where stat_date='{}' \
# and cid_type='{}' \
# and device_id regexp '8$';".format(stat_date,cid_type)
# db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
# cursor = db.cursor()
# print("开始获取")
# cursor.execute(sql)
# print("成功获取")
# result = cursor.fetchall()
# db.close()
# user = pd.DataFrame(list(result))[0].values.tolist()
# user = tuple(user)
# print("尾号是8的用户个数")
# print(len(user))
# return user
# def get_ctr(user_tuple):
# sql = "select count(device_id) from data_feed_click \
# where stat_date='{}' \
# and cid_type='{}' \
# and device_id in {}".format(stat_date, cid_type, user_tuple)
# db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
# cursor = db.cursor()
# print("开始获取")
# cursor.execute(sql)
# click = cursor.fetchall()[0][0]
# print(click)
#
# sql = "select count(device_id) from data_feed_exposure \
# where stat_date='{}' \
# and cid_type='{}' \
# and device_id in {}".format(stat_date, cid_type, user_tuple)
# cursor = db.cursor()
# print("开始获取")
# cursor.execute(sql)
# exp = cursor.fetchall()[0][0]
# db.close()
# print(exp)
# print(click / exp)
# def get_tail6():
# df = pd.read_csv(path+"{}predictTail6Unique.csv".format(stat_date))
# pre_list = tuple(eval(df.loc[0,"list"]))
# print(len(pre_list))
# print(pre_list[:2])
# db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
# sql = "select distinct device_id from data_feed_click \
# where stat_date='{}' \
# and cid_type='{}' \
# and device_id in {}".format(stat_date,cid_type,pre_list)
# cursor = db.cursor()
# print("开始获取")
# cursor.execute(sql)
# print("成功获取")
# result = cursor.fetchall()
# db.close()
# print(pd.DataFrame(list(result)).empty)
# user = pd.DataFrame(list(result))[0].values.tolist()
# user = tuple(user)
# print("用户个数")
# print(len(user))
# return user
if __name__ == "__main__":
path = "/data/models/"
cid_type = "diary"
now = datetime.now()
year = now.year
month = now.month
day = now.day
stat_date = datetime(year, month, day)
stat_date = (stat_date - timedelta(days=1)).strftime("%Y-%m-%d")
print(stat_date)
tail6 = get_tail6()
get_ctr(tail6)
tail8 = get_tail8()
get_ctr(tail8)
get_city()
# path = "/data/models/"
# cid_type = "diary"
# now = datetime.now()
# year = now.year
# month = now.month
# day = now.day
# stat_date = datetime(year, month, day)
# stat_date = (stat_date - timedelta(days=1)).strftime("%Y-%m-%d")
# print(stat_date)
# tail6 = get_tail6()
# get_ctr(tail6)
# tail8 = get_tail8()
# get_ctr(tail8)
......
......@@ -77,7 +77,7 @@ def test():
# click = click.map(lambda x: (x[0], x[1], x[2], 1))
def hive():
conf = SparkConf().setAppName("My app")
conf = SparkConf().setMaster("spark://10.30.181.88:7077").setAppName("My app")
sc = SparkContext(conf=conf)
sc.setLogLevel("WARN")
sqlContext = HiveContext(sc)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment