# -*- coding: UTF-8 -*- import pymysql import datetime import pandas as pd def con_sql(sql): #从数据库的表里获取数据 """ :type sql : str :rtype : tuple """ db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test') cursor = db.cursor() cursor.execute(sql) result = cursor.fetchall() db.close() return result def tuple2dict(tuple_result): #把sql结果从tuple(tuple,tuple)格式转换成dict格式 """ :type tuple_result : tuple :rtype : dict """ dict_result = {} for i in range(len(tuple_result)): dict_result[tuple_result[i][0]] = tuple_result[i][1] return dict_result def get_yesterday_date(): #自动获取昨天的日期,如"20180808" """ :rtype : str """ today = datetime.date.today() yesterday = today - datetime.timedelta(days=1) yesterday = yesterday.strftime("%Y%m%d") return yesterday #获取各个平台下的活跃用户点击率 def get_activate_uid_ctr(platform, ndays=1): """ ndays : 1;2;3;4.. #The number of days from the current time platform : 'all';'ios';'android' rtype : list """ if platform == "ios": platform = "='App Store'" elif platform == "android": platform = "!='App Store'" else: platform = " is not null" sql_clk = "select count(device_id) from data_feed_click \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ and device_type{1}".format(ndays, platform.replace(' ','') if platform[-2]=='e' else platform) clk_count = con_sql(sql_clk)[0][0] sql_imp = "select count(device_id) from data_feed_exposure \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{0} day) \ and device_id in \ (select device_id from data_feed_click \ where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -{1} day) \ and device_type{2}) \ and device_type{3}".format(ndays, ndays, platform.replace(' ','') if platform[-2]=='e' else platform, platform) imp_count = con_sql(sql_imp)[0][0] clk_rate = round(clk_count/imp_count, 4 ) if platform == "='App Store'": platform = "苹果" elif platform == "!='App Store'": platform = "安卓" else: platform = "所有" return [platform, clk_count, imp_count, clk_rate] #获取 {点击次数 : 独立用户数} def get_click_times_to_count_uid_df(): """ rtype : pandas.DataFrame """ sql = "select device_id,count(cid_type) click_times from data_feed_click where from_unixtime(time,'%Y-%m-%d')=date_add(curdate(), interval -1 day) group by device_id order by click_times desc" uid_click_times = con_sql(sql) uid_lst = [i[0] for i in uid_click_times] click_times_lst = [i[1] for i in uid_click_times] uid_click_times_df = pd.DataFrame({"uid":uid_lst,"click_times":click_times_lst}) df = uid_click_times_df.groupby(by="click_times",as_index=False).count() return df