hypothesis_test.py 22.1 KB
Newer Older
1 2 3 4 5 6
#! -*- coding: utf8 -*-
import pandas as pd
from scipy.stats import ttest_ind
from scipy.stats import levene
import datetime
from utils import con_sql
7
from decimal import *
8
import numpy as np
9 10 11
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr
12

13

14 15 16 17 18 19 20 21 22 23 24 25
#########推荐策略前后统计指标假设检验(t检验)###############

#自动获取昨日日期
def get_yesterday_date():
    """Return yesterday's date, relative to the local system date, as ``YYYY-MM-DD``.

    :rtype: str
    """
    one_day = datetime.timedelta(days=1)
    return (datetime.date.today() - one_day).strftime("%Y-%m-%d")


# Date of the data being monitored; reused by the chi-square queries further down.
yesterday = get_yesterday_date()
print("监测数据日期:{}".format(yesterday))
28 29 30 31 32 33 34
#自动获取10日前的日期
def get_somedate(days=10):
    """Return the date ``days`` days before today as a ``YYYY-MM-DD`` string.

    :param days: number of days to go back from the local system date.
        Defaults to 10, the window used by the t-test section of this script.
    :rtype: str
    """
    someday = datetime.date.today() - datetime.timedelta(days=days)
    return someday.strftime("%Y-%m-%d")


# Lower bound (inclusive) of the "recent" window used by the t-test queries.
ten_days = get_somedate()
print("===========分割线,T检验最近10日指标与策略前10日指标是否获得显著提升============")
40
#获取最近10天的数据
41
def DATA_recently(x,y,z,q,t):
    """Query the ratio ``x / y`` (aliased as ``z``) from table ``q`` for ``stat_date >= t``.

    The arguments are interpolated directly into the SQL text, so only pass
    trusted, hard-coded column and table names.

    :param x: numerator column name
    :param y: denominator column name
    :param z: alias given to the ratio column
    :param q: table name
    :param t: inclusive lower bound for ``stat_date`` (``YYYY-MM-DD`` string)
    :return: whatever ``con_sql`` returns for the query; callers in this file
        index it as ``result[i][0]``, so presumably a sequence of row tuples
    """
    # The original also computed an unused ``ten_days = get_somedate()`` local;
    # the window start is already supplied by the caller through ``t``.
    sql_cid = (
        "select {0}/{1} as {2} from {3} "
        "    where stat_date >='{4}' "
    ).format(x, y, z, q, t)
    return con_sql(sql_cid)
47 48
#
# #获取固定时间的10天的数据
49 50
def DATA_fixed(x,y,z,q,start_date='2018-11-17',end_date='2018-11-26'):
    """Query the ratio ``x / y`` (aliased as ``z``) from table ``q`` over a fixed baseline window.

    The window defaults to 2018-11-17 .. 2018-11-26 (both inclusive), which is
    the period the original code hard-coded; pass other dates to compare
    against a different baseline.

    The arguments are interpolated directly into the SQL text, so only pass
    trusted, hard-coded values.

    :param x: numerator column name
    :param y: denominator column name
    :param z: alias given to the ratio column
    :param q: table name
    :param start_date: inclusive lower bound for ``stat_date`` (``YYYY-MM-DD``)
    :param end_date: inclusive upper bound for ``stat_date`` (``YYYY-MM-DD``)
    :return: whatever ``con_sql`` returns for the query
    """
    sql_cid = (
        "select {0}/{1} as {2} from {3} "
        "    where stat_date >='{4}'  and stat_date<='{5}' group by stat_date"
    ).format(x, y, z, q, start_date, end_date)
    return con_sql(sql_cid)
王志伟's avatar
王志伟 committed
54 55 56 57 58 59 60 61 62 63 64 65 66 67

def DATA_recently_all(x,y,z,q,m,t):
    """Query the ratio ``(x + y) / z`` (aliased as ``q``) from table ``m`` for ``stat_date >= t``.

    The arguments are interpolated directly into the SQL text, so only pass
    trusted, hard-coded column and table names.

    :param x: first numerator column name
    :param y: second numerator column name (added to ``x``)
    :param z: denominator column name
    :param q: alias given to the ratio column
    :param m: table name
    :param t: inclusive lower bound for ``stat_date`` (``YYYY-MM-DD`` string)
    :return: whatever ``con_sql`` returns for the query
    """
    # The original also computed an unused ``ten_days = get_somedate()`` local;
    # the window start is already supplied by the caller through ``t``.
    sql_cid = (
        "select ({0}+{1})/{2} as {3} from {4} "
        "    where stat_date >='{5}' "
    ).format(x, y, z, q, m, t)
    return con_sql(sql_cid)
#
# #获取固定时间的10天的数据
def DATA_fixed_all(x,y,z,q,m,start_date='2018-11-17',end_date='2018-11-26'):
    """Query the ratio ``(x + y) / z`` (aliased as ``q``) from table ``m`` over a fixed baseline window.

    The window defaults to 2018-11-17 .. 2018-11-26 (both inclusive), which is
    the period the original code hard-coded.

    The arguments are interpolated directly into the SQL text, so only pass
    trusted, hard-coded values.

    :param x: first numerator column name
    :param y: second numerator column name (added to ``x``)
    :param z: denominator column name
    :param q: alias given to the ratio column
    :param m: table name
    :param start_date: inclusive lower bound for ``stat_date`` (``YYYY-MM-DD``)
    :param end_date: inclusive upper bound for ``stat_date`` (``YYYY-MM-DD``)
    :return: whatever ``con_sql`` returns for the query
    """
    sql_cid = (
        "select ({0}+{1})/{2} as {3} from {4} "
        "    where stat_date >='{5}'  and stat_date<='{6}' group by stat_date"
    ).format(x, y, z, q, m, start_date, end_date)
    return con_sql(sql_cid)
68 69
#
def _quantized_column(rows, places='0.0000'):
    """Round the first field of every row with ``Decimal.quantize`` and return floats."""
    step = Decimal(places)
    return [float(str(Decimal(row[0]).quantize(step))) for row in rows]


# For every metric: x_* holds the recent window (stat_date >= ten_days) and
# y_* holds the fixed baseline window, each rounded to four decimal places.

# New-user CVR
x_crv_new_temp = DATA_recently("diary_meigou_newUser", "diary_clk_newUser", "CVR_new", "diary_meigou_crv", ten_days)
x_crv_new = _quantized_column(x_crv_new_temp)
y_crv_new_temp = DATA_fixed("diary_meigou_newUser", "diary_clk_newUser", "CVR_new", "diary_meigou_crv")
y_crv_new = _quantized_column(y_crv_new_temp)

# Old-user CVR
x_crv_old_temp = DATA_recently("diary_meigou_oldUser", "diary_clk_oldUser", "CVR_old", "diary_meigou_crv", ten_days)
x_crv_old = _quantized_column(x_crv_old_temp)
y_crv_old_temp = DATA_fixed("diary_meigou_oldUser", "diary_clk_oldUser", "CVR_old", "diary_meigou_crv")
y_crv_old = _quantized_column(y_crv_old_temp)

# New-user CT-CVR
x_ctcrv_new_temp = DATA_recently("diary_meigou_newUser", "diary_exp_newUser", "CT_CVR_new", "diary_meigou_crv", ten_days)
x_ctcrv_new = _quantized_column(x_ctcrv_new_temp)
y_ctcrv_new_temp = DATA_fixed("diary_meigou_newUser", "diary_exp_newUser", "CT_CVR_new", "diary_meigou_crv")
y_ctcrv_new = _quantized_column(y_ctcrv_new_temp)

# Old-user CT-CVR
x_ctcrv_old_temp = DATA_recently("diary_meigou_oldUser", "diary_exp_oldUser", "CT_CVR_old", "diary_meigou_crv", ten_days)
x_ctcrv_old = _quantized_column(x_ctcrv_old_temp)
y_ctcrv_old_temp = DATA_fixed("diary_meigou_oldUser", "diary_exp_oldUser", "CT_CVR_old", "diary_meigou_crv")
y_ctcrv_old = _quantized_column(y_ctcrv_old_temp)

# New-user CTR (page_view)
x_ctr_new_temp = DATA_recently("clk_count_newUser_all", "imp_count_newUser_all", "ctr_new", "bug_Recommendation_strategy_newUser", ten_days)
x_ctr_new = _quantized_column(x_ctr_new_temp)
y_ctr_new_temp = DATA_fixed("clk_count_newUser_all", "imp_count_newUser_all", "ctr_new", "bug_Recommendation_strategy_newUser")
y_ctr_new = _quantized_column(y_ctr_new_temp)

# Old-user CTR (page_view)
x_ctr_old_temp = DATA_recently("clk_count_oldUser_all", "imp_count_oldUser_all", "ctr_old", "bug_Recommendation_strategy_temp", ten_days)
x_ctr_old = _quantized_column(x_ctr_old_temp)
y_ctr_old_temp = DATA_fixed("clk_count_oldUser_all", "imp_count_oldUser_all", "ctr_old", "bug_Recommendation_strategy_temp")
y_ctr_old = _quantized_column(y_ctr_old_temp)

# New-user CTR (on_click_diary_card)
x_ctr_new_o_temp = DATA_recently_all("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all", "ctr_new", "on_click_diary_card", ten_days)
x_ctr_new_o = _quantized_column(x_ctr_new_o_temp)
y_ctr_new_o_temp = DATA_fixed_all("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all", "ctr_new", "on_click_diary_card")
y_ctr_new_o = _quantized_column(y_ctr_new_o_temp)

# Old-user CTR (on_click_diary_card)
x_ctr_old_o_temp = DATA_recently_all("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all", "ctr_old", "on_click_diary_card", ten_days)
x_ctr_old_o = _quantized_column(x_ctr_old_o_temp)
y_ctr_old_o_temp = DATA_fixed_all("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all", "ctr_old", "on_click_diary_card")
y_ctr_old_o = _quantized_column(y_ctr_old_o_temp)
117
# #
118 119
#
#
120 121 122 123 124 125 126 127 128 129
def t_test(x,y):
    """Run a two-sided independent-samples t-test on ``x`` and ``y`` and print the verdict.

    Levene's test is run first to decide whether the two samples can be
    treated as having equal variances: a Levene p-value above 0.05 selects
    the pooled-variance t-test (``equal_var=True``), anything else selects
    Welch's t-test (``equal_var=False``). The t-test p-value is then compared
    against 0.05 and a message is printed accordingly.

    The test is two-sided, so the order of ``x`` and ``y`` does not affect
    the p-value (callers in this file pass the recent window as ``x`` and the
    baseline window as ``y``).

    :param x: first sample, a sequence of numbers
    :param y: second sample, a sequence of numbers
    :return: the p-value of the t-test. The original returned ``None`` even
        though every caller stores the result; returning the p-value makes
        those stored results meaningful.
    """
    # Levene p-value > 0.05 means we do not reject equal variances.
    levene_p_value = levene(x, y)[1]
    variances_equal = bool(levene_p_value > 0.05)

    t_p_value = ttest_ind(x, y, equal_var=variances_equal)[1]

    if t_p_value > 0.05:
        print("95%置信度认为策略前后两组数据【无显著性差异】,即该指标没有显著变化,p_value:{}".format(t_p_value))
    else:
        print("95%置信度认为策略前后两组数据【有显著性差异】,即该指标获得显著变化,p_value:{}".format(t_p_value))
    print("\n")
    return t_p_value
146 147 148
#
# ###假设检验,判断是否具有显著性
#
149
# Run the t-test for each metric: recent window (x_*) versus baseline (y_*).

# New-user CVR
print("【1】新用户CVR假设检验结果:")
crv_new_ttest = t_test(x_crv_new, y_crv_new)

# Old-user CVR
print("【2】老用户CVR假设检验结果:")
crv_old_ttest = t_test(x_crv_old, y_crv_old)

# New-user CT-CVR
print("【3】新用户CT-CVR假设检验结果:")
ctcrv_new_ttest = t_test(x_ctcrv_new, y_ctcrv_new)

# Old-user CT-CVR
print("【4】老用户CT-CVR假设检验结果:")
ctcrv_old_ttest = t_test(x_ctcrv_old, y_ctcrv_old)

# New-user CTR (page_view)
print("【5】新用户CTR假设检验结果:")
ctr_new_ttest = t_test(x_ctr_new, y_ctr_new)

# Old-user CTR (page_view)
print("【6】老用户CTR假设检验结果:")
ctr_old_ttest = t_test(x_ctr_old, y_ctr_old)

# New-user CTR (on_click_diary_card)
print("【7】新用户CTR假设检验(日记本列表ctr)(on_click_diary_card)结果:")
ctr_new_o_ttest = t_test(x_ctr_new_o, y_ctr_new_o)

# Old-user CTR (on_click_diary_card)
print("【8】老用户CTR假设检验(日记本列表ctr)(on_click_diary_card)结果:")
ctr_old_o_ttest = t_test(x_ctr_old_o, y_ctr_old_o)

# ---- Section 2: chi-square check of yesterday against the preceding days ----
print("===========分割线,卡方检验昨日指标与前5日指标均值是否显著变化============")
180 181 182
# #1 计算每日指标卡方检验
#
# #自动获取5日前的日期
183 184 185 186 187 188 189 190 191 192 193
def get_fivedate(days=5):
    """Return the date ``days`` days before today as a ``YYYY-MM-DD`` string.

    The original comment said "10 days ago", but the code has always used a
    5-day offset; the default keeps that 5-day behaviour.

    :param days: number of days to go back from the local system date
    :rtype: str
    """
    someday = datetime.date.today() - datetime.timedelta(days=days)
    return someday.strftime("%Y-%m-%d")


# Lower bound (inclusive) of the 5-day window used by the chi-square and variance queries.
five_days = get_fivedate()

194
#获取最近5天的数据,此函数只适用于on_click_diary_card表格,具体原因可以查看数据代码
195 196
def chi_DATA_recently(x,y,z,q,t1,t2):
    """Return the first result row of ``AVG(x + y), AVG(z)`` from table ``q``.

    Rows are restricted to ``t1 <= stat_date < t2``. All arguments are
    interpolated into the SQL text as-is.
    """
    template = "select AVG({0}+{1}),AVG({2}) from {3}     where stat_date >= '{4}' and stat_date < '{5}' "
    rows = con_sql(template.format(x, y, z, q, t1, t2))
    return rows[0]
200 201
def chi_DATA_yesterday(x,y,z,q,t1):
    """Return the first result row of ``x + y, z`` from table ``q`` where ``stat_date`` equals ``t1``.

    All arguments are interpolated into the SQL text as-is.
    """
    template = "select {0}+{1},{2} from {3} where stat_date='{4}'  "
    rows = con_sql(template.format(x, y, z, q, t1))
    return rows[0]
204
#获取最近5天的数据
205
def chi_DATA_recently_all(x,y,z,t1,t2):
    """Return the first result row of ``AVG(x), AVG(y)`` from table ``z``.

    Rows are restricted to ``t1 <= stat_date < t2``. All arguments are
    interpolated into the SQL text as-is.
    """
    template = "select AVG({0}),AVG({1}) from {2}     where stat_date >= '{3}' and stat_date < '{4}' "
    rows = con_sql(template.format(x, y, z, t1, t2))
    return rows[0]
210
def chi_DATA_yesterday_all(x,y,z,t1):
    """Return the first result row of ``x, y`` from table ``z`` where ``stat_date`` equals ``t1``.

    All arguments are interpolated into the SQL text as-is.
    """
    template = "select {0},{1} from {2} where stat_date='{3}'  "
    rows = con_sql(template.format(x, y, z, t1))
    return rows[0]

215

216 217 218 219 220 221
#整理数据
def data_cal(x,y):
    """Build the 2x2 table consumed by ``chi_cal`` from two ``(first, second)`` pairs.

    Each pair is turned into ``[first, second - first]``. ``x`` becomes the
    '原' column and ``y`` becomes the '测' column.
    NOTE(review): callers pass (conversions-or-clicks, total) pairs, so the
    rows presumably mean "hit" and "miss" - confirm against the data source.

    :return: ``pandas.DataFrame`` with columns '原' and '测' and two rows
    """
    def _hit_and_rest(pair):
        return [pair[0], pair[1] - pair[0]]

    columns = {'原': _hit_and_rest(x), '测': _hit_and_rest(y)}
    return pd.DataFrame(columns)
222

223 224 225 226 227 228 229 230 231 232 233
def chi_cal(data, threshold=3.84):
    """Compute Pearson's chi-square statistic for a contingency table and print the verdict.

    Expected counts are ``row_total * column_total / grand_total`` and the
    statistic is the sum of ``(observed - expected) ** 2 / expected`` over
    all cells.

    Changes from the original implementation:

    * ``data`` is no longer modified. The original appended a '共计' column
      and row to the caller's frame, which made a second call on the same
      frame produce wrong totals.
    * Cells are read from a plain array instead of integer keys such as
      ``temp1[2]`` on a Series whose index mixes ints and a string; current
      pandas treats those keys as labels and raises ``KeyError``.
    * The unused (and incorrectly computed) degrees-of-freedom local is gone.
    * The statistic is returned instead of ``None``.

    :param data: table of observed counts, e.g. the 2x2 frame built by ``data_cal``
    :param threshold: critical value the statistic is compared against.
        The default 3.84 is the value the original code hard-coded for the
        95% level; it is only appropriate for a 2x2 table.
    :return: the chi-square statistic as a float
    """
    observed = np.asarray(data, dtype=float)
    row_totals = observed.sum(axis=1)
    column_totals = observed.sum(axis=0)
    grand_total = observed.sum()

    # Expected count of each cell under independence of rows and columns.
    expected = np.outer(row_totals, column_totals) / grand_total

    X = float(((observed - expected) ** 2 / expected).sum())
    print("卡方值为:{}".format(X))

    if X > threshold:
        print("数据波动较大,超出正常波动范围,95%可能性属于指标【显著变化,请关注】")
    else:
        print("数据波动较小,95%可能性属于【正常波动】范围")
    print("\n")
    return X
254

255
def _one_decimal(values):
    """Round each value to one decimal place via ``Decimal.quantize`` and return floats."""
    return [float(str(Decimal(v).quantize(Decimal('0.0')))) for v in values]


def _as_floats(values):
    """Convert each value to ``float``."""
    return [float(v) for v in values]


# For each metric: the averages over [five_days, yesterday) are compared with
# yesterday's values through a chi-square test.

# Old-user CTR with precise exposure (on_click_diary_card)
print("【1】(精准曝光)首页精选日记本列表老用户CTR数据波动假设检验结果:")
chi_ctr_precise_old_recently = chi_DATA_recently("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all_precise", "on_click_diary_card", five_days, yesterday)
temp1_old = _one_decimal(chi_ctr_precise_old_recently)
chi_ctr_precise_old_yesterday = chi_DATA_yesterday("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all_precise", "on_click_diary_card", yesterday)
temp2_old = _as_floats(chi_ctr_precise_old_yesterday)
ctr_tst_old = data_cal(temp1_old, temp2_old)
chi_cal(ctr_tst_old)

# New-user CTR with precise exposure (on_click_diary_card)
print("【2】(精准曝光)首页精选日记本列表新用户CTR数据波动假设检验结果:")
chi_ctr_precise_new_recently = chi_DATA_recently("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all_precise", "on_click_diary_card", five_days, yesterday)
temp1_new = _one_decimal(chi_ctr_precise_new_recently)
chi_ctr_precise_new_yesterday = chi_DATA_yesterday("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all_precise", "on_click_diary_card", yesterday)
temp2_new = _as_floats(chi_ctr_precise_new_yesterday)
ctr_tst_new = data_cal(temp1_new, temp2_new)
chi_cal(ctr_tst_new)

# Old-user CVR
print("【3】老用户CVR数据波动假设检验结果:")
chi_cvr_old_recently = chi_DATA_recently_all("diary_meigou_oldUser", "diary_clk_oldUser", "diary_meigou_crv", five_days, yesterday)
cvr_old = _one_decimal(chi_cvr_old_recently)
chi_cvr_old_yesterday = chi_DATA_yesterday_all("diary_meigou_oldUser", "diary_clk_oldUser", "diary_meigou_crv", yesterday)
cvr_old2 = _as_floats(chi_cvr_old_yesterday)
cvr_tst_old = data_cal(cvr_old, cvr_old2)
chi_cal(cvr_tst_old)

# New-user CVR
print("【4】新用户CVR数据波动假设检验结果:")
chi_cvr_new_recently = chi_DATA_recently_all("diary_meigou_newUser", "diary_clk_newUser", "diary_meigou_crv", five_days, yesterday)
cvr_new = _one_decimal(chi_cvr_new_recently)
chi_cvr_new_yesterday = chi_DATA_yesterday_all("diary_meigou_newUser", "diary_clk_newUser", "diary_meigou_crv", yesterday)
cvr_new2 = _as_floats(chi_cvr_new_yesterday)
cvr_tst_new = data_cal(cvr_new, cvr_new2)
chi_cal(cvr_tst_new)

# Old-user CT-CVR
print("【5】老用户CT-CVR数据波动假设检验结果:")
chi_ctcvr_old_recently = chi_DATA_recently_all("diary_meigou_oldUser", "diary_exp_oldUser", "diary_meigou_crv", five_days, yesterday)
ctcvr_old = _one_decimal(chi_ctcvr_old_recently)
chi_ctcvr_old_yesterday = chi_DATA_yesterday_all("diary_meigou_oldUser", "diary_exp_oldUser", "diary_meigou_crv", yesterday)
ctcvr_old2 = _as_floats(chi_ctcvr_old_yesterday)
ctcvr_tst_old = data_cal(ctcvr_old, ctcvr_old2)
chi_cal(ctcvr_tst_old)

# New-user CT-CVR
print("【6】新用户CT-CVR数据波动假设检验结果:")
chi_ctcvr_new_recently = chi_DATA_recently_all("diary_meigou_newUser", "diary_exp_newUser", "diary_meigou_crv", five_days, yesterday)
ctcvr_new = _one_decimal(chi_ctcvr_new_recently)
chi_ctcvr_new_yesterday = chi_DATA_yesterday_all("diary_meigou_newUser", "diary_exp_newUser", "diary_meigou_crv", yesterday)
ctcvr_new2 = _as_floats(chi_ctcvr_new_yesterday)
ctcvr_tst_new = data_cal(ctcvr_new, ctcvr_new2)
chi_cal(ctcvr_tst_new)

# ---- Section 3: variance and mean of each metric since five_days ----
print("===============分割线,开始检测各个指标的5日内的方差和均值==================")
320

321
def get_var_data1(x,y,z,t1):
    """Query the ratio ``x / y`` from table ``z`` for rows with ``stat_date >= t1``.

    All arguments are interpolated into the SQL text as-is.

    :return: whatever ``con_sql`` returns for the query
    """
    query = "select {0}/{1} from {2}     where stat_date >= '{3}' ".format(x, y, z, t1)
    return con_sql(query)
326 327 328 329 330 331 332

def get_var_data2(x,y,z,q,t1):
    """Query the ratio ``(x + y) / z`` from table ``q`` for rows with ``stat_date >= t1``.

    All arguments are interpolated into the SQL text as-is.

    :return: whatever ``con_sql`` returns for the query
    """
    query = "select ({0}+{1})/{2} from {3}     where stat_date >= '{4}' ".format(x, y, z, q, t1)
    return con_sql(query)

333
def collect_data(data):
    """Return the first field of every row as a float multiplied by 100.

    :param data: sequence of rows, each indexable at position 0
    :return: list of floats (ratios expressed as percentages)
    """
    percentages = []
    for row in data:
        percentages.append(float(row[0]) * 100)
    return percentages
336

337
# Variance and mean (in percent) of each ratio for rows with stat_date >= five_days.

# Old-user CT-CVR
var_ctcvr_old_data = get_var_data1("diary_meigou_oldUser", "diary_exp_oldUser", "diary_meigou_crv", five_days)
var_ctcvr_old_D = collect_data(var_ctcvr_old_data)
var_ctcvr_old = np.var(var_ctcvr_old_D)
mean_var_ctcvr_old = np.mean(var_ctcvr_old_D)
print("【1-1】老用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_old))
print("【1-2】老用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_old))
print("\n")

# New-user CT-CVR
var_ctcvr_new_data = get_var_data1("diary_meigou_newUser", "diary_exp_newUser", "diary_meigou_crv", five_days)
var_ctcvr_new_D = collect_data(var_ctcvr_new_data)
var_ctcvr_new = np.var(var_ctcvr_new_D)
mean_var_ctcvr_new = np.mean(var_ctcvr_new_D)
print("【2-1】新用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_new))
print("【2-2】新用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_new))
print("\n")

# Old-user CVR
var_cvr_old_data = get_var_data1("diary_meigou_oldUser", "diary_clk_oldUser", "diary_meigou_crv", five_days)
var_cvr_old_D = collect_data(var_cvr_old_data)
var_cvr_old = np.var(var_cvr_old_D)
mean_var_cvr_old = np.mean(var_cvr_old_D)
print("【3-1】老用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_old))
print("【3-2】老用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_old))
print("\n")

# New-user CVR
var_cvr_new_data = get_var_data1("diary_meigou_newUser", "diary_clk_newUser", "diary_meigou_crv", five_days)
var_cvr_new_D = collect_data(var_cvr_new_data)
var_cvr_new = np.var(var_cvr_new_D)
mean_var_cvr_new = np.mean(var_cvr_new_D)
print("【4-1】新用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_new))
print("【4-2】新用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_new))
print("\n")
370 371 372 373 374

# Old-user CTR (on_click_diary_card): variance and mean for stat_date >= five_days.
var_ctr_old_data = get_var_data2("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all", "on_click_diary_card", five_days)
var_ctr_old_D = collect_data(var_ctr_old_data)
# Bug fix: the variance used to be computed from var_cvr_old_D (the old-user
# CVR series), so the "CTR variance" printed below was really the CVR variance.
var_ctr_old = np.var(var_ctr_old_D)
mean_var_ctr_old = np.mean(var_ctr_old_D)
print("【5-1】老用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_old))
print("【5-2】老用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_old))
print("\n")
378 379 380 381 382

# New-user CTR (on_click_diary_card)
var_ctr_new_data = get_var_data2("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all", "on_click_diary_card", five_days)
var_ctr_new_D = collect_data(var_ctr_new_data)
var_ctr_new = np.var(var_ctr_new_D)
mean_var_ctr_new = np.mean(var_ctr_new_D)
print("【6-1】新用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_new))
print("【6-2】新用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_new))
print("\n")

# New-user CTR with precise exposure (on_click_diary_card)
var_ctr_new_precise_data = get_var_data2("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all_precise", "on_click_diary_card", five_days)
var_ctr_new_precise_D = collect_data(var_ctr_new_precise_data)
var_ctr_new_precise = np.var(var_ctr_new_precise_D)
mean_var_ctr_new_precise = np.mean(var_ctr_new_precise_D)
print("【7-1】新用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_new_precise))
print("【7-2】新用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_new_precise))
print("\n")

# Old-user CTR with precise exposure (on_click_diary_card)
var_ctr_old_precise_data = get_var_data2("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all_precise", "on_click_diary_card", five_days)
var_ctr_old_precise_D = collect_data(var_ctr_old_precise_data)
var_ctr_old_precise = np.var(var_ctr_old_precise_D)
mean_var_ctr_old_precise = np.mean(var_ctr_old_precise_D)
print("【8-1】老用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_old_precise))
print("【8-2】老用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_old_precise))
print("\n")
402 403


王志伟's avatar
王志伟 committed
404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436
# print("============================分割线===================================")
#根据新老用户进行区分
# print("============================新用户各指标假设检验结果分析===================================")
# #新用户cvr假设检验
# print("【1】新用户CVR假设检验结果:")
# crv_new_ttest1=t_test(x_crv_new,y_crv_new)
# #新用户ct_cvr假设检验
# print("【3】新用户CT-CVR假设检验结果:")
# ctcrv_new_ttest1=t_test(x_ctcrv_new,y_ctcrv_new)
# #新用户ctr假设检验
# print("【5】新用户CTR假设检验结果:")
# ctr_new_ttest1=t_test(x_ctr_new,y_ctr_new)
# #新用户ctr(on_click_diary_card)假设检验
# print("【7】新用户CTR假设检验(日记本列表ctr)(on_click_diary_card)结果:")
# ctr_new_o_ttest1=t_test(x_ctr_new_o,y_ctr_new_o)
#
#
#
#
#
# print("============================老用户各指标假设检验结果分析===================================")
# #老用户cvr假设检验
# print("【2】老用户CVR假设检验结果:")
# crv_old_ttest1=t_test(x_crv_old,y_crv_old)
# # #老用户ct_cvr假设检验
# print("【4】老用户CT-CVR假设检验结果:")
# ctcrv_old_ttest1=t_test(x_ctcrv_old,y_ctcrv_old)
# #老用户ctr假设检验
# print("【6】老用户CTR假设检验结果:")
# ctr_old_ttest1=t_test(x_ctr_old,y_ctr_old)
# #老用户ctr(on_click_diary_card)假设检验
# print("【8】老用户CTR假设检验(日记本列表ctr)(on_click_diary_card)结果:")
# ctr_old_o_ttest1=t_test(x_ctr_old_o,y_ctr_old_o)
437 438 439 440




441 442
##发送邮件

443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466
# my_sender='gaoyazhe@igengmei.com'
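# my_pass = os.environ["MAIL_PASSWORD"]  # SECURITY: never commit credentials; the password previously written here must be rotated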
# my_user1='wangzhiwei@igengmei.com'
# def mail():
#     ret = True
#     try:
#         text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttp://www.baidu.com"
#         msg = MIMEText(text, 'plain', 'utf-8')
#         msg['From'] = formataddr(["王志伟", my_sender])
#         msg['To'] = my_user1
#         msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计"
#         server = smtplib.SMTP_SSL("smtp.exmail.qq.com", 465)
#         server.login(my_sender, my_pass)
#         server.sendmail(my_sender, [my_user1], msg.as_string())
#         server.quit()
#     except Exception:
#         ret=False
#     return ret
#
# ret=mail()
# if ret:
#     print("邮件发送成功")
# else:
#     print("邮件发送失败")
467

468 469 470 471 472 473 474 475 476
# chi_cvr_new=
# chi_cvr_old=
#
# chi_ctcvr_new=
# chi_ctcvr_old=
#
#
#
# def chi_cal(data):