hypothesis_test.py

#! -*- coding: utf8 -*-
import pandas as pd
from scipy.stats import ttest_ind
from scipy.stats import levene
import datetime
from utils import con_sql
from decimal import *
import numpy as np
import smtplib
from email.mime.text import MIMEText
from email.utils import formataddr


#########推荐策略前后统计指标假设检验（t检验）###############

# Yesterday's date, computed from the local calendar date.
def get_yesterday_date():
    """Return yesterday's date formatted as ``YYYY-MM-DD`` (e.g. "2018-08-08").

    :rtype : str
    """
    one_day = datetime.timedelta(days=1)
    return (datetime.date.today() - one_day).strftime("%Y-%m-%d")
# Date being monitored: resolved once when the script starts and printed.
yesterday=get_yesterday_date()
print("监测数据日期:{}".format(yesterday))
# Date a given number of days before today (10 by default).
def get_somedate(days=10):
    """Return the date ``days`` days before today as ``YYYY-MM-DD``.

    With the default of 10 this yields e.g. "2018-07-28" when today is
    2018-08-07, which is what existing zero-argument callers receive.

    :param days: how many days to go back from today (default 10)
    :rtype : str
    """
    someday = datetime.date.today() - datetime.timedelta(days=days)
    return someday.strftime("%Y-%m-%d")
# Start date of the recent 10-day window passed to the DATA_recently* queries.
ten_days=get_somedate()
print("===========分割线，T检验最近10日指标与策略前10日指标是否获得显著提升============")
# Fetch the ratio metric for the most recent window (start date supplied by the caller).
def DATA_recently(x,y,z,q,t):
    """Query ``x/y`` (aliased as ``z``) from table ``q`` for ``stat_date >= t``.

    The arguments are interpolated directly into the SQL text, so they must
    be trusted column/table names and a trusted date string.

    :param x: numerator column
    :param y: denominator column
    :param z: alias given to the ratio in the result set
    :param q: table name
    :param t: inclusive lower bound for ``stat_date`` ("YYYY-MM-DD")
    :return: the result of ``con_sql`` for this query
    """
    # The window start comes from ``t``; the former local ``ten_days`` was
    # computed here but never used, so it has been removed.
    # NOTE(review): unlike DATA_fixed this query has no upper date bound and
    # no GROUP BY - confirm that is intended.
    sql_cid = "select {0}/{1} as {2} from {3} \
    where stat_date >='{4}' ".format(x,y,z,q,t)
    return con_sql(sql_cid)
#
# Fetch the ratio metric for the fixed 10-day baseline window.
def DATA_fixed(x,y,z,q):
    """Query ``x/y`` (aliased ``z``) from table ``q`` for 2018-11-17..2018-11-26.

    The window bounds are hard-coded in the SQL and both ends are inclusive;
    rows are grouped by ``stat_date``.

    :return: the result of ``con_sql`` for this query
    """
    template = (
        "select {0}/{1} as {2} from {3} "
        "    where stat_date >='2018-11-17'  and stat_date<='2018-11-26' group by stat_date"
    )
    return con_sql(template.format(x,y,z,q))

def DATA_recently_all(x,y,z,q,m,t):
    """Query ``(x+y)/z`` (aliased as ``q``) from table ``m`` for ``stat_date >= t``.

    The arguments are interpolated directly into the SQL text, so they must
    be trusted column/table names and a trusted date string.

    :param x: first numerator column
    :param y: second numerator column (added to ``x``)
    :param z: denominator column
    :param q: alias given to the ratio in the result set
    :param m: table name
    :param t: inclusive lower bound for ``stat_date`` ("YYYY-MM-DD")
    :return: the result of ``con_sql`` for this query
    """
    # The window start comes from ``t``; the former local ``ten_days`` was
    # computed here but never used, so it has been removed.
    sql_cid = "select ({0}+{1})/{2} as {3} from {4} \
    where stat_date >='{5}' ".format(x,y,z,q,m,t)
    return con_sql(sql_cid)
#
# Fetch the summed-numerator ratio metric for the fixed 10-day baseline window.
def DATA_fixed_all(x,y,z,q,m):
    """Query ``(x+y)/z`` (aliased ``q``) from table ``m`` for 2018-11-17..2018-11-26.

    The window bounds are hard-coded in the SQL and both ends are inclusive;
    rows are grouped by ``stat_date``.

    :return: the result of ``con_sql`` for this query
    """
    template = (
        "select ({0}+{1})/{2} as {3} from {4} "
        "    where stat_date >='2018-11-17'  and stat_date<='2018-11-26' group by stat_date"
    )
    return con_sql(template.format(x,y,z,q,m))
#
# Load every metric twice: x_* for the recent window (from ten_days onward)
# and y_* for the fixed baseline window. Each *_temp keeps the raw query
# result; the matching list holds the first column as 4-decimal floats.
def _ratios_4dp(rows):
    """Return the first column of each row as a float quantized to 4 decimals."""
    four_places = Decimal('0.0000')
    return [float(str(Decimal(row[0]).quantize(four_places))) for row in rows]


# New-user CVR
x_crv_new_temp = DATA_recently("diary_meigou_newUser", "diary_clk_newUser", "CVR_new", "diary_meigou_crv", ten_days)
x_crv_new = _ratios_4dp(x_crv_new_temp)
y_crv_new_temp = DATA_fixed("diary_meigou_newUser", "diary_clk_newUser", "CVR_new", "diary_meigou_crv")
y_crv_new = _ratios_4dp(y_crv_new_temp)

# Old-user CVR
x_crv_old_temp = DATA_recently("diary_meigou_oldUser", "diary_clk_oldUser", "CVR_old", "diary_meigou_crv", ten_days)
x_crv_old = _ratios_4dp(x_crv_old_temp)
y_crv_old_temp = DATA_fixed("diary_meigou_oldUser", "diary_clk_oldUser", "CVR_old", "diary_meigou_crv")
y_crv_old = _ratios_4dp(y_crv_old_temp)

# New-user CT-CVR
x_ctcrv_new_temp = DATA_recently("diary_meigou_newUser", "diary_exp_newUser", "CT_CVR_new", "diary_meigou_crv", ten_days)
x_ctcrv_new = _ratios_4dp(x_ctcrv_new_temp)
y_ctcrv_new_temp = DATA_fixed("diary_meigou_newUser", "diary_exp_newUser", "CT_CVR_new", "diary_meigou_crv")
y_ctcrv_new = _ratios_4dp(y_ctcrv_new_temp)

# Old-user CT-CVR
x_ctcrv_old_temp = DATA_recently("diary_meigou_oldUser", "diary_exp_oldUser", "CT_CVR_old", "diary_meigou_crv", ten_days)
x_ctcrv_old = _ratios_4dp(x_ctcrv_old_temp)
y_ctcrv_old_temp = DATA_fixed("diary_meigou_oldUser", "diary_exp_oldUser", "CT_CVR_old", "diary_meigou_crv")
y_ctcrv_old = _ratios_4dp(y_ctcrv_old_temp)

# New-user CTR (page_view)
x_ctr_new_temp = DATA_recently("clk_count_newUser_all", "imp_count_newUser_all", "ctr_new", "bug_Recommendation_strategy_newUser", ten_days)
x_ctr_new = _ratios_4dp(x_ctr_new_temp)
y_ctr_new_temp = DATA_fixed("clk_count_newUser_all", "imp_count_newUser_all", "ctr_new", "bug_Recommendation_strategy_newUser")
y_ctr_new = _ratios_4dp(y_ctr_new_temp)

# Old-user CTR (page_view)
x_ctr_old_temp = DATA_recently("clk_count_oldUser_all", "imp_count_oldUser_all", "ctr_old", "bug_Recommendation_strategy_temp", ten_days)
x_ctr_old = _ratios_4dp(x_ctr_old_temp)
y_ctr_old_temp = DATA_fixed("clk_count_oldUser_all", "imp_count_oldUser_all", "ctr_old", "bug_Recommendation_strategy_temp")
y_ctr_old = _ratios_4dp(y_ctr_old_temp)

# New-user CTR (on_click_diary_card)
x_ctr_new_o_temp = DATA_recently_all("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all", "ctr_new", "on_click_diary_card", ten_days)
x_ctr_new_o = _ratios_4dp(x_ctr_new_o_temp)
y_ctr_new_o_temp = DATA_fixed_all("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all", "ctr_new", "on_click_diary_card")
y_ctr_new_o = _ratios_4dp(y_ctr_new_o_temp)

# Old-user CTR (on_click_diary_card)
x_ctr_old_o_temp = DATA_recently_all("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all", "ctr_old", "on_click_diary_card", ten_days)
x_ctr_old_o = _ratios_4dp(x_ctr_old_o_temp)
y_ctr_old_o_temp = DATA_fixed_all("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all", "ctr_old", "on_click_diary_card")
y_ctr_old_o = _ratios_4dp(y_ctr_old_o_temp)
# #
#
#
def t_test(x,y):  # run a two-sample t-test
    """Print whether two samples differ significantly at the 95% level.

    Levene's test is run first: a p-value above 0.05 means equal variances
    are assumed and ``ttest_ind`` is called with ``equal_var=True``;
    otherwise it is called with ``equal_var=False``. The t-test p-value is
    then compared against 0.05 and the verdict is printed.

    :param x: first sample (sequence of numbers)
    :param y: second sample (sequence of numbers)
    :return: None - the outcome is only printed
    """
    # Variance homogeneity decides which flavour of the t-test is used.
    variances_equal = bool(levene(x, y)[1] > 0.05)
    t_p_value = ttest_ind(x, y, equal_var=variances_equal)[1]

    if t_p_value > 0.05:
        verdict = "95%置信度认为策略前后两组数据【无显著性差异】，即该指标没有显著变化，p_value:{}"
    else:
        verdict = "95%置信度认为策略前后两组数据【有显著性差异】，即该指标获得显著变化，p_value:{}"
    print(verdict.format(t_p_value))
    print("\n")
#
# ### Hypothesis tests: decide whether each metric changed significantly.
# t_test prints its verdict and has no return statement, so each *_ttest
# name below is bound to None.
#
# New-user CVR hypothesis test
print("【1】新用户CVR假设检验结果：")
crv_new_ttest=t_test(x_crv_new,y_crv_new)
# Old-user CVR hypothesis test
print("【2】老用户CVR假设检验结果：")
crv_old_ttest=t_test(x_crv_old,y_crv_old)
#
# New-user CT-CVR hypothesis test
print("【3】新用户CT-CVR假设检验结果：")
ctcrv_new_ttest=t_test(x_ctcrv_new,y_ctcrv_new)
# Old-user CT-CVR hypothesis test
print("【4】老用户CT-CVR假设检验结果：")
ctcrv_old_ttest=t_test(x_ctcrv_old,y_ctcrv_old)
#
#
# New-user CTR hypothesis test
print("【5】新用户CTR假设检验结果：")
ctr_new_ttest=t_test(x_ctr_new,y_ctr_new)
# Old-user CTR hypothesis test
print("【6】老用户CTR假设检验结果：")
ctr_old_ttest=t_test(x_ctr_old,y_ctr_old)

# New-user CTR (on_click_diary_card) hypothesis test
print("【7】新用户CTR假设检验(日记本列表ctr)（on_click_diary_card）结果：")
ctr_new_o_ttest=t_test(x_ctr_new_o,y_ctr_new_o)
# Old-user CTR (on_click_diary_card) hypothesis test
print("【8】老用户CTR假设检验(日记本列表ctr)（on_click_diary_card）结果：")
ctr_old_o_ttest=t_test(x_ctr_old_o,y_ctr_old_o)
#
# ############### Hypothesis tests on the data while the recommendation strategy is unchanged ##############
print("===========分割线，卡方检验昨日指标与前5日指标均值是否显著变化============")
# #1 计算每日指标卡方检验
#
# #自动获取5日前的日期
def get_fivedate():
    """Return the date five days before today formatted as ``YYYY-MM-DD``.

    :rtype : str
    """
    five_days_back = datetime.timedelta(days=5)
    return (datetime.date.today() - five_days_back).strftime("%Y-%m-%d")
# Start of the 5-day window used by the chi-square and variance sections below.
five_days=get_fivedate()

# Averages over a recent date range. Per the original author this function
# only suits the on_click_diary_card table (see the data-generation code).
def chi_DATA_recently(x,y,z,q,t1,t2):
    """Return the first result row of ``AVG(x+y), AVG(z)`` from table ``q``.

    Rows are restricted to ``t1 <= stat_date < t2``.

    :return: element 0 of what ``con_sql`` returns for this query
    """
    query = (
        "select AVG({0}+{1}),AVG({2}) from {3} "
        "    where stat_date >= '{4}' and stat_date < '{5}' "
    ).format(x,y,z,q,t1,t2)
    first_row = con_sql(query)[0]
    return first_row
def chi_DATA_yesterday(x,y,z,q,t1):
    """Return the first result row of ``x+y, z`` from table ``q`` where ``stat_date = t1``.

    :return: element 0 of what ``con_sql`` returns for this query
    """
    query = "select {0}+{1},{2} from {3} where stat_date='{4}'  "
    rows = con_sql(query.format(x,y,z,q,t1))
    return rows[0]
# Averages of two plain columns over a recent date range.
def chi_DATA_recently_all(x,y,z,t1,t2):
    """Return the first result row of ``AVG(x), AVG(y)`` from table ``z``.

    Rows are restricted to ``t1 <= stat_date < t2``.

    :return: element 0 of what ``con_sql`` returns for this query
    """
    query = (
        "select AVG({0}),AVG({1}) from {2} "
        "    where stat_date >= '{3}' and stat_date < '{4}' "
    ).format(x,y,z,t1,t2)
    first_row = con_sql(query)[0]
    return first_row
def chi_DATA_yesterday_all(x,y,z,t1):
    """Return the first result row of ``x, y`` from table ``z`` where ``stat_date = t1``.

    :return: element 0 of what ``con_sql`` returns for this query
    """
    query = "select {0},{1} from {2} where stat_date='{3}'  "
    rows = con_sql(query.format(x,y,z,t1))
    return rows[0]


# Arrange two (count, total) pairs into a 2x2 table.
def data_cal(x,y):
    """Build a two-column DataFrame from two ``(count, total)`` pairs.

    Column '原' is derived from ``x`` and column '测' from ``y``. In each
    column, row 0 is the count (element 0) and row 1 is the remainder
    ``total - count`` (element 1 minus element 0).

    :param x: indexable pair for the '原' column
    :param y: indexable pair for the '测' column
    :return: pandas.DataFrame with two rows and columns '原', '测'
    """
    count_x, total_x = x[0], x[1]
    count_y, total_y = y[0], y[1]
    table = {
        '原': [count_x, total_x - count_x],
        '测': [count_y, total_y - count_y],
    }
    return pd.DataFrame(table)

def chi_cal(data):
    """Run a chi-square test on a 2x2 table and print the statistic and verdict.

    The expected count of each cell is ``column_total * row_total / grand_total``;
    the statistic is the sum of ``(observed - expected)**2 / expected`` over
    the four cells. It is compared against 3.84, the 95% critical value for
    one degree of freedom (a 2x2 table).

    ``data`` is modified in place: a '共计' column of row totals and a '共计'
    row of column totals are appended, as before.

    All cell and total lookups use ``.iloc``. Integer keys passed to ``[]``
    on these label-indexed Series depended on pandas' deprecated positional
    fallback, and on the totals column (index ``[0, 1, '共计']``) the key ``2``
    is not a label at all.

    :param data: 2x2 pandas.DataFrame of observed counts (as built by data_cal)
    :return: None - results are only printed
    """
    # Row totals as an extra column, then column totals (including the
    # grand total) as an extra row.
    data['共计'] = data.sum(axis=1)
    data.loc['共计'] = data.sum()

    row_totals = data['共计']
    col_totals = data.iloc[2]
    grand_total = row_totals.iloc[2]

    # Accumulate in the order (0,0), (0,1), (1,0), (1,1).
    chi_square = 0
    for i in range(2):
        row_rate = row_totals.iloc[i] / grand_total
        for j in range(2):
            expected = col_totals.iloc[j] * row_rate
            chi_square += ((data.iloc[i, j] - expected) ** 2) / expected
    print("卡方值为：{}".format(chi_square))

    # 3.84 is the table value for df = 1 at the 0.05 significance level.
    if chi_square > 3.84:
        print("数据波动较大，超出正常波动范围，95%可能性属于指标【显著变化，请关注】")
        print("\n")
    else:
        print("数据波动较小，95%可能性属于【正常波动】范围")
        print("\n")

# Chi-square checks: yesterday's counts versus the average of the preceding
# days (five_days inclusive up to, but excluding, yesterday).
def _round_1dp(values):
    """Return each value as a float quantized to one decimal place."""
    one_place = Decimal('0.0')
    return [float(str(Decimal(v).quantize(one_place))) for v in values]


def _as_floats(values):
    """Return each value converted with float()."""
    return [float(v) for v in values]


# Old users, precise-exposure click/impression data (home feed diary list, on_click_diary_card)
print("【1】（精准曝光）首页精选日记本列表老用户CTR数据波动假设检验结果：")
chi_ctr_precise_old_recently = chi_DATA_recently("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all_precise", "on_click_diary_card", five_days, yesterday)
temp1_old = _round_1dp(chi_ctr_precise_old_recently)

chi_ctr_precise_old_yesterday = chi_DATA_yesterday("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all_precise", "on_click_diary_card", yesterday)
temp2_old = _as_floats(chi_ctr_precise_old_yesterday)

ctr_tst_old = data_cal(temp1_old, temp2_old)
chi_cal(ctr_tst_old)

# New users, precise-exposure click/impression data (home feed diary list, on_click_diary_card)
print("【2】（精准曝光）首页精选日记本列表新用户CTR数据波动假设检验结果：")
chi_ctr_precise_new_recently = chi_DATA_recently("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all_precise", "on_click_diary_card", five_days, yesterday)
temp1_new = _round_1dp(chi_ctr_precise_new_recently)

chi_ctr_precise_new_yesterday = chi_DATA_yesterday("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all_precise", "on_click_diary_card", yesterday)
temp2_new = _as_floats(chi_ctr_precise_new_yesterday)

ctr_tst_new = data_cal(temp1_new, temp2_new)
chi_cal(ctr_tst_new)

# Old users, CVR (purchase conversion per click)
print("【3】老用户CVR数据波动假设检验结果：")
chi_cvr_old_recently = chi_DATA_recently_all("diary_meigou_oldUser", "diary_clk_oldUser", "diary_meigou_crv", five_days, yesterday)
cvr_old = _round_1dp(chi_cvr_old_recently)

chi_cvr_old_yesterday = chi_DATA_yesterday_all("diary_meigou_oldUser", "diary_clk_oldUser", "diary_meigou_crv", yesterday)
cvr_old2 = _as_floats(chi_cvr_old_yesterday)

cvr_tst_old = data_cal(cvr_old, cvr_old2)
chi_cal(cvr_tst_old)
# New users, CVR
print("【4】新用户CVR数据波动假设检验结果：")
chi_cvr_new_recently = chi_DATA_recently_all("diary_meigou_newUser", "diary_clk_newUser", "diary_meigou_crv", five_days, yesterday)
cvr_new = _round_1dp(chi_cvr_new_recently)

chi_cvr_new_yesterday = chi_DATA_yesterday_all("diary_meigou_newUser", "diary_clk_newUser", "diary_meigou_crv", yesterday)
cvr_new2 = _as_floats(chi_cvr_new_yesterday)

cvr_tst_new = data_cal(cvr_new, cvr_new2)
chi_cal(cvr_tst_new)
# Old users, CT-CVR (purchase conversion per exposure)
print("【5】老用户CT-CVR数据波动假设检验结果：")
chi_ctcvr_old_recently = chi_DATA_recently_all("diary_meigou_oldUser", "diary_exp_oldUser", "diary_meigou_crv", five_days, yesterday)
ctcvr_old = _round_1dp(chi_ctcvr_old_recently)

chi_ctcvr_old_yesterday = chi_DATA_yesterday_all("diary_meigou_oldUser", "diary_exp_oldUser", "diary_meigou_crv", yesterday)
ctcvr_old2 = _as_floats(chi_ctcvr_old_yesterday)

ctcvr_tst_old = data_cal(ctcvr_old, ctcvr_old2)
chi_cal(ctcvr_tst_old)
# New users, CT-CVR
print("【6】新用户CT-CVR数据波动假设检验结果：")
chi_ctcvr_new_recently = chi_DATA_recently_all("diary_meigou_newUser", "diary_exp_newUser", "diary_meigou_crv", five_days, yesterday)
ctcvr_new = _round_1dp(chi_ctcvr_new_recently)

chi_ctcvr_new_yesterday = chi_DATA_yesterday_all("diary_meigou_newUser", "diary_exp_newUser", "diary_meigou_crv", yesterday)
ctcvr_new2 = _as_floats(chi_ctcvr_new_yesterday)

ctcvr_tst_new = data_cal(ctcvr_new, ctcvr_new2)
chi_cal(ctcvr_tst_new)

# ############### Data volatility check ##############
print("===============分割线,开始检测各个指标的5日内的方差和均值==================")

def get_var_data1(x,y,z,t1):
    """Query the ratio ``x/y`` from table ``z`` for rows with ``stat_date >= t1``.

    :return: the result of ``con_sql`` for this query
    """
    template = (
        "select {0}/{1} from {2} "
        "    where stat_date >= '{3}' "
    )
    return con_sql(template.format(x,y,z,t1))

def get_var_data2(x,y,z,q,t1):
    """Query the ratio ``(x+y)/z`` from table ``q`` for rows with ``stat_date >= t1``.

    :return: the result of ``con_sql`` for this query
    """
    template = (
        "select ({0}+{1})/{2} from {3} "
        "    where stat_date >= '{4}' "
    )
    return con_sql(template.format(x,y,z,q,t1))

def collect_data(data):
    """Return the first field of every row as a float multiplied by 100.

    :param data: sequence of indexable rows
    :return: list of floats, one per row
    """
    percentages = []
    for row in data:
        percentages.append(float(row[0]) * 100)
    return percentages

# For each metric: fetch the daily ratios since five_days, scale them to
# percent with collect_data, then print np.var (population variance, the
# numpy default) and np.mean of those values.

# [1] Old-user CT-CVR
var_ctcvr_old_data = get_var_data1("diary_meigou_oldUser", "diary_exp_oldUser", "diary_meigou_crv", five_days)
var_ctcvr_old_D = collect_data(var_ctcvr_old_data)
var_ctcvr_old = np.var(var_ctcvr_old_D)
mean_var_ctcvr_old = np.mean(var_ctcvr_old_D)
print("【1-1】老用户CT-CVR数据波动5日内方差检验结果：{}".format(var_ctcvr_old))
print("【1-2】老用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_old))
print("\n")


# [2] New-user CT-CVR
var_ctcvr_new_data = get_var_data1("diary_meigou_newUser", "diary_exp_newUser", "diary_meigou_crv", five_days)
var_ctcvr_new_D = collect_data(var_ctcvr_new_data)
var_ctcvr_new = np.var(var_ctcvr_new_D)
mean_var_ctcvr_new = np.mean(var_ctcvr_new_D)
print("【2-1】新用户CT-CVR数据波动5日内方差检验结果：{}".format(var_ctcvr_new))
print("【2-2】新用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_new))
print("\n")

# [3] Old-user CVR
var_cvr_old_data = get_var_data1("diary_meigou_oldUser", "diary_clk_oldUser", "diary_meigou_crv", five_days)
var_cvr_old_D = collect_data(var_cvr_old_data)
var_cvr_old = np.var(var_cvr_old_D)
mean_var_cvr_old = np.mean(var_cvr_old_D)
print("【3-1】老用户CVR数据波动5日内方差检验结果：{}".format(var_cvr_old))
print("【3-2】老用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_old))
print("\n")

# [4] New-user CVR
var_cvr_new_data = get_var_data1("diary_meigou_newUser", "diary_clk_newUser", "diary_meigou_crv", five_days)
var_cvr_new_D = collect_data(var_cvr_new_data)
var_cvr_new = np.var(var_cvr_new_D)
mean_var_cvr_new = np.mean(var_cvr_new_D)
print("【4-1】新用户CVR数据波动5日内方差检验结果：{}".format(var_cvr_new))
print("【4-2】新用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_new))
print("\n")

# [5] Old-user CTR
var_ctr_old_data = get_var_data2("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all", "on_click_diary_card", five_days)
var_ctr_old_D = collect_data(var_ctr_old_data)
# Fixed: the variance was previously taken over var_cvr_old_D (the old-user
# CVR series), so the reported CTR variance was really the CVR variance.
var_ctr_old = np.var(var_ctr_old_D)
mean_var_ctr_old = np.mean(var_ctr_old_D)
print("【5-1】老用户CTR数据波动5日内方差检验结果：{}".format(var_ctr_old))
print("【5-2】老用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_old))
print("\n")

# [6] New-user CTR
var_ctr_new_data = get_var_data2("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all", "on_click_diary_card", five_days)
var_ctr_new_D = collect_data(var_ctr_new_data)
var_ctr_new = np.var(var_ctr_new_D)
mean_var_ctr_new = np.mean(var_ctr_new_D)
print("【6-1】新用户CTR数据波动5日内方差检验结果：{}".format(var_ctr_new))
print("【6-2】新用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_new))
print("\n")

# [7] New-user precise-exposure CTR
var_ctr_new_precise_data = get_var_data2("clk_count_newUser_all_a", "clk_count_newUser_all_b", "imp_count_newUser_all_precise", "on_click_diary_card", five_days)
var_ctr_new_precise_D = collect_data(var_ctr_new_precise_data)
var_ctr_new_precise = np.var(var_ctr_new_precise_D)
mean_var_ctr_new_precise = np.mean(var_ctr_new_precise_D)
print("【7-1】新用户精准曝光CTR数据波动5日内方差检验结果：{}".format(var_ctr_new_precise))
print("【7-2】新用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_new_precise))
print("\n")

# [8] Old-user precise-exposure CTR
var_ctr_old_precise_data = get_var_data2("clk_count_oldUser_all_a", "clk_count_oldUser_all_b", "imp_count_oldUser_all_precise", "on_click_diary_card", five_days)
var_ctr_old_precise_D = collect_data(var_ctr_old_precise_data)
var_ctr_old_precise = np.var(var_ctr_old_precise_D)
mean_var_ctr_old_precise = np.mean(var_ctr_old_precise_D)
print("【8-1】老用户精准曝光CTR数据波动5日内方差检验结果：{}".format(var_ctr_old_precise))
print("【8-2】老用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_old_precise))
print("\n")


# print("============================分割线===================================")
#根据新老用户进行区分
# print("============================新用户各指标假设检验结果分析===================================")
# #新用户cvr假设检验
# print("【1】新用户CVR假设检验结果：")
# crv_new_ttest1=t_test(x_crv_new,y_crv_new)
# #新用户ct_cvr假设检验
# print("【3】新用户CT-CVR假设检验结果：")
# ctcrv_new_ttest1=t_test(x_ctcrv_new,y_ctcrv_new)
# #新用户ctr假设检验
# print("【5】新用户CTR假设检验结果：")
# ctr_new_ttest1=t_test(x_ctr_new,y_ctr_new)
# #新用户ctr(on_click_diary_card)假设检验
# print("【7】新用户CTR假设检验(日记本列表ctr)（on_click_diary_card）结果：")
# ctr_new_o_ttest1=t_test(x_ctr_new_o,y_ctr_new_o)
#
#
#
#
#
# print("============================老用户各指标假设检验结果分析===================================")
# #老用户cvr假设检验
# print("【2】老用户CVR假设检验结果：")
# crv_old_ttest1=t_test(x_crv_old,y_crv_old)
# # #老用户ct_cvr假设检验
# print("【4】老用户CT-CVR假设检验结果：")
# ctcrv_old_ttest1=t_test(x_ctcrv_old,y_ctcrv_old)
# #老用户ctr假设检验
# print("【6】老用户CTR假设检验结果：")
# ctr_old_ttest1=t_test(x_ctr_old,y_ctr_old)
# #老用户ctr(on_click_diary_card)假设检验
# print("【8】老用户CTR假设检验(日记本列表ctr)（on_click_diary_card）结果：")
# ctr_old_o_ttest1=t_test(x_ctr_old_o,y_ctr_old_o)


##发送邮件

# my_sender='gaoyazhe@igengmei.com'
# my_pass = 'VCrKTui99a7ALhiK'
# my_user1='wangzhiwei@igengmei.com'
# def mail():
#     ret = True
#     try:
#         text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttp://www.baidu.com"
#         msg = MIMEText(text, 'plain', 'utf-8')
#         msg['From'] = formataddr(["王志伟", my_sender])
#         msg['To'] = my_user1
#         msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计"
#         server = smtplib.SMTP_SSL("smtp.exmail.qq.com", 465)
#         server.login(my_sender, my_pass)
#         server.sendmail(my_sender, [my_user1], msg.as_string())
#         server.quit()
#     except Exception:
#         ret=False
#     return ret
#
# ret=mail()
# if ret:
#     print("邮件发送成功")
# else:
#     print("邮件发送失败")

# chi_cvr_new=
# chi_cvr_old=
#
# chi_ctcvr_new=
# chi_ctcvr_old=
#
#
#
# def chi_cal(data):