Commit fbbd84f7 authored by 王志伟

数据指标波动假设检验统计

parent 5cf5f5de
...@@ -10,6 +10,8 @@ import smtplib ...@@ -10,6 +10,8 @@ import smtplib
from email.mime.text import MIMEText from email.mime.text import MIMEText
from email.utils import formataddr from email.utils import formataddr
f=open('/srv/apps/ffm-baseline/eda/recommended_indexs/test.txt','w')
#########推荐策略前后统计指标假设检验(t检验)############### #########推荐策略前后统计指标假设检验(t检验)###############
#自动获取昨日日期 #自动获取昨日日期
...@@ -23,7 +25,7 @@ def get_yesterday_date(): ...@@ -23,7 +25,7 @@ def get_yesterday_date():
yesterday = yesterday.strftime("%Y-%m-%d") yesterday = yesterday.strftime("%Y-%m-%d")
return yesterday return yesterday
yesterday=get_yesterday_date() yesterday=get_yesterday_date()
print(yesterday) print("监测数据日期:{}".format(yesterday))
#自动获取10日前的日期 #自动获取10日前的日期
def get_somedate(): def get_somedate():
#自动获取10日前的日期,如"2018-07-28" #自动获取10日前的日期,如"2018-07-28"
...@@ -119,6 +121,7 @@ def t_test(x,y): #进行t检验 ...@@ -119,6 +121,7 @@ def t_test(x,y): #进行t检验
print("\n") print("\n")
else: else:
print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}" .format(t_p_value)) print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}" .format(t_p_value))
print("\n")
else: #认为数据方差不具有齐性,equal_var=false else: #认为数据方差不具有齐性,equal_var=false
t_test = ttest_ind(x, y, equal_var=False) t_test = ttest_ind(x, y, equal_var=False)
t_p_value = t_test[1] t_p_value = t_test[1]
...@@ -128,6 +131,7 @@ def t_test(x,y): #进行t检验 ...@@ -128,6 +131,7 @@ def t_test(x,y): #进行t检验
print("\n") print("\n")
else: else:
print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}" .format(t_p_value)) print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}" .format(t_p_value))
print("\n")
# #
# ###假设检验,判断是否具有显著性 # ###假设检验,判断是否具有显著性
# #
...@@ -233,8 +237,10 @@ def chi_cal(data): ...@@ -233,8 +237,10 @@ def chi_cal(data):
#查表发现阈值为3.84 #查表发现阈值为3.84
if X>3.84: if X>3.84:
print("数据波动较大,超出正常波动范围,95%可能性属于指标显著变化,请关注") print("数据波动较大,超出正常波动范围,95%可能性属于指标显著变化,请关注")
print("\n")
else: else:
print("数据波动较小,95%可能性属于正常波动范围") print("数据波动较小,95%可能性属于正常波动范围")
print("\n")
#老用户精准点击曝光数据(首页精选日记本列表on_click_diary_card) #老用户精准点击曝光数据(首页精选日记本列表on_click_diary_card)
print("【1】(精准曝光)首页精选日记本列表老用户ctr数据波动假设检验结果:") print("【1】(精准曝光)首页精选日记本列表老用户ctr数据波动假设检验结果:")
...@@ -324,6 +330,7 @@ var_ctcvr_old=np.var(var_ctcvr_old_D) ...@@ -324,6 +330,7 @@ var_ctcvr_old=np.var(var_ctcvr_old_D)
mean_var_ctcvr_old=np.mean(var_ctcvr_old_D) mean_var_ctcvr_old=np.mean(var_ctcvr_old_D)
print("【1】老用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_old)) print("【1】老用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_old))
print("老用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_old)) print("老用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_old))
print("\n")
var_ctcvr_new_data=get_var_data1("diary_meigou_newUser","diary_exp_newUser","diary_meigou_crv",five_days) var_ctcvr_new_data=get_var_data1("diary_meigou_newUser","diary_exp_newUser","diary_meigou_crv",five_days)
...@@ -332,6 +339,7 @@ var_ctcvr_new=np.var(var_ctcvr_new_D) ...@@ -332,6 +339,7 @@ var_ctcvr_new=np.var(var_ctcvr_new_D)
mean_var_ctcvr_new=np.mean(var_ctcvr_new_D) mean_var_ctcvr_new=np.mean(var_ctcvr_new_D)
print("【2】新用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_new)) print("【2】新用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_new))
print("新用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_new)) print("新用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_new))
print("\n")
var_cvr_old_data=get_var_data1("diary_meigou_oldUser","diary_clk_oldUser","diary_meigou_crv",five_days) var_cvr_old_data=get_var_data1("diary_meigou_oldUser","diary_clk_oldUser","diary_meigou_crv",five_days)
var_cvr_old_D=collect_data(var_cvr_old_data) var_cvr_old_D=collect_data(var_cvr_old_data)
...@@ -339,6 +347,7 @@ var_cvr_old=np.var(var_cvr_old_D) ...@@ -339,6 +347,7 @@ var_cvr_old=np.var(var_cvr_old_D)
mean_var_cvr_old=np.mean(var_cvr_old_D) mean_var_cvr_old=np.mean(var_cvr_old_D)
print("【3】老用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_old)) print("【3】老用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_old))
print("老用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_old)) print("老用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_old))
print("\n")
# #
var_cvr_new_data=get_var_data1("diary_meigou_newUser","diary_clk_newUser","diary_meigou_crv",five_days) var_cvr_new_data=get_var_data1("diary_meigou_newUser","diary_clk_newUser","diary_meigou_crv",five_days)
...@@ -347,6 +356,7 @@ var_cvr_new=np.var(var_cvr_new_D) ...@@ -347,6 +356,7 @@ var_cvr_new=np.var(var_cvr_new_D)
mean_var_cvr_new=np.mean(var_cvr_new_D) mean_var_cvr_new=np.mean(var_cvr_new_D)
print("【4】新用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_new)) print("【4】新用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_new))
print("新用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_new)) print("新用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_new))
print("\n")
var_ctr_old_data=get_var_data2("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all","on_click_diary_card",five_days) var_ctr_old_data=get_var_data2("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all","on_click_diary_card",five_days)
var_ctr_old_D=collect_data(var_ctr_old_data) var_ctr_old_D=collect_data(var_ctr_old_data)
...@@ -354,6 +364,7 @@ var_ctr_old=np.var(var_cvr_old_D) ...@@ -354,6 +364,7 @@ var_ctr_old=np.var(var_cvr_old_D)
mean_var_ctr_old=np.mean(var_ctr_old_D) mean_var_ctr_old=np.mean(var_ctr_old_D)
print("【5】老用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_old)) print("【5】老用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_old))
print("老用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_old)) print("老用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_old))
print("\n")
var_ctr_new_data=get_var_data2("clk_count_newUser_all_a","clk_count_newUser_all_b","imp_count_newUser_all","on_click_diary_card",five_days) var_ctr_new_data=get_var_data2("clk_count_newUser_all_a","clk_count_newUser_all_b","imp_count_newUser_all","on_click_diary_card",five_days)
var_ctr_new_D=collect_data(var_ctr_new_data) var_ctr_new_D=collect_data(var_ctr_new_data)
...@@ -361,6 +372,7 @@ var_ctr_new=np.var(var_ctr_new_D) ...@@ -361,6 +372,7 @@ var_ctr_new=np.var(var_ctr_new_D)
mean_var_ctr_new=np.mean(var_ctr_new_D) mean_var_ctr_new=np.mean(var_ctr_new_D)
print("【6】新用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_new)) print("【6】新用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_new))
print("新用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_new)) print("新用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_new))
print("\n")
var_ctr_new_precise_data=get_var_data2("clk_count_newUser_all_a","clk_count_newUser_all_b","imp_count_newUser_all_precise","on_click_diary_card",five_days) var_ctr_new_precise_data=get_var_data2("clk_count_newUser_all_a","clk_count_newUser_all_b","imp_count_newUser_all_precise","on_click_diary_card",five_days)
var_ctr_new_precise_D=collect_data(var_ctr_new_precise_data) var_ctr_new_precise_D=collect_data(var_ctr_new_precise_data)
...@@ -368,6 +380,7 @@ var_ctr_new_precise=np.var(var_ctr_new_precise_D) ...@@ -368,6 +380,7 @@ var_ctr_new_precise=np.var(var_ctr_new_precise_D)
mean_var_ctr_new_precise=np.mean(var_ctr_new_precise_D) mean_var_ctr_new_precise=np.mean(var_ctr_new_precise_D)
print("【7】新用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_new_precise)) print("【7】新用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_new_precise))
print("新用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_new_precise)) print("新用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_new_precise))
print("\n")
var_ctr_old_precise_data=get_var_data2("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all_precise","on_click_diary_card",five_days) var_ctr_old_precise_data=get_var_data2("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all_precise","on_click_diary_card",five_days)
var_ctr_old_precise_D=collect_data(var_ctr_old_precise_data) var_ctr_old_precise_D=collect_data(var_ctr_old_precise_data)
...@@ -375,38 +388,44 @@ var_ctr_old_precise=np.var(var_ctr_old_precise_D) ...@@ -375,38 +388,44 @@ var_ctr_old_precise=np.var(var_ctr_old_precise_D)
mean_var_ctr_old_precise=np.mean(var_ctr_old_precise_D) mean_var_ctr_old_precise=np.mean(var_ctr_old_precise_D)
print("【8】老用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_old_precise)) print("【8】老用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_old_precise))
print("老用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_old_precise)) print("老用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_old_precise))
print("\n")
# print(chi_ctr_precise_recently) # print(chi_ctr_precise_recently)
# print(chi_ctr_precise_yesterday) # print(chi_ctr_precise_yesterday)
print("============================分割线===================================") print("============================分割线===================================")
#保存文件
##发送邮件 ##发送邮件
my_sender='gaoyazhe@igengmei.com' # my_sender='gaoyazhe@igengmei.com'
my_pass = 'VCrKTui99a7ALhiK' # my_pass = 'VCrKTui99a7ALhiK'
my_user1='wangzhiwei@igengmei.com' # my_user1='wangzhiwei@igengmei.com'
def mail(): # def mail():
ret = True # ret = True
try: # try:
text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttp://www.baidu.com" # text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttp://www.baidu.com"
msg = MIMEText(text, 'plain', 'utf-8') # msg = MIMEText(text, 'plain', 'utf-8')
msg['From'] = formataddr(["王志伟", my_sender]) # msg['From'] = formataddr(["王志伟", my_sender])
msg['To'] = my_user1 # msg['To'] = my_user1
msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计" # msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计"
server = smtplib.SMTP_SSL("smtp.exmail.qq.com", 465) # server = smtplib.SMTP_SSL("smtp.exmail.qq.com", 465)
server.login(my_sender, my_pass) # server.login(my_sender, my_pass)
server.sendmail(my_sender, [my_user1], msg.as_string()) # server.sendmail(my_sender, [my_user1], msg.as_string())
server.quit() # server.quit()
except Exception: # except Exception:
ret=False # ret=False
return ret # return ret
#
ret=mail() # ret=mail()
if ret: # if ret:
print("邮件发送成功") # print("邮件发送成功")
else: # else:
print("邮件发送失败") # print("邮件发送失败")
# chi_cvr_new= # chi_cvr_new=
# chi_cvr_old= # chi_cvr_old=
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment