Commit ebd1d550 authored by 王志伟

数据指标波动假设检验统计

parent fbbd84f7
...@@ -38,7 +38,7 @@ def get_somedate(): ...@@ -38,7 +38,7 @@ def get_somedate():
return someday return someday
ten_days=get_somedate() ten_days=get_somedate()
# print(ten_days) # print(ten_days)
print("===========分割线,T检验最近10日指标与策略前10日指标是否获得显著提升============")
#获取最近10天的数据 #获取最近10天的数据
def DATA_recently(x,y,z,q,t): def DATA_recently(x,y,z,q,t):
ten_days = get_somedate() ten_days = get_somedate()
...@@ -117,20 +117,20 @@ def t_test(x,y): #进行t检验 ...@@ -117,20 +117,20 @@ def t_test(x,y): #进行t检验
t_p_value=t_test[1] t_p_value=t_test[1]
# print(t_p_value) # print(t_p_value)
if t_p_value>0.05: if t_p_value>0.05:
print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:{}" .format(t_p_value)) print("95%置信度认为策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:{}" .format(t_p_value))
print("\n") print("\n")
else: else:
print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}" .format(t_p_value)) print("95%置信度认为策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}" .format(t_p_value))
print("\n") print("\n")
else: #认为数据方差不具有齐性,equal_var=false else: #认为数据方差不具有齐性,equal_var=false
t_test = ttest_ind(x, y, equal_var=False) t_test = ttest_ind(x, y, equal_var=False)
t_p_value = t_test[1] t_p_value = t_test[1]
# print(t_p_value) # print(t_p_value)
if t_p_value > 0.05: if t_p_value > 0.05:
print("策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:{}" .format(t_p_value)) print("95%置信度认为策略前后两组数据无显著性差异,即该指标没有显著提升,p_value:{}" .format(t_p_value))
print("\n") print("\n")
else: else:
print("策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}" .format(t_p_value)) print("95%置信度认为策略前后两组数据有显著性差异,即该指标获得显著提升,p_value:{}" .format(t_p_value))
print("\n") print("\n")
# #
# ###假设检验,判断是否具有显著性 # ###假设检验,判断是否具有显著性
...@@ -165,7 +165,7 @@ print("【8】老用户CTR假设检验(on_click_diary_card)结果:") ...@@ -165,7 +165,7 @@ print("【8】老用户CTR假设检验(on_click_diary_card)结果:")
ctr_old_o_ttest=t_test(x_ctr_old_o,y_ctr_old_o) ctr_old_o_ttest=t_test(x_ctr_old_o,y_ctr_old_o)
# #
# ###############推荐策略不变的情况下数据假设检验############## # ###############推荐策略不变的情况下数据假设检验##############
print("============================分割线===================================") print("===========分割线,卡方检验昨日指标与前5日指标均值是否显著变化============")
# #1 计算每日指标卡方检验 # #1 计算每日指标卡方检验
# #
# #自动获取5日前的日期 # #自动获取5日前的日期
...@@ -306,7 +306,7 @@ ctcvr_tst_new=data_cal(ctcvr_new,ctcvr_new2) ...@@ -306,7 +306,7 @@ ctcvr_tst_new=data_cal(ctcvr_new,ctcvr_new2)
chi_cal(ctcvr_tst_new) chi_cal(ctcvr_tst_new)
# ###############数据波动大小检验############## # ###############数据波动大小检验##############
print("============================分割线===================================") print("============================分割线,开始检测各个指标的5日内的方差和均值===================================")
def get_var_data1(x,y,z,t1): def get_var_data1(x,y,z,t1):
sql_cid = "select {0}/{1} from {2} \ sql_cid = "select {0}/{1} from {2} \
...@@ -328,8 +328,8 @@ var_ctcvr_old_data=get_var_data1("diary_meigou_oldUser","diary_exp_oldUser","dia ...@@ -328,8 +328,8 @@ var_ctcvr_old_data=get_var_data1("diary_meigou_oldUser","diary_exp_oldUser","dia
var_ctcvr_old_D=collect_data(var_ctcvr_old_data) var_ctcvr_old_D=collect_data(var_ctcvr_old_data)
var_ctcvr_old=np.var(var_ctcvr_old_D) var_ctcvr_old=np.var(var_ctcvr_old_D)
mean_var_ctcvr_old=np.mean(var_ctcvr_old_D) mean_var_ctcvr_old=np.mean(var_ctcvr_old_D)
print("【1】老用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_old)) print("【1-1】老用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_old))
print("老用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_old)) print("【1-2】老用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_old))
print("\n") print("\n")
...@@ -337,16 +337,16 @@ var_ctcvr_new_data=get_var_data1("diary_meigou_newUser","diary_exp_newUser","dia ...@@ -337,16 +337,16 @@ var_ctcvr_new_data=get_var_data1("diary_meigou_newUser","diary_exp_newUser","dia
var_ctcvr_new_D=collect_data(var_ctcvr_new_data) var_ctcvr_new_D=collect_data(var_ctcvr_new_data)
var_ctcvr_new=np.var(var_ctcvr_new_D) var_ctcvr_new=np.var(var_ctcvr_new_D)
mean_var_ctcvr_new=np.mean(var_ctcvr_new_D) mean_var_ctcvr_new=np.mean(var_ctcvr_new_D)
print("【2】新用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_new)) print("【2-1】新用户CT-CVR数据波动5日内方差检验结果:{}".format(var_ctcvr_new))
print("新用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_new)) print("【2-2】新用户CT-CVR数据波动5日内均值:{}%".format(mean_var_ctcvr_new))
print("\n") print("\n")
var_cvr_old_data=get_var_data1("diary_meigou_oldUser","diary_clk_oldUser","diary_meigou_crv",five_days) var_cvr_old_data=get_var_data1("diary_meigou_oldUser","diary_clk_oldUser","diary_meigou_crv",five_days)
var_cvr_old_D=collect_data(var_cvr_old_data) var_cvr_old_D=collect_data(var_cvr_old_data)
var_cvr_old=np.var(var_cvr_old_D) var_cvr_old=np.var(var_cvr_old_D)
mean_var_cvr_old=np.mean(var_cvr_old_D) mean_var_cvr_old=np.mean(var_cvr_old_D)
print("【3】老用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_old)) print("【3-1】老用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_old))
print("老用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_old)) print("【3-2】老用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_old))
print("\n") print("\n")
# #
...@@ -354,40 +354,40 @@ var_cvr_new_data=get_var_data1("diary_meigou_newUser","diary_clk_newUser","diary ...@@ -354,40 +354,40 @@ var_cvr_new_data=get_var_data1("diary_meigou_newUser","diary_clk_newUser","diary
var_cvr_new_D=collect_data(var_cvr_new_data) var_cvr_new_D=collect_data(var_cvr_new_data)
var_cvr_new=np.var(var_cvr_new_D) var_cvr_new=np.var(var_cvr_new_D)
mean_var_cvr_new=np.mean(var_cvr_new_D) mean_var_cvr_new=np.mean(var_cvr_new_D)
print("【4】新用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_new)) print("【4-1】新用户CVR数据波动5日内方差检验结果:{}".format(var_cvr_new))
print("新用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_new)) print("【4-2】新用户CVR数据波动5日内均值:{}%".format(mean_var_cvr_new))
print("\n") print("\n")
# Old-user CTR over the five-day window: report the variance and the mean of
# the collected series. get_var_data2, collect_data and five_days are defined
# outside this excerpt.
var_ctr_old_data = get_var_data2(
    "clk_count_oldUser_all_a",
    "clk_count_oldUser_all_b",
    "imp_count_oldUser_all",
    "on_click_diary_card",
    five_days,
)
var_ctr_old_D = collect_data(var_ctr_old_data)
# Fix: the variance was previously taken over var_cvr_old_D (the old-user CVR
# series), so the CTR report printed the CVR variance next to the CTR mean.
var_ctr_old = np.var(var_ctr_old_D)
mean_var_ctr_old = np.mean(var_ctr_old_D)
print("【5-1】老用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_old))
print("【5-2】老用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_old))
print("\n")
var_ctr_new_data=get_var_data2("clk_count_newUser_all_a","clk_count_newUser_all_b","imp_count_newUser_all","on_click_diary_card",five_days) var_ctr_new_data=get_var_data2("clk_count_newUser_all_a","clk_count_newUser_all_b","imp_count_newUser_all","on_click_diary_card",five_days)
var_ctr_new_D=collect_data(var_ctr_new_data) var_ctr_new_D=collect_data(var_ctr_new_data)
var_ctr_new=np.var(var_ctr_new_D) var_ctr_new=np.var(var_ctr_new_D)
mean_var_ctr_new=np.mean(var_ctr_new_D) mean_var_ctr_new=np.mean(var_ctr_new_D)
print("【6】新用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_new)) print("【6-1】新用户CTR数据波动5日内方差检验结果:{}".format(var_ctr_new))
print("新用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_new)) print("【6-2】新用户CTR数据波动5日内均值:{}%".format(mean_var_ctr_new))
print("\n") print("\n")
var_ctr_new_precise_data=get_var_data2("clk_count_newUser_all_a","clk_count_newUser_all_b","imp_count_newUser_all_precise","on_click_diary_card",five_days) var_ctr_new_precise_data=get_var_data2("clk_count_newUser_all_a","clk_count_newUser_all_b","imp_count_newUser_all_precise","on_click_diary_card",five_days)
var_ctr_new_precise_D=collect_data(var_ctr_new_precise_data) var_ctr_new_precise_D=collect_data(var_ctr_new_precise_data)
var_ctr_new_precise=np.var(var_ctr_new_precise_D) var_ctr_new_precise=np.var(var_ctr_new_precise_D)
mean_var_ctr_new_precise=np.mean(var_ctr_new_precise_D) mean_var_ctr_new_precise=np.mean(var_ctr_new_precise_D)
print("【7】新用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_new_precise)) print("【7-1】新用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_new_precise))
print("新用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_new_precise)) print("【7-2】新用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_new_precise))
print("\n") print("\n")
var_ctr_old_precise_data=get_var_data2("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all_precise","on_click_diary_card",five_days) var_ctr_old_precise_data=get_var_data2("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all_precise","on_click_diary_card",five_days)
var_ctr_old_precise_D=collect_data(var_ctr_old_precise_data) var_ctr_old_precise_D=collect_data(var_ctr_old_precise_data)
var_ctr_old_precise=np.var(var_ctr_old_precise_D) var_ctr_old_precise=np.var(var_ctr_old_precise_D)
mean_var_ctr_old_precise=np.mean(var_ctr_old_precise_D) mean_var_ctr_old_precise=np.mean(var_ctr_old_precise_D)
print("【8】老用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_old_precise)) print("【8-1】老用户精准曝光CTR数据波动5日内方差检验结果:{}".format(var_ctr_old_precise))
print("老用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_old_precise)) print("【8-2】老用户精准曝光CTR数据波动5日内均值:{}%".format(mean_var_ctr_old_precise))
print("\n") print("\n")
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment