Commit f26a48d9 authored by 王志伟

数据指标波动假设检验统计

parent 69e6eda7
...@@ -181,24 +181,56 @@ def chi_DATA_yesterday(x,y,z,q,t1): ...@@ -181,24 +181,56 @@ def chi_DATA_yesterday(x,y,z,q,t1):
CVR_DATA_yesterday = con_sql(sql_cid)[0] CVR_DATA_yesterday = con_sql(sql_cid)[0]
return CVR_DATA_yesterday return CVR_DATA_yesterday
#整理数据
def data_cal(x, y):
    """Arrange two (first, second) pairs into a two-column DataFrame.

    For each input the column holds ``[value[0], value[1] - value[0]]``.
    ``x`` fills the '原' column and ``y`` fills the '测' column.

    Args:
        x: indexable with at least two numeric entries.
        y: indexable with at least two numeric entries.

    Returns:
        A pandas DataFrame with two rows and the columns '原' and '测'.
    """
    def _split(pair):
        # First entry as-is, then the remainder of the second entry.
        head = pair[0]
        return [head, pair[1] - head]

    return pd.DataFrame({'原': _split(x), '测': _split(y)})
# Query the recent window (five_days through yesterday) for the listed count columns.
# NOTE(review): the column names suggest click / impression counts -- confirm against chi_DATA_recently.
chi_ctr_precise_recently=chi_DATA_recently("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all_precise","on_click_diary_card",five_days,yesterday) chi_ctr_precise_recently=chi_DATA_recently("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all_precise","on_click_diary_card",five_days,yesterday)
# Round every returned value to one decimal place with Decimal.quantize, then convert it to float.
temp1=[float(str(Decimal(chi_ctr_precise_recently[i]).quantize(Decimal('0.0')))) for i in range(len(chi_ctr_precise_recently))] temp1=[float(str(Decimal(chi_ctr_precise_recently[i]).quantize(Decimal('0.0')))) for i in range(len(chi_ctr_precise_recently))]
temp1_a=[temp1[0],temp1[1]-temp1[0]] # print(temp1)
print(temp1)
# Same query restricted to yesterday.
chi_ctr_precise_yesterday=chi_DATA_yesterday("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all_precise","on_click_diary_card",yesterday) chi_ctr_precise_yesterday=chi_DATA_yesterday("clk_count_oldUser_all_a","clk_count_oldUser_all_b","imp_count_oldUser_all_precise","on_click_diary_card",yesterday)
# Plain float conversion here; unlike temp1, these values are not rounded.
temp2=[float(chi_ctr_precise_yesterday[i]) for i in range(len(chi_ctr_precise_yesterday))] temp2=[float(chi_ctr_precise_yesterday[i]) for i in range(len(chi_ctr_precise_yesterday))]
temp2_a=[temp2[0],temp2[1]-temp2[0]] # print(temp2)
# Build the two-column table from both value lists via data_cal and print it.
print(temp2) tst=data_cal(temp1,temp2)
print(tst)
# Same table built inline, then extended with a '共计' (total) column followed by a '共计' (total) row.
a3=pd.DataFrame({'原':temp1_a,'测':temp2_a})
a3['共计'] = a3.apply(lambda x: x.sum(), axis=1)
print(a3)
a3.loc['共计'] = a3.apply(lambda x: x.sum())
print(a3)
# print(chi_ctr_precise_recently) # print(chi_ctr_precise_recently)
# print(chi_ctr_precise_yesterday) # print(chi_ctr_precise_yesterday)
def chi_cal(data):
    """Run a Pearson chi-square test on a 2x2 table and print the verdict.

    The four observed counts are read positionally from the first two rows
    and first two columns of ``data``. The expected count of each cell is
    ``row_total * column_total / grand_total``. The input frame is not
    modified.

    Args:
        data: pandas DataFrame whose top-left 2x2 corner holds the observed
            counts.

    Returns:
        The chi-square statistic as a float.

    Raises:
        ValueError: if any row total or column total is zero, because the
            expected frequencies (and so the statistic) are undefined.
    """
    # Positional access via .iloc avoids integer keys on label-indexed rows.
    observed = (
        (float(data.iloc[0, 0]), float(data.iloc[0, 1])),
        (float(data.iloc[1, 0]), float(data.iloc[1, 1])),
    )

    # Marginal totals, computed locally instead of appending them to `data`.
    row_totals = [sum(row) for row in observed]
    col_totals = [sum(col) for col in zip(*observed)]
    grand_total = sum(row_totals)
    if 0 in row_totals or 0 in col_totals:
        raise ValueError(
            "chi-square test is undefined when a row or column total is zero"
        )

    # Sum (observed - expected)^2 / expected over the four cells.
    chi_square = 0.0
    for row, row_total in zip(observed, row_totals):
        for count, col_total in zip(row, col_totals):
            expected = row_total * col_total / grand_total
            chi_square += (count - expected) ** 2 / expected

    # A 2x2 table has (2 - 1) * (2 - 1) = 1 degree of freedom; 3.84 is the
    # table value for that at the 95% level.
    critical_value = 3.84
    if chi_square > critical_value:
        print("数据波动较大,超出正常波动范围,95%可能性属于指标显著变化,请关注")
    else:
        print("数据波动较小,95%可能性属于正常波动范围")
    return chi_square
# chi_cvr_new= # chi_cvr_new=
# chi_cvr_old= # chi_cvr_old=
# #
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment