Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
f26a48d9
Commit
f26a48d9
authored
Feb 19, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
数据指标波动假设检验统计
parent
69e6eda7
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
41 additions
and
9 deletions
+41
-9
hypothesis_test.py
eda/recommended_indexs/hypothesis_test.py
+41
-9
No files found.
eda/recommended_indexs/hypothesis_test.py
View file @
f26a48d9
...
...
@@ -181,24 +181,56 @@ def chi_DATA_yesterday(x,y,z,q,t1):
CVR_DATA_yesterday
=
con_sql
(
sql_cid
)[
0
]
return
CVR_DATA_yesterday
# Arrange the data
def data_cal(x, y):
    """Build a two-column DataFrame from two indexable ``[first, second]`` inputs.

    Each input contributes one column holding ``[v[0], v[1] - v[0]]``:
    ``x`` fills the '原' column and ``y`` fills the '测' column.

    Args:
        x: Indexable with at least two numeric elements.
        y: Indexable with at least two numeric elements.

    Returns:
        A ``pd.DataFrame`` with columns '原' and '测' and two rows.
    """
    columns = {}
    for label, pair in (('原', x), ('测', y)):
        # First row keeps the first element; second row is the remainder.
        columns[label] = [pair[0], pair[1] - pair[0]]
    return pd.DataFrame(columns)
# Query the values for the range given by five_days and yesterday.
# NOTE(review): presumably click / impression counts for a CTR comparison —
# confirm against chi_DATA_recently.
chi_ctr_precise_recently = chi_DATA_recently(
    "clk_count_oldUser_all_a",
    "clk_count_oldUser_all_b",
    "imp_count_oldUser_all_precise",
    "on_click_diary_card",
    five_days,
    yesterday,
)
# Round every value to one decimal place through Decimal, then store as float.
temp1 = [
    float(str(Decimal(value).quantize(Decimal('0.0'))))
    for value in chi_ctr_precise_recently
]
# [first value, second value minus first value]
temp1_a = [temp1[0], temp1[1] - temp1[0]]
print(temp1)

# Same query restricted to the value passed as yesterday.
chi_ctr_precise_yesterday = chi_DATA_yesterday(
    "clk_count_oldUser_all_a",
    "clk_count_oldUser_all_b",
    "imp_count_oldUser_all_precise",
    "on_click_diary_card",
    yesterday,
)
temp2 = [float(value) for value in chi_ctr_precise_yesterday]
temp2_a = [temp2[0], temp2[1] - temp2[0]]
print(temp2)

# Table built by the shared helper.
tst = data_cal(temp1, temp2)
print(tst)

# The same table built inline, then extended with a '共计' (total) column
# and a '共计' (total) row; printed after each step.
a3 = pd.DataFrame({'原': temp1_a, '测': temp2_a})
a3['共计'] = a3.sum(axis=1)
print(a3)
a3.loc['共计'] = a3.sum()
print(a3)
def chi_cal(data):
    """Run a chi-square test on a 2x2 table and print the verdict.

    The first two rows and first two columns of ``data`` are taken as the
    observed counts. For each cell the expected count is
    ``row total * column total / grand total``, and the statistic is the sum
    of ``(observed - expected) ** 2 / expected`` over the four cells.

    The statistic is compared with 3.84, the critical value for one degree
    of freedom at the 0.05 significance level (a 2x2 table has
    ``(2 - 1) * (2 - 1) = 1`` degree of freedom).

    Args:
        data: ``pd.DataFrame`` whose top-left 2x2 corner holds the observed
            counts. The frame is only read, never modified, so the function
            can be called repeatedly on the same object.

    Returns:
        The chi-square statistic as a ``float``.
    """
    # Read the observed counts positionally, without touching the caller's frame.
    observed = data.iloc[:2, :2]
    t11_count = observed.iloc[0, 0]
    t12_count = observed.iloc[0, 1]
    t21_count = observed.iloc[1, 0]
    t22_count = observed.iloc[1, 1]

    # Marginal totals.
    row1_total = t11_count + t12_count
    row2_total = t21_count + t22_count
    col1_total = t11_count + t21_count
    col2_total = t12_count + t22_count
    grand_total = row1_total + row2_total

    # Expected counts: each row total is split by the column shares.
    rate1 = col1_total / grand_total
    rate2 = col2_total / grand_total
    t11_theory = row1_total * rate1
    t12_theory = row1_total * rate2
    t21_theory = row2_total * rate1
    t22_theory = row2_total * rate2

    # Chi-square statistic.
    X = (
        ((t11_count - t11_theory) ** 2) / t11_theory
        + ((t12_count - t12_theory) ** 2) / t12_theory
        + ((t21_count - t21_theory) ** 2) / t21_theory
        + ((t22_count - t22_theory) ** 2) / t22_theory
    )

    # 3.84 is the table value for 1 degree of freedom at the 0.05 level.
    if X > 3.84:
        print("数据波动较大,超出正常波动范围,95%可能性属于指标显著变化,请关注")
    else:
        print("数据波动较小,95%可能性属于正常波动范围")
    return float(X)
# chi_cvr_new=
# chi_cvr_old=
#
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment