Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
d18eca7d
Commit
d18eca7d
authored
Feb 20, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
数据指标波动假设检验统计
parent
c87edd7b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
6 additions
and
13 deletions
+6
-13
hypothesis_test.py
eda/recommended_indexs/hypothesis_test.py
+6
-13
No files found.
eda/recommended_indexs/hypothesis_test.py
View file @
d18eca7d
...
@@ -196,30 +196,19 @@ def chi_cal(data):
...
@@ -196,30 +196,19 @@ def chi_cal(data):
t1
=
data
.
iloc
[
0
]
t1
=
data
.
iloc
[
0
]
t2
=
data
.
iloc
[
1
]
t2
=
data
.
iloc
[
1
]
t11_count
=
t1
[
0
]
t11_count
=
t1
[
0
]
print
(
"t11:{}"
.
format
(
t11_count
))
t12_count
=
t1
[
1
]
t12_count
=
t1
[
1
]
print
(
"t12:{}"
.
format
(
t12_count
))
t21_count
=
t2
[
0
]
t21_count
=
t2
[
0
]
print
(
"t21:{}"
.
format
(
t21_count
))
t22_count
=
t2
[
1
]
t22_count
=
t2
[
1
]
print
(
"t22:{}"
.
format
(
t22_count
))
###理论值计算
###理论值计算
temp1
=
data
[
'共计'
]
temp1
=
data
[
'共计'
]
print
(
"共计:{}"
.
format
(
temp1
))
rate1
=
temp1
[
0
]
/
temp1
[
2
]
rate1
=
temp1
[
0
]
/
temp1
[
2
]
print
(
"rate1:{}"
.
format
(
rate1
))
rate2
=
temp1
[
1
]
/
temp1
[
2
]
rate2
=
temp1
[
1
]
/
temp1
[
2
]
print
(
"rate2:{}"
.
format
(
rate2
))
temp2
=
data
.
iloc
[
2
]
temp2
=
data
.
iloc
[
2
]
t11_theory
=
temp2
[
0
]
*
rate1
t11_theory
=
temp2
[
0
]
*
rate1
print
(
"t11_theory:{}"
.
format
(
t11_theory
))
t12_theory
=
temp2
[
1
]
*
rate1
t12_theory
=
temp2
[
1
]
*
rate1
print
(
"t12_theory:{}"
.
format
(
t12_theory
))
t21_theory
=
temp2
[
0
]
*
rate2
t21_theory
=
temp2
[
0
]
*
rate2
print
(
"t21_theory:{}"
.
format
(
t21_theory
))
t22_theory
=
temp2
[
1
]
*
rate2
t22_theory
=
temp2
[
1
]
*
rate2
print
(
"t22_theory:{}"
.
format
(
t22_theory
))
#计算卡方值
#计算卡方值
X
=
(((
t11_count
-
t11_theory
)
**
2
)
/
t11_theory
)
+
(((
t12_count
-
t12_theory
)
**
2
)
/
t12_theory
)
+
(((
t21_count
-
t21_theory
)
**
2
)
/
t21_theory
)
+
(((
t22_count
-
t22_theory
)
**
2
)
/
t22_theory
)
X
=
(((
t11_count
-
t11_theory
)
**
2
)
/
t11_theory
)
+
(((
t12_count
-
t12_theory
)
**
2
)
/
t12_theory
)
+
(((
t21_count
-
t21_theory
)
**
2
)
/
t21_theory
)
+
(((
t22_count
-
t22_theory
)
**
2
)
/
t22_theory
)
print
(
"卡方值为:{}"
.
format
(
X
))
print
(
"卡方值为:{}"
.
format
(
X
))
...
@@ -231,7 +220,8 @@ def chi_cal(data):
...
@@ -231,7 +220,8 @@ def chi_cal(data):
else
:
else
:
print
(
"数据波动较小,95
%
可能性属于正常波动范围"
)
print
(
"数据波动较小,95
%
可能性属于正常波动范围"
)
#精准点击曝光数据(首页精选日记本列表on_click_diary_card)
#老用户精准点击曝光数据(首页精选日记本列表on_click_diary_card)
print
(
"(精准曝光)首页精选日记本列表老用户ctr数据波动假设检验结果:"
)
chi_ctr_precise_recently
=
chi_DATA_recently
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all_precise"
,
"on_click_diary_card"
,
five_days
,
yesterday
)
chi_ctr_precise_recently
=
chi_DATA_recently
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all_precise"
,
"on_click_diary_card"
,
five_days
,
yesterday
)
temp1
=
[
float
(
str
(
Decimal
(
chi_ctr_precise_recently
[
i
])
.
quantize
(
Decimal
(
'0.0'
))))
for
i
in
range
(
len
(
chi_ctr_precise_recently
))]
temp1
=
[
float
(
str
(
Decimal
(
chi_ctr_precise_recently
[
i
])
.
quantize
(
Decimal
(
'0.0'
))))
for
i
in
range
(
len
(
chi_ctr_precise_recently
))]
# print(temp1)
# print(temp1)
...
@@ -239,9 +229,12 @@ chi_ctr_precise_yesterday=chi_DATA_yesterday("clk_count_oldUser_all_a","clk_coun
...
@@ -239,9 +229,12 @@ chi_ctr_precise_yesterday=chi_DATA_yesterday("clk_count_oldUser_all_a","clk_coun
temp2
=
[
float
(
chi_ctr_precise_yesterday
[
i
])
for
i
in
range
(
len
(
chi_ctr_precise_yesterday
))]
temp2
=
[
float
(
chi_ctr_precise_yesterday
[
i
])
for
i
in
range
(
len
(
chi_ctr_precise_yesterday
))]
# print(temp2)
# print(temp2)
tst
=
data_cal
(
temp1
,
temp2
)
tst
=
data_cal
(
temp1
,
temp2
)
print
(
tst
)
chi_cal
(
tst
)
chi_cal
(
tst
)
#新用户精准点击曝光数据(首页精选日记本列表on_click_diary_card)
print
(
"(精准曝光)首页精选日记本列表新用户ctr数据波动假设检验结果:"
)
# print(chi_ctr_precise_recently)
# print(chi_ctr_precise_recently)
# print(chi_ctr_precise_yesterday)
# print(chi_ctr_precise_yesterday)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment