Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
3591a3f2
Commit
3591a3f2
authored
Feb 21, 2019
by
王志伟
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
send_email
parent
883da871
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
108 additions
and
48 deletions
+108
-48
hypothesis_test.py
eda/recommended_indexs/hypothesis_test.py
+47
-22
send_email.py
eda/recommended_indexs/send_email.py
+61
-26
No files found.
eda/recommended_indexs/hypothesis_test.py
View file @
3591a3f2
...
...
@@ -36,7 +36,6 @@ def get_somedate():
someday
=
someday
.
strftime
(
"
%
Y-
%
m-
%
d"
)
return
someday
ten_days
=
get_somedate
()
# print(ten_days)
print
(
"===========分割线,T检验最近10日指标与策略前10日指标是否获得显著提升============"
)
#获取最近10天的数据
def
DATA_recently
(
x
,
y
,
z
,
q
,
t
):
...
...
@@ -138,7 +137,6 @@ def t_test(x,y): #进行t检验
else
:
#认为数据方差不具有齐性,equal_var=false
t_test
=
ttest_ind
(
x
,
y
,
equal_var
=
False
)
t_p_value
=
t_test
[
1
]
# print(t_p_value)
if
t_p_value
>
0.05
:
print
(
"95
%
置信度认为策略前后两组数据【无显著性差异】,即该指标没有显著提升,p_value:{}"
.
format
(
t_p_value
))
print
(
"
\n
"
)
...
...
@@ -224,7 +222,6 @@ def data_cal(x,y):
def
chi_cal
(
data
):
data
[
'共计'
]
=
data
.
apply
(
lambda
x
:
x
.
sum
(),
axis
=
1
)
# print(data)
data
.
loc
[
'共计'
]
=
data
.
apply
(
lambda
x
:
x
.
sum
())
t1
=
data
.
iloc
[
0
]
t2
=
data
.
iloc
[
1
]
...
...
@@ -249,20 +246,20 @@ def chi_cal(data):
v
=
(
len
(
data
)
-
1
)
*
(
data
.
columns
.
size
-
1
)
#查表发现阈值为3.84
if
X
>
3.84
:
print
(
"数据波动较大,超出正常波动范围,95
%
可能性属于指标
显著变化,请关注
"
)
print
(
"数据波动较大,超出正常波动范围,95
%
可能性属于指标
【显著变化,请关注】
"
)
print
(
"
\n
"
)
else
:
print
(
"数据波动较小,95
%
可能性属于
正常波动
范围"
)
print
(
"数据波动较小,95
%
可能性属于
【正常波动】
范围"
)
print
(
"
\n
"
)
#老用户精准点击曝光数据(首页精选日记本列表on_click_diary_card)
print
(
"【1】(精准曝光)首页精选日记本列表老用户CTR数据波动假设检验结果:"
)
chi_ctr_precise_old_recently
=
chi_DATA_recently
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all_precise"
,
"on_click_diary_card"
,
five_days
,
yesterday
)
temp1_old
=
[
float
(
str
(
Decimal
(
chi_ctr_precise_old_recently
[
i
])
.
quantize
(
Decimal
(
'0.0'
))))
for
i
in
range
(
len
(
chi_ctr_precise_old_recently
))]
# print(temp1)
chi_ctr_precise_old_yesterday
=
chi_DATA_yesterday
(
"clk_count_oldUser_all_a"
,
"clk_count_oldUser_all_b"
,
"imp_count_oldUser_all_precise"
,
"on_click_diary_card"
,
yesterday
)
temp2_old
=
[
float
(
chi_ctr_precise_old_yesterday
[
i
])
for
i
in
range
(
len
(
chi_ctr_precise_old_yesterday
))]
# print(temp2)
ctr_tst_old
=
data_cal
(
temp1_old
,
temp2_old
)
chi_cal
(
ctr_tst_old
)
...
...
@@ -270,10 +267,10 @@ chi_cal(ctr_tst_old)
print
(
"【2】(精准曝光)首页精选日记本列表新用户CTR数据波动假设检验结果:"
)
chi_ctr_precise_new_recently
=
chi_DATA_recently
(
"clk_count_newUser_all_a"
,
"clk_count_newUser_all_b"
,
"imp_count_newUser_all_precise"
,
"on_click_diary_card"
,
five_days
,
yesterday
)
temp1_new
=
[
float
(
str
(
Decimal
(
chi_ctr_precise_new_recently
[
i
])
.
quantize
(
Decimal
(
'0.0'
))))
for
i
in
range
(
len
(
chi_ctr_precise_new_recently
))]
# print(temp1)
chi_ctr_precise_new_yesterday
=
chi_DATA_yesterday
(
"clk_count_newUser_all_a"
,
"clk_count_newUser_all_b"
,
"imp_count_newUser_all_precise"
,
"on_click_diary_card"
,
yesterday
)
temp2_new
=
[
float
(
chi_ctr_precise_new_yesterday
[
i
])
for
i
in
range
(
len
(
chi_ctr_precise_new_yesterday
))]
# print(temp2)
ctr_tst_new
=
data_cal
(
temp1_new
,
temp2_new
)
chi_cal
(
ctr_tst_new
)
...
...
@@ -281,40 +278,40 @@ chi_cal(ctr_tst_new)
print
(
"【3】老用户CVR数据波动假设检验结果:"
)
chi_cvr_old_recently
=
chi_DATA_recently_all
(
"diary_meigou_oldUser"
,
"diary_clk_oldUser"
,
"diary_meigou_crv"
,
five_days
,
yesterday
)
cvr_old
=
[
float
(
str
(
Decimal
(
chi_cvr_old_recently
[
i
])
.
quantize
(
Decimal
(
'0.0'
))))
for
i
in
range
(
len
(
chi_cvr_old_recently
))]
# print(temp1)
chi_cvr_old_yesterday
=
chi_DATA_yesterday_all
(
"diary_meigou_oldUser"
,
"diary_clk_oldUser"
,
"diary_meigou_crv"
,
yesterday
)
cvr_old2
=
[
float
(
chi_cvr_old_yesterday
[
i
])
for
i
in
range
(
len
(
chi_cvr_old_yesterday
))]
# print(temp2)
cvr_tst_old
=
data_cal
(
cvr_old
,
cvr_old2
)
chi_cal
(
cvr_tst_old
)
#老用户美购转化数据
print
(
"【4】新用户CVR数据波动假设检验结果:"
)
chi_cvr_new_recently
=
chi_DATA_recently_all
(
"diary_meigou_newUser"
,
"diary_clk_newUser"
,
"diary_meigou_crv"
,
five_days
,
yesterday
)
cvr_new
=
[
float
(
str
(
Decimal
(
chi_cvr_new_recently
[
i
])
.
quantize
(
Decimal
(
'0.0'
))))
for
i
in
range
(
len
(
chi_cvr_new_recently
))]
# print(temp1)
chi_cvr_new_yesterday
=
chi_DATA_yesterday_all
(
"diary_meigou_newUser"
,
"diary_clk_newUser"
,
"diary_meigou_crv"
,
yesterday
)
cvr_new2
=
[
float
(
chi_cvr_new_yesterday
[
i
])
for
i
in
range
(
len
(
chi_cvr_new_yesterday
))]
# print(temp2)
cvr_tst_new
=
data_cal
(
cvr_new
,
cvr_new2
)
chi_cal
(
cvr_tst_new
)
#老用户美购转化数据
print
(
"【5】老用户CT-CVR数据波动假设检验结果:"
)
chi_ctcvr_old_recently
=
chi_DATA_recently_all
(
"diary_meigou_oldUser"
,
"diary_exp_oldUser"
,
"diary_meigou_crv"
,
five_days
,
yesterday
)
ctcvr_old
=
[
float
(
str
(
Decimal
(
chi_ctcvr_old_recently
[
i
])
.
quantize
(
Decimal
(
'0.0'
))))
for
i
in
range
(
len
(
chi_ctcvr_old_recently
))]
# print(temp1)
chi_ctcvr_old_yesterday
=
chi_DATA_yesterday_all
(
"diary_meigou_oldUser"
,
"diary_exp_oldUser"
,
"diary_meigou_crv"
,
yesterday
)
ctcvr_old2
=
[
float
(
chi_ctcvr_old_yesterday
[
i
])
for
i
in
range
(
len
(
chi_ctcvr_old_yesterday
))]
# print(temp2)
ctcvr_tst_old
=
data_cal
(
ctcvr_old
,
ctcvr_old2
)
chi_cal
(
ctcvr_tst_old
)
#老用户美购转化数据
print
(
"【6】新用户CT-CVR数据波动假设检验结果:"
)
chi_ctcvr_new_recently
=
chi_DATA_recently_all
(
"diary_meigou_newUser"
,
"diary_exp_newUser"
,
"diary_meigou_crv"
,
five_days
,
yesterday
)
ctcvr_new
=
[
float
(
str
(
Decimal
(
chi_ctcvr_new_recently
[
i
])
.
quantize
(
Decimal
(
'0.0'
))))
for
i
in
range
(
len
(
chi_ctcvr_new_recently
))]
# print(temp1)
chi_ctcvr_new_yesterday
=
chi_DATA_yesterday_all
(
"diary_meigou_newUser"
,
"diary_exp_newUser"
,
"diary_meigou_crv"
,
yesterday
)
ctcvr_new2
=
[
float
(
chi_ctcvr_new_yesterday
[
i
])
for
i
in
range
(
len
(
chi_ctcvr_new_yesterday
))]
# print(temp2)
ctcvr_tst_new
=
data_cal
(
ctcvr_new
,
ctcvr_new2
)
chi_cal
(
ctcvr_tst_new
)
...
...
@@ -404,11 +401,39 @@ print("【8-2】老用户精准曝光CTR数据波动5日内均值:{}%".format(me
print
(
"
\n
"
)
# print(chi_ctr_precise_recently)
# print(chi_ctr_precise_yesterday)
print
(
"============================分割线==================================="
)
#保存文件
# print("============================分割线===================================")
#根据新老用户进行区分
# print("============================新用户各指标假设检验结果分析===================================")
# #新用户cvr假设检验
# print("【1】新用户CVR假设检验结果:")
# crv_new_ttest1=t_test(x_crv_new,y_crv_new)
# #新用户ct_cvr假设检验
# print("【3】新用户CT-CVR假设检验结果:")
# ctcrv_new_ttest1=t_test(x_ctcrv_new,y_ctcrv_new)
# #新用户ctr假设检验
# print("【5】新用户CTR假设检验结果:")
# ctr_new_ttest1=t_test(x_ctr_new,y_ctr_new)
# #新用户ctr(on_click_diary_card)假设检验
# print("【7】新用户CTR假设检验(日记本列表ctr)(on_click_diary_card)结果:")
# ctr_new_o_ttest1=t_test(x_ctr_new_o,y_ctr_new_o)
#
#
#
#
#
# print("============================老用户各指标假设检验结果分析===================================")
# #老用户cvr假设检验
# print("【2】老用户CVR假设检验结果:")
# crv_old_ttest1=t_test(x_crv_old,y_crv_old)
# # #老用户ct_cvr假设检验
# print("【4】老用户CT-CVR假设检验结果:")
# ctcrv_old_ttest1=t_test(x_ctcrv_old,y_ctcrv_old)
# #老用户ctr假设检验
# print("【6】老用户CTR假设检验结果:")
# ctr_old_ttest1=t_test(x_ctr_old,y_ctr_old)
# #老用户ctr(on_click_diary_card)假设检验
# print("【8】老用户CTR假设检验(日记本列表ctr)(on_click_diary_card)结果:")
# ctr_old_o_ttest1=t_test(x_ctr_old_o,y_ctr_old_o)
...
...
eda/recommended_indexs/send_email.py
View file @
3591a3f2
##发送邮件
# ##发送邮件
#
# #coding=utf-8
#
# import smtplib
# from email.mime.text import MIMEText
# from email.utils import formataddr
# from email.mime.application import MIMEApplication
# import datetime
#
# from email.mime.multipart import MIMEMultipart
#
# my_sender='wangzhiwei@igengmei.com'
# my_pass = 'RiKEcsHAgesCZ7yd'
# my_user1='wangzhiwei@igengmei.com'
# my_user2='gaoyazhe@igengmei.com'
# my_user3='huangkai@igengmei.com'
# def mail():
# ret = True
# pdfFile = 'hypothesis.txt'
# pdfApart = MIMEApplication(open(pdfFile, 'rb').read())
# pdfApart.add_header('Content-Disposition', 'attachment', filename=pdfFile)
# m = MIMEMultipart()
# m.attach(pdfApart)
# m['Subject'] = '数据指标监控数据(假设检验)'
# m['From'] = '王志伟<wangzhiwei@igengmei.com>'
#
#
# try:
# # text = "Hi!\nHow are you?\nHere is the link you wanted:\nhttp://www.baidu.com"
# # msg = MIMEText(text, 'plain', 'utf-8')
# # msg['From'] = formataddr(["王志伟", my_sender])
# # msg['To'] = my_user1
# # msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计"
# server = smtplib.SMTP_SSL("smtp.exmail.qq.com", 465)
# server.login(my_sender, my_pass)
# server.sendmail(my_sender, [my_user1,my_user2,my_user3], m.as_string())
# server.quit()
# except Exception:
# ret=False
# return ret
#
# ret=mail()
# if ret:
# print("邮件发送成功")
# else:
# print("邮件发送失败")
#####尝试发送邮箱,不带附件
#coding=utf-8
import
smtplib
from
email.mime.text
import
MIMEText
from
email.utils
import
formataddr
from
email.mime.application
import
MIMEApplication
import
datetime
from
email.mime.multipart
import
MIMEMultipart
my_sender
=
'wangzhiwei@igengmei.com'
my_pass
=
'RiKEcsHAgesCZ7yd'
my_user1
=
'wangzhiwei@igengmei.com'
# my_user2='gaoyazhe@igengmei.com'
# my_user3='huangkai@igengmei.com'
# my_user2='zhangyanzhao@igengmei.com'
def
mail
():
ret
=
True
pdfFile
=
'hypothesis.txt'
pdfApart
=
MIMEApplication
(
open
(
pdfFile
,
'rb'
)
.
read
())
pdfApart
.
add_header
(
'Content-Disposition'
,
'attachment'
,
filename
=
pdfFile
)
ret
=
True
try
:
text
=
"Hi!
\n
How are you?
\n
Here is the link you wanted:
\n
http://www.baidu.com"
m
=
MIMEMultipart
(
text
,
'plain'
,
'utf-8'
)
m
.
attach
(
pdfApart
)
m
[
'Subject'
]
=
'数据指标监控数据(假设检验)'
m
[
'From'
]
=
'王志伟<wangzhiwei@igengmei.com>'
# msg = MIMEText(text, 'plain', 'utf-8')
# msg['From'] = formataddr(["王志伟", my_sender])
# msg['To'] = my_user1
# msg['Subject'] = str(datetime.date.today()) + "-esmm多目标模型训练指标统计"
server
=
smtplib
.
SMTP_SSL
(
"smtp.exmail.qq.com"
,
465
)
server
.
login
(
my_sender
,
my_pass
)
server
.
sendmail
(
my_sender
,
[
my_user1
],
m
.
as_string
())
server
.
quit
()
with
open
(
'hypothesis.txt'
)
as
f
:
stat_data
=
f
.
read
()
msg
=
MIMEText
(
stat_data
,
'plain'
,
'utf-8'
)
msg
[
'From'
]
=
formataddr
([
"王志伟"
,
my_sender
])
msg
[
'To'
]
=
my_user1
msg
[
'Subject'
]
=
str
(
datetime
.
date
.
today
())
+
"-数据指标监控数据(假设检验)"
server
=
smtplib
.
SMTP_SSL
(
"smtp.exmail.qq.com"
,
465
)
server
.
login
(
my_sender
,
my_pass
)
server
.
sendmail
(
my_sender
,[
my_user1
],
msg
.
as_string
())
server
.
quit
()
except
Exception
:
ret
=
False
return
ret
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment