Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
c304c4d8
Commit
c304c4d8
authored
Feb 21, 2020
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
783bd461
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
28 additions
and
38 deletions
+28
-38
make_data.py
make_data.py
+28
-38
No files found.
make_data.py
View file @
c304c4d8
...
@@ -54,31 +54,28 @@ def doctor():
...
@@ -54,31 +54,28 @@ def doctor():
print
(
"aaaaaaaa"
)
print
(
"aaaaaaaa"
)
df
[
"ctr"
]
=
df
[
"service_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"service_ctr_30"
]
+
\
df
[
"ctr"
]
=
df
[
"service_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"service_ctr_30"
]
+
\
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
(
df
[
"expert_pv_30"
]
/
df
[
"expert_exposure_pv_30"
])
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
(
df
[
"expert_pv_30"
]
/
df
[
"expert_exposure_pv_30"
])
df
.
loc
[
df
[
"doctor_ad_money_30_days"
]
<
0
,
[
"doctor_ad_money_30_days"
]]
=
0
df
.
loc
[
df
[
"budan_payment_30_days"
]
<
0
,
[
"budan_payment_30_days"
]]
=
0
df
[
"commission"
]
=
(
df
[
"doctor_ad_money_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"commission"
]
=
(
df
[
"doctor_ad_money_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"pv_ad"
]
=
df
[
"expand_rechange_amount_30"
]
/
df
[
"tmp"
]
df
[
"pv_ad"
]
=
df
[
"expand_rechange_amount_30"
]
/
df
[
"tmp"
]
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"pv_ad"
])
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"pv_ad"
])
df
.
loc
[
df
[
"all_exposure"
]
<=
1500
,
[
"ctr"
]]
=
0.01
df
.
loc
[
df
[
"ctr"
]
<
0.01
,
[
"ctr"
]]
=
0.01
df
.
loc
[
df
[
"ctr"
]
>
0.2
,
[
"ctr"
]]
=
0.2
df
.
loc
[
df
[
"commission"
]
>
20
,
[
"commission"
]]
=
20
df
.
loc
[
df
[
"commission"
]
<
0.01
,
[
"commission"
]]
=
0.01
df
.
loc
[
df
[
"pv_ad"
]
>
20
,
[
"pv_ad"
]]
=
20
df
.
loc
[
df
[
"pv_ad"
]
<
0.01
,
[
"pv_ad"
]]
=
0.01
columns
=
[
"doctor_id"
,
"score"
,
"ctr"
,
"commission"
,
"pv_ad"
,
"service_exposure_pv_30"
,
columns
=
[
"doctor_id"
,
"score"
,
"ctr"
,
"commission"
,
"pv_ad"
,
"service_exposure_pv_30"
,
"service_ctr_30"
,
"expert_exposure_pv_30"
,
"expert_pv_30"
,
"service_ctr_30"
,
"expert_exposure_pv_30"
,
"expert_pv_30"
,
"merchant_id"
,
"doctor_ad_money_30_days"
,
"expand_rechange_amount_30"
,
"service_pv_30"
,
"merchant_id"
,
"doctor_ad_money_30_days"
,
"expand_rechange_amount_30"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"budan_payment_30_days"
]
"mexpert_pv_30"
,
"organization_pv_30"
,
"budan_payment_30_days"
]
data
=
df
.
loc
[:,
columns
]
data
=
df
.
loc
[:,
columns
]
# renames = {'doctor_id': '医生id',
# 'score': '得分',
# 'ctr': '点击率',
# 'commission': '单pv佣金贡献',
# 'pv_ad': '单pv广告消耗',
# 'service_exposure_pv_30': '30天内美购曝光pv',
# 'service_ctr_30': '30天内美购ctr',
# 'expert_exposure_pv_30': '医生曝光次数-30天',
# 'expert_pv_30': '医生主页PV-30天',
# 'merchant_id': '商户id',
# 'doctor_ad_money_30_days': '商户名下非医生、机构账号的验证订单抽成之和',
# 'expand_rechange_amount_30': '统计期内该商户的广告消耗(CPC+CPT+其他',
# 'service_pv_30': '该商户名下所有美购的美购详情页PV',
# 'mexpert_pv_30': '该商户名下所有医生主页PV',
# 'organization_pv_30': '该商户名下机构主页PV',
# 'budan_payment_30_days': '30天已补订单佣金'}
# data = data.rename(columns=renames)
data
=
data
.
drop_duplicates
()
data
=
data
.
drop_duplicates
()
data
.
to_csv
(
'/tmp/doctor.csv'
,
index
=
False
)
data
.
to_csv
(
'/tmp/doctor.csv'
,
index
=
False
)
print
(
"doctor end"
)
print
(
"doctor end"
)
...
@@ -143,36 +140,29 @@ def hospital():
...
@@ -143,36 +140,29 @@ def hospital():
df
[
"ctr"
]
=
df
[
"service_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"service_ctr_30"
]
+
\
df
[
"ctr"
]
=
df
[
"service_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"service_ctr_30"
]
+
\
df
[
"hospital_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"hospital_ctr_30"
]
+
\
df
[
"hospital_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"hospital_ctr_30"
]
+
\
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"expert_ctr_30"
]
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"expert_ctr_30"
]
df
.
loc
[
df
[
"doctor_ad_money_30_days"
]
<
0
,
[
"doctor_ad_money_30_days"
]]
=
0
df
.
loc
[
df
[
"budan_payment_30_days"
]
<
0
,
[
"budan_payment_30_days"
]]
=
0
df
[
"commission"
]
=
(
df
[
"doctor_ad_money_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"commission"
]
=
(
df
[
"doctor_ad_money_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"cpt"
]
=
df
[
"doctor_discount_30_days"
]
/
df
[
"tmp"
]
df
[
"cpt"
]
=
df
[
"doctor_discount_30_days"
]
/
df
[
"tmp"
]
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"cpt"
])
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"cpt"
])
df
.
loc
[
df
[
"all_exposure"
]
<=
1500
,
[
"ctr"
]]
=
0.01
df
.
loc
[
df
[
"ctr"
]
<
0.01
,
[
"ctr"
]]
=
0.01
df
.
loc
[
df
[
"ctr"
]
>
0.2
,
[
"ctr"
]]
=
0.2
df
.
loc
[
df
[
"cpt"
]
>
20
,
[
"cpt"
]]
=
20
df
.
loc
[
df
[
"cpt"
]
<
0.01
,
[
"cpt"
]]
=
0.01
df
.
loc
[
df
[
"commission"
]
>
20
,
[
"commission"
]]
=
20
df
.
loc
[
df
[
"commission"
]
<
0.01
,
[
"commission"
]]
=
0.01
columns
=
[
"doctor_id"
,
"score"
,
"ctr"
,
"commission"
,
"cpt"
,
"hospital_id"
,
"hospital_exposure_pv_30"
,
columns
=
[
"doctor_id"
,
"score"
,
"ctr"
,
"commission"
,
"cpt"
,
"hospital_id"
,
"hospital_exposure_pv_30"
,
"service_exposure_pv_30"
,
"expert_exposure_pv_30"
,
"service_exposure_pv_30"
,
"expert_exposure_pv_30"
,
"service_ctr_30"
,
"hospital_ctr_30"
,
"expert_ctr_30"
,
"merchant_id"
,
"budan_payment_30_days"
,
"service_ctr_30"
,
"hospital_ctr_30"
,
"expert_ctr_30"
,
"merchant_id"
,
"budan_payment_30_days"
,
"doctor_ad_money_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"doctor_ad_money_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"doctor_discount_30_days"
]
"doctor_discount_30_days"
]
data
=
df
.
loc
[:,
columns
]
data
=
df
.
loc
[:,
columns
]
# renames = {'doctor_id': '机构管理者id',
# 'score': '得分',
# 'ctr': '点击率',
# 'commission': '单pv佣金贡献',
# 'cpt': '单pv的CPT消耗',
# 'hospital_id': '医院id',
# 'hospital_exposure_pv_30': '该医院卡片的曝光-30天',
# 'service_exposure_pv_30': '该医院名下所有美购的曝光-30天',
# 'expert_exposure_pv_30': '机构名下所有医生卡片曝光-30天',
# 'service_ctr_30': '机构名下所有美购ctr-30天',
# 'hospital_ctr_30': '医院卡片ctr-30天',
# 'expert_ctr_30': '机构名下所有医生ctr-30天',
# 'merchant_id': '商户id',
# 'budan_payment_30_days': '30天已补订单佣金',
# 'doctor_ad_money_30_days': '商户名下非医生、机构账号的验证订单抽成之和',
# 'service_pv_30': '该商户名下所有美购的美购详情页PV',
# 'mexpert_pv_30': '该商户名下所有医生主页PV',
# 'organization_pv_30': '该商户名下机构主页PV',
# 'doctor_discount_30_days': '该商户的CPT纯消耗金额'}
#
# data = data.rename(columns=renames)
data
=
data
.
drop_duplicates
()
data
=
data
.
drop_duplicates
()
print
(
data
.
head
(
6
))
print
(
data
.
head
(
6
))
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment