Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
e5738731
Commit
e5738731
authored
Feb 21, 2020
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
c304c4d8
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
20 additions
and
20 deletions
+20
-20
make_data.py
make_data.py
+20
-20
No files found.
make_data.py
View file @
e5738731
...
...
@@ -24,7 +24,7 @@ def doctor():
df
=
df
.
rename
(
columns
=
dict
(
zip
(
list
(
range
(
len
(
name
))),
name
)))
sql
=
"select merchant_id,doctor_
ad_money
_30_days,expand_rechange_amount_30,"
\
sql
=
"select merchant_id,doctor_
discount
_30_days,expand_rechange_amount_30,"
\
"service_pv_30,expert_pv_30,organization_pv_30 from statistic_merchant_rank_factor "
\
"where partition_date = '{}';"
.
format
(
date_str
)
...
...
@@ -33,7 +33,7 @@ def doctor():
result
=
cursor
.
fetchall
()
db
.
close
()
tmp
=
pd
.
DataFrame
(
list
(
result
))
name
=
[
"merchant_id"
,
"doctor_
ad_money
_30_days"
,
"expand_rechange_amount_30"
,
"service_pv_30"
,
name
=
[
"merchant_id"
,
"doctor_
discount
_30_days"
,
"expand_rechange_amount_30"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
]
tmp
=
tmp
.
rename
(
columns
=
dict
(
zip
(
list
(
range
(
len
(
name
))),
name
)))
...
...
@@ -42,7 +42,7 @@ def doctor():
df
=
pd
.
merge
(
df
,
tmp
,
on
=
'merchant_id'
)
for
i
in
[
"service_exposure_pv_30"
,
"service_ctr_30"
,
"expert_exposure_pv_30"
,
"expert_pv_30"
,
"doctor_
ad_money
_30_days"
,
"expand_rechange_amount_30"
,
"service_pv_30"
,
"doctor_
discount
_30_days"
,
"expand_rechange_amount_30"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"budan_payment_30_days"
]:
df
[
i
]
=
df
[
i
]
.
astype
(
"float"
)
...
...
@@ -55,10 +55,11 @@ def doctor():
df
[
"ctr"
]
=
df
[
"service_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"service_ctr_30"
]
+
\
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
(
df
[
"expert_pv_30"
]
/
df
[
"expert_exposure_pv_30"
])
df
.
loc
[
df
[
"doctor_
ad_money_30_days"
]
<
0
,
[
"doctor_ad_money
_30_days"
]]
=
0
df
.
loc
[
df
[
"doctor_
discount_30_days"
]
<
0
,
[
"doctor_discount
_30_days"
]]
=
0
df
.
loc
[
df
[
"budan_payment_30_days"
]
<
0
,
[
"budan_payment_30_days"
]]
=
0
df
.
loc
[
df
[
"expand_rechange_amount_30"
]
<
0
,
[
"expand_rechange_amount_30"
]]
=
0
df
[
"commission"
]
=
(
df
[
"doctor_
ad_money
_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"commission"
]
=
(
df
[
"doctor_
discount
_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"pv_ad"
]
=
df
[
"expand_rechange_amount_30"
]
/
df
[
"tmp"
]
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"pv_ad"
])
...
...
@@ -72,12 +73,12 @@ def doctor():
columns
=
[
"doctor_id"
,
"score"
,
"ctr"
,
"commission"
,
"pv_ad"
,
"service_exposure_pv_30"
,
"service_ctr_30"
,
"expert_exposure_pv_30"
,
"expert_pv_30"
,
"merchant_id"
,
"doctor_
ad_money
_30_days"
,
"expand_rechange_amount_30"
,
"service_pv_30"
,
"merchant_id"
,
"doctor_
discount
_30_days"
,
"expand_rechange_amount_30"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"budan_payment_30_days"
]
data
=
df
.
loc
[:,
columns
]
data
=
data
.
drop_duplicates
()
data
.
to_csv
(
'/tmp/doctor.csv'
,
index
=
False
)
data
.
to_csv
(
'/tmp/
20_
doctor.csv'
,
index
=
False
)
print
(
"doctor end"
)
...
...
@@ -106,10 +107,8 @@ def hospital():
df
=
df
.
rename
(
columns
=
dict
(
zip
(
list
(
range
(
len
(
name
))),
name
)))
print
(
df
.
head
(
6
))
sql
=
"select merchant_id,doctor_ad_money_30_days,"
\
"service_pv_30,expert_pv_30,organization_pv_30,doctor_discount_30_days from statistic_merchant_rank_factor "
\
sql
=
"select merchant_id,doctor_discount_30_days,"
\
"service_pv_30,expert_pv_30,organization_pv_30,expand_rechange_amount_30 from statistic_merchant_rank_factor "
\
"where partition_date = '{}';"
.
format
(
date_str
)
cursor
=
db
.
cursor
()
...
...
@@ -117,7 +116,7 @@ def hospital():
result
=
cursor
.
fetchall
()
db
.
close
()
tmp
=
pd
.
DataFrame
(
list
(
result
))
name
=
[
"merchant_id"
,
"doctor_
ad_money
_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
name
=
[
"merchant_id"
,
"doctor_
discount
_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"doctor_discount_30_days"
]
tmp
=
tmp
.
rename
(
columns
=
dict
(
zip
(
list
(
range
(
len
(
name
))),
name
)))
print
(
tmp
.
head
(
6
))
...
...
@@ -128,8 +127,8 @@ def hospital():
for
i
in
[
"hospital_exposure_pv_30"
,
"service_exposure_pv_30"
,
"expert_exposure_pv_30"
,
"service_ctr_30"
,
"hospital_ctr_30"
,
"expert_ctr_30"
,
"budan_payment_30_days"
,
"doctor_
ad_money
_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"
doctor_discount_30_days
"
]:
"doctor_
discount
_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"
expand_rechange_amount_30
"
]:
df
[
i
]
=
df
[
i
]
.
astype
(
"float"
)
df
[
"all_exposure"
]
=
df
[
"hospital_exposure_pv_30"
]
+
df
[
"service_exposure_pv_30"
]
+
df
[
"expert_exposure_pv_30"
]
...
...
@@ -141,11 +140,12 @@ def hospital():
df
[
"hospital_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"hospital_ctr_30"
]
+
\
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"expert_ctr_30"
]
df
.
loc
[
df
[
"doctor_
ad_money_30_days"
]
<
0
,
[
"doctor_ad_money
_30_days"
]]
=
0
df
.
loc
[
df
[
"doctor_
discount_30_days"
]
<
0
,
[
"doctor_discount
_30_days"
]]
=
0
df
.
loc
[
df
[
"budan_payment_30_days"
]
<
0
,
[
"budan_payment_30_days"
]]
=
0
df
.
loc
[
df
[
"expand_rechange_amount_30"
]
<
0
,
[
"expand_rechange_amount_30"
]]
=
0
df
[
"commission"
]
=
(
df
[
"doctor_
ad_money
_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"cpt"
]
=
df
[
"
doctor_discount_30_days
"
]
/
df
[
"tmp"
]
df
[
"commission"
]
=
(
df
[
"doctor_
discount
_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"cpt"
]
=
df
[
"
expand_rechange_amount_30
"
]
/
df
[
"tmp"
]
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"cpt"
])
df
.
loc
[
df
[
"all_exposure"
]
<=
1500
,
[
"ctr"
]]
=
0.01
...
...
@@ -159,14 +159,14 @@ def hospital():
columns
=
[
"doctor_id"
,
"score"
,
"ctr"
,
"commission"
,
"cpt"
,
"hospital_id"
,
"hospital_exposure_pv_30"
,
"service_exposure_pv_30"
,
"expert_exposure_pv_30"
,
"service_ctr_30"
,
"hospital_ctr_30"
,
"expert_ctr_30"
,
"merchant_id"
,
"budan_payment_30_days"
,
"doctor_
ad_money
_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"
doctor_discount_30_days
"
]
"doctor_
discount
_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"
expand_rechange_amount_30
"
]
data
=
df
.
loc
[:,
columns
]
data
=
data
.
drop_duplicates
()
print
(
data
.
head
(
6
))
data
.
to_csv
(
'/tmp/
hospital.csv'
,
index
=
False
,
encoding
=
"utf_8_sig"
)
data
.
to_csv
(
'/tmp/
20_hospital.csv'
,
index
=
False
)
if
__name__
==
"__main__"
:
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment