Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
ac22ea27
Commit
ac22ea27
authored
Feb 24, 2020
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
0661416b
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
12 additions
and
16 deletions
+12
-16
make_data.py
make_data.py
+12
-16
No files found.
make_data.py
View file @
ac22ea27
...
...
@@ -15,7 +15,7 @@ def hospital():
"left join hippo_merchantrelevance b on api.id = b.doctor_id "
\
"left join statistic_merchant_rank_factor m on b.merchant_id = m.merchant_id "
\
"where api.doctor_type = 1 and h.date = '{}' and m.partition_date = '{}';"
.
format
(
date_str
,
date_str
)
print
(
sql
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.141'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
cursor
=
db
.
cursor
()
...
...
@@ -42,27 +42,25 @@ def hospital():
print
(
"filter 1"
)
print
(
df
.
shape
)
df
[
"tmp"
]
=
df
[
"service_pv_30"
]
+
df
[
"mexpert_pv_30"
]
+
df
[
"organization_pv_30"
]
df
=
df
[
~
df
[
"tmp"
]
.
isin
([
0.0
])]
print
(
"filter 2"
)
print
(
df
.
shape
)
print
(
"ccc"
)
df
[
"ctr"
]
=
df
[
"service_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"service_ctr_30"
]
+
\
df
[
"hospital_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"hospital_ctr_30"
]
+
\
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"expert_ctr_30"
]
df
.
loc
[
df
[
"doctor_discount_30_days"
]
<
0
,
[
"doctor_discount_30_days"
]]
=
0
# df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0
df
.
loc
[
df
[
"expand_rechange_amount_30"
]
<
0
,
[
"expand_rechange_amount_30"
]]
=
0
df
.
loc
[
df
[
"tmp"
]
<=
1500
,
[
"tmp"
]]
=
1500
df
[
"commission"
]
=
df
[
"doctor_discount_30_days"
]
/
df
[
"tmp"
]
df
[
"cpt"
]
=
df
[
"expand_rechange_amount_30"
]
/
df
[
"tmp"
]
df
.
loc
[
df
[
"all_exposure"
]
<=
1500
,
[
"ctr"
]]
=
0.01
df
.
loc
[
df
[
"ctr"
]
<
0.01
,
[
"ctr"
]]
=
0.01
df
.
loc
[
df
[
"ctr"
]
>
0.2
,
[
"ctr"
]]
=
0.2
df
.
loc
[
df
[
"cpt"
]
>
20
,
[
"cpt"
]]
=
2
0
df
.
loc
[
df
[
"cpt"
]
>
10
,
[
"cpt"
]]
=
1
0
df
.
loc
[
df
[
"cpt"
]
<
0.01
,
[
"cpt"
]]
=
0.01
df
.
loc
[
df
[
"commission"
]
>
20
,
[
"commission"
]]
=
2
0
df
.
loc
[
df
[
"commission"
]
>
10
,
[
"commission"
]]
=
1
0
df
.
loc
[
df
[
"commission"
]
<
0.01
,
[
"commission"
]]
=
0.01
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"cpt"
])
...
...
@@ -77,7 +75,7 @@ def hospital():
data
=
data
.
drop_duplicates
()
print
(
data
.
shape
)
data
.
to_csv
(
'/tmp/2
1
_hospital.csv'
,
index
=
False
)
data
.
to_csv
(
'/tmp/2
5
_hospital.csv'
,
index
=
False
)
def
new_doctor
():
...
...
@@ -114,14 +112,12 @@ def new_doctor():
print
(
"all_exposure"
)
print
(
df
.
shape
)
df
[
"tmp"
]
=
df
[
"service_pv_30"
]
+
df
[
"mexpert_pv_30"
]
+
df
[
"organization_pv_30"
]
df
=
df
[
~
df
[
"tmp"
]
.
isin
([
0.0
])]
print
(
"tmp"
)
print
(
df
.
shape
)
df
.
loc
[
df
[
"tmp"
]
<=
1500
,
[
"tmp"
]]
=
1500
df
[
"ctr"
]
=
df
[
"service_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"service_ctr_30"
]
+
\
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
(
df
[
"expert_pv_30"
]
/
df
[
"expert_exposure_pv_30"
])
df
.
loc
[
df
[
"doctor_discount_30_days"
]
<
0
,
[
"doctor_discount_30_days"
]]
=
0
# df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0
df
.
loc
[
df
[
"expand_rechange_amount_30"
]
<
0
,
[
"expand_rechange_amount_30"
]]
=
0
df
[
"commission"
]
=
df
[
"doctor_discount_30_days"
]
/
df
[
"tmp"
]
...
...
@@ -130,9 +126,9 @@ def new_doctor():
df
.
loc
[
df
[
"all_exposure"
]
<=
1500
,
[
"ctr"
]]
=
0.01
df
.
loc
[
df
[
"ctr"
]
<
0.01
,
[
"ctr"
]]
=
0.01
df
.
loc
[
df
[
"ctr"
]
>
0.2
,
[
"ctr"
]]
=
0.2
df
.
loc
[
df
[
"commission"
]
>
20
,
[
"commission"
]]
=
2
0
df
.
loc
[
df
[
"commission"
]
>
10
,
[
"commission"
]]
=
1
0
df
.
loc
[
df
[
"commission"
]
<
0.01
,
[
"commission"
]]
=
0.01
df
.
loc
[
df
[
"pv_ad"
]
>
20
,
[
"pv_ad"
]]
=
2
0
df
.
loc
[
df
[
"pv_ad"
]
>
10
,
[
"pv_ad"
]]
=
1
0
df
.
loc
[
df
[
"pv_ad"
]
<
0.01
,
[
"pv_ad"
]]
=
0.01
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"pv_ad"
])
...
...
@@ -146,7 +142,7 @@ def new_doctor():
data
=
data
.
drop_duplicates
()
print
(
data
.
shape
)
data
.
to_csv
(
'/tmp/2
1
_doctor.csv'
,
index
=
False
)
data
.
to_csv
(
'/tmp/2
5
_doctor.csv'
,
index
=
False
)
print
(
"doctor end"
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment