Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
f91f879b
Commit
f91f879b
authored
Feb 20, 2020
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
1ebe3a7d
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
30 additions
and
30 deletions
+30
-30
make_data.py
make_data.py
+30
-30
No files found.
make_data.py
View file @
f91f879b
...
...
@@ -129,7 +129,7 @@ def hospital():
"left join hippo_merchantrelevance b on api.id = b.doctor_id "
\
"left join al_meigou_service_smart_rank_budan_payment budan on b.merchant_id = budan.merchant_id "
\
"where api.doctor_type = 1 and h.date = '{}' "
\
"and budan.stat_date = '{}'
limit 6
;"
.
format
(
date_str
,
date_tmp
)
"and budan.stat_date = '{}';"
.
format
(
date_str
,
date_tmp
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.143'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
cursor
=
db
.
cursor
()
...
...
@@ -146,7 +146,7 @@ def hospital():
sql
=
"select merchant_id,doctor_ad_money_30_days,"
\
"service_pv_30,expert_pv_30,organization_pv_30,doctor_discount_30_days from statistic_merchant_rank_factor "
\
"where partition_date = '{}'
limit 6
;"
.
format
(
date_str
)
"where partition_date = '{}';"
.
format
(
date_str
)
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
...
...
@@ -158,34 +158,34 @@ def hospital():
tmp
=
tmp
.
rename
(
columns
=
dict
(
zip
(
list
(
range
(
len
(
name
))),
name
)))
print
(
tmp
.
head
(
6
))
#
df["merchant_id"] = df["merchant_id"].astype("str")
#
tmp["merchant_id"] = tmp["merchant_id"].astype("str")
#
df = pd.merge(df, tmp, on='merchant_id')
#
#
for i in ["hospital_exposure_pv_30","service_exposure_pv_30","expert_exposure_pv_30",
#
"service_ctr_30","hospital_ctr_30","expert_ctr_30",
#
"doctor_ad_money_30_days", "service_pv_30",
#
"mexpert_pv_30", "organization_pv_30", "budan_payment_30_days","doctor_discount_30_days"]:
#
df[i] = df[i].astype("float")
#
#
df["all_exposure"] = df["hospital_exposure_pv_30"] + df["service_exposure_pv_30"] + df["expert_exposure_pv_30"]
#
df = df[~df["all_exposure"].isin([0.0])]
#
df["tmp"] = df["service_pv_30"] + df["mexpert_pv_30"] +df["organization_pv_30"]
#
df = df[~df["tmp"].isin([0.0])]
#
print("aaaaaaaa")
#
df["ctr"] = df["service_exposure_pv_30"] / df["all_exposure"] * df["service_ctr_30"] + \
#
df["hospital_exposure_pv_30"]/ df["all_exposure"] * df["hospital_ctr_30"] + \
#
df["expert_exposure_pv_30"]/df["all_exposure"] * df["expert_ctr_30"]
#
df["commission"] = (df["doctor_ad_money_30_days"] + df["budan_payment_30_days"])/df["tmp"]
#
df["cpt"] = df["doctor_discount_30_days"]/df["tmp"]
#
df["score"] = df["ctr"]**0.5 * (df["commission"] + df["cpt"])
#
columns = ["score","ctr","commission","cpt","hospital_exposure_pv_30","service_exposure_pv_30",
#
"expert_exposure_pv_30",
#
"service_ctr_30","hospital_ctr_30","expert_ctr_30", "service_pv_30",
#
"mexpert_pv_30", "organization_pv_30"]
#
data = df.loc[:, columns]
#
print(data.head(6))
#
data.to_csv('/home/gmuser/hospital.csv',index=False)
df
[
"merchant_id"
]
=
df
[
"merchant_id"
]
.
astype
(
"str"
)
tmp
[
"merchant_id"
]
=
tmp
[
"merchant_id"
]
.
astype
(
"str"
)
df
=
pd
.
merge
(
df
,
tmp
,
on
=
'merchant_id'
)
for
i
in
[
"hospital_exposure_pv_30"
,
"service_exposure_pv_30"
,
"expert_exposure_pv_30"
,
"service_ctr_30"
,
"hospital_ctr_30"
,
"expert_ctr_30"
,
"doctor_ad_money_30_days"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
,
"budan_payment_30_days"
,
"doctor_discount_30_days"
]:
df
[
i
]
=
df
[
i
]
.
astype
(
"float"
)
df
[
"all_exposure"
]
=
df
[
"hospital_exposure_pv_30"
]
+
df
[
"service_exposure_pv_30"
]
+
df
[
"expert_exposure_pv_30"
]
df
=
df
[
~
df
[
"all_exposure"
]
.
isin
([
0.0
])]
df
[
"tmp"
]
=
df
[
"service_pv_30"
]
+
df
[
"mexpert_pv_30"
]
+
df
[
"organization_pv_30"
]
df
=
df
[
~
df
[
"tmp"
]
.
isin
([
0.0
])]
print
(
"aaaaaaaa"
)
df
[
"ctr"
]
=
df
[
"service_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"service_ctr_30"
]
+
\
df
[
"hospital_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"hospital_ctr_30"
]
+
\
df
[
"expert_exposure_pv_30"
]
/
df
[
"all_exposure"
]
*
df
[
"expert_ctr_30"
]
df
[
"commission"
]
=
(
df
[
"doctor_ad_money_30_days"
]
+
df
[
"budan_payment_30_days"
])
/
df
[
"tmp"
]
df
[
"cpt"
]
=
df
[
"doctor_discount_30_days"
]
/
df
[
"tmp"
]
df
[
"score"
]
=
df
[
"ctr"
]
**
0.5
*
(
df
[
"commission"
]
+
df
[
"cpt"
])
columns
=
[
"score"
,
"ctr"
,
"commission"
,
"cpt"
,
"hospital_exposure_pv_30"
,
"service_exposure_pv_30"
,
"expert_exposure_pv_30"
,
"service_ctr_30"
,
"hospital_ctr_30"
,
"expert_ctr_30"
,
"service_pv_30"
,
"mexpert_pv_30"
,
"organization_pv_30"
]
data
=
df
.
loc
[:,
columns
]
print
(
data
.
head
(
6
))
data
.
to_csv
(
'/home/gmuser/hospital.csv'
,
index
=
False
)
if
__name__
==
"__main__"
:
hospital
()
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment