Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in
Toggle navigation
F
ffm-baseline
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Wiki
Wiki
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
ML
ffm-baseline
Commits
42e1b019
Commit
42e1b019
authored
Feb 24, 2020
by
张彦钊
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add
parent
cbbc62f1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
107 additions
and
6 deletions
+107
-6
make_data.py
make_data.py
+107
-6
No files found.
make_data.py
View file @
42e1b019
...
...
@@ -4,14 +4,14 @@ import pandas as pd
def
exp
():
date_str
=
"20200
101
"
date_str
=
"20200
222
"
sql
=
"select b.merchant_id "
\
"from statistic_doctor_rank_factor d "
\
"left join hippo_merchantrelevance b on d.doctor_id = b.doctor_id "
\
"where d.partition_date = '{}';"
.
format
(
date_str
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.14
3
'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.14
1
'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
...
...
@@ -49,7 +49,7 @@ def doctor():
"left join hippo_merchantrelevance b on d.doctor_id = b.doctor_id "
\
"where d.partition_date = '{}';"
.
format
(
date_str
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.14
3
'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.14
1
'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
...
...
@@ -151,7 +151,7 @@ def hospital():
"left join hippo_merchantrelevance b on api.id = b.doctor_id "
\
"where api.doctor_type = 1 and h.date = '{}';"
.
format
(
date_str
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.14
3
'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.14
1
'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
...
...
@@ -248,7 +248,7 @@ def old():
"left join al_meigou_service_smart_rank_budan_payment budan on b.merchant_id = budan.merchant_id "
\
"where d.partition_date = '{}' and budan.stat_date = '{}';"
.
format
(
date_str
,
date_tmp
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.14
3
'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
db
=
pymysql
.
connect
(
host
=
'172.16.30.14
1
'
,
port
=
3306
,
user
=
'work'
,
passwd
=
'BJQaT9VzDcuPBqkd'
,
db
=
'zhengxing'
)
cursor
=
db
.
cursor
()
cursor
.
execute
(
sql
)
result
=
cursor
.
fetchall
()
...
...
@@ -318,10 +318,111 @@ def old():
print
(
"doctor end"
)
def new_doctor(date_str="20200101"):
    """Fetch doctor rank factors joined with merchant discount data and preview them.

    Runs one query joining ``statistic_doctor_rank_factor`` with
    ``hippo_merchantrelevance`` and ``statistic_merchant_rank_factor`` for a
    single partition date, loads the rows into a DataFrame, and prints its
    shape and first six rows.

    Args:
        date_str: Partition date matched against both ``d.partition_date``
            and ``m.partition_date``. Defaults to ``"20200101"``, the value
            that was previously hard-coded.

    Returns:
        None. The result is only printed.
    """
    # The trailing space after "m.merchant_id" is required: without it the
    # adjacent literals concatenate to "m.merchant_idwhere ..." and the
    # statement fails with a SQL syntax error.
    sql = (
        "select d.doctor_id,d.service_exposure_pv_30,d.service_ctr_30,d.expert_exposure_pv_30,"
        "d.expert_pv_30,b.merchant_id,m.doctor_discount_30_days from statistic_doctor_rank_factor d "
        "left join hippo_merchantrelevance b on d.doctor_id = b.doctor_id "
        "left join statistic_merchant_rank_factor m on b.merchant_id = m.merchant_id "
        "where d.partition_date = %s and m.partition_date = %s;"
    )

    # NOTE(review): connection settings and credentials are hard-coded here;
    # they should be loaded from configuration or the environment instead.
    db = pymysql.connect(host='172.16.30.141', port=3306, user='work',
                         passwd='BJQaT9VzDcuPBqkd', db='zhengxing')
    try:
        with db.cursor() as cursor:
            # Let the driver quote the date instead of formatting it into the SQL.
            cursor.execute(sql, (date_str, date_str))
            result = cursor.fetchall()
    finally:
        # Release the connection even if the query raises.
        db.close()

    # Columns keep their default integer labels; the parked pipeline below
    # shows how they were meant to be renamed.
    df = pd.DataFrame(list(result))
    print(df.shape)
    print(df.head(6))

    # TODO: the scoring pipeline below is parked. Before re-enabling it, note
    # that the query above now returns a 7th column (doctor_discount_30_days)
    # that `name` does not list, and that the connection is already closed at
    # this point, so the second query needs to run inside the try block above.
    #
    # name = ["doctor_id", "service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30",
    #         "merchant_id"]
    #
    # df = df.rename(columns=dict(zip(list(range(len(name))), name)))
    # print(df.shape)
    #
    # df = df.dropna(subset=["merchant_id"])
    # print("drop")
    # print(df.shape)
    # print(df.head(6))
    #
    # sql = "select merchant_id,doctor_discount_30_days,expand_rechange_amount_30," \
    #       "service_pv_30,expert_pv_30,organization_pv_30 from statistic_merchant_rank_factor " \
    #       "where partition_date = '{}';".format(date_str)
    #
    # cursor = db.cursor()
    # cursor.execute(sql)
    # result = cursor.fetchall()
    # db.close()
    # tmp = pd.DataFrame(list(result))
    # name = ["merchant_id", "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
    #         "mexpert_pv_30", "organization_pv_30"]
    # tmp = tmp.rename(columns=dict(zip(list(range(len(name))), name)))
    # print("tmp")
    # print(tmp.shape)
    # print(tmp.head(6))
    #
    # df["merchant_id"] = df["merchant_id"].astype('int64')
    # df["merchant_id"] = df["merchant_id"].astype("str")
    # tmp["merchant_id"] = tmp["merchant_id"].astype("str")
    # df = pd.merge(df, tmp, on='merchant_id')
    #
    # print("merge")
    # print(df.shape)
    #
    # for i in ["service_exposure_pv_30", "service_ctr_30", "expert_exposure_pv_30", "expert_pv_30",
    #           "doctor_discount_30_days", "expand_rechange_amount_30", "service_pv_30",
    #           "mexpert_pv_30", "organization_pv_30"]:
    #     df[i] = df[i].astype("float")
    #
    # df["all_exposure"] = df["service_exposure_pv_30"] + df["expert_exposure_pv_30"]
    # df = df[~df["expert_exposure_pv_30"].isin([0.0])]
    # print("expert_exposure_pv_30")
    # print(df.shape)
    # df = df[~df["all_exposure"].isin([0.0])]
    # print("all_exposure")
    # print(df.shape)
    # df["tmp"] = df["service_pv_30"] + df["mexpert_pv_30"] +df["organization_pv_30"]
    # df = df[~df["tmp"].isin([0.0])]
    # print("tmp")
    # print(df.shape)
    # df["ctr"] = df["service_exposure_pv_30"] / df["all_exposure"] * df["service_ctr_30"] + \
    #             df["expert_exposure_pv_30"]/df["all_exposure"] * (df["expert_pv_30"] / df["expert_exposure_pv_30"])
    #
    # df.loc[df["doctor_discount_30_days"] < 0, ["doctor_discount_30_days"]] = 0
    # # df.loc[df["budan_payment_30_days"] < 0, ["budan_payment_30_days"]] = 0
    # df.loc[df["expand_rechange_amount_30"] < 0, ["expand_rechange_amount_30"]] = 0
    #
    # df["commission"] = df["doctor_discount_30_days"]/df["tmp"]
    # df["pv_ad"] = df["expand_rechange_amount_30"]/df["tmp"]
    #
    # df.loc[df["all_exposure"] <= 1500, ["ctr"]] = 0.01
    # df.loc[df["ctr"] < 0.01, ["ctr"]] = 0.01
    # df.loc[df["ctr"] > 0.2, ["ctr"]] = 0.2
    # df.loc[df["commission"] > 20, ["commission"]] = 20
    # df.loc[df["commission"] < 0.01, ["commission"]] = 0.01
    # df.loc[df["pv_ad"] > 20, ["pv_ad"]] = 20
    # df.loc[df["pv_ad"] < 0.01, ["pv_ad"]] = 0.01
    #
    # df["score"] = df["ctr"] ** 0.5 * (df["commission"] + df["pv_ad"])
    # print(df.shape)
    #
    # columns = ["doctor_id","score","ctr","commission","pv_ad","service_exposure_pv_30",
    #            "service_ctr_30","expert_exposure_pv_30","expert_pv_30",
    #            "merchant_id","doctor_discount_30_days","expand_rechange_amount_30","service_pv_30",
    #            "mexpert_pv_30","organization_pv_30"]
    # data = df.loc[:, columns]
    #
    # data = data.drop_duplicates()
    # print(data.shape)
    # data.to_csv('/tmp/1_doctor.csv',index=False)
    # print("doctor end")
if __name__ == "__main__":
    # Script entry point: only the new_doctor() export is active. The bare
    # doctor() call that preceded these lines was a removed diff line and is
    # dropped; the other jobs stay disabled, uncomment one to run it.
    # doctor()
    # hospital()
    # old()
    new_doctor()
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment