Commit 4bba20c2 authored by 张彦钊

change test

parent 22bcaa40
import numpy as np, pandas as pd
from sklearn.cluster import DBSCAN
from shapely.geometry import MultiPoint
import geopandas
import shapefile
from matplotlib import pyplot as plt
# Load the location samples and keep only the coordinate columns.
# Selecting ["lat", "lng"] already discards "device_id" and "partition_date",
# so a separate drop() call is not needed.
data = pd.read_csv("/Users/mac/Downloads/location.csv")
data = data[["lat", "lng"]]
# DataFrame.as_matrix() was removed in pandas 1.0; .values yields the same
# ndarray on both old and current pandas releases.
data = data.values.astype("float32", copy=False)  # convert to array

plt.title("beijing location")
# NOTE(review): `latlngs` is not defined in the visible code -- presumably it
# should be the `data` array built above; confirm before running.
plt.scatter(latlngs[:, 0], latlngs[:, 1], s=1, c="black", marker='.')

# NOTE(review): shape_path, shape_path_2huan and shape_path_5huan are not
# defined in the visible code; they must be assigned before this point.
border_shape = shapefile.Reader(shape_path)
border_shape_2 = shapefile.Reader(shape_path_2huan)
border_shape_5 = shapefile.Reader(shape_path_5huan)

# Shape records read from each of the three shapefiles.
border = border_shape.shapes()
border_2 = border_shape_2.shapes()
border_5 = border_shape_5.shapes()
# Center of a cluster
def get_centermost_point(cluster):
    """Print and return the centroid of ``cluster`` as an ``(x, y)`` tuple.

    ``cluster`` is handed directly to ``MultiPoint``; the centroid's ``x``
    and ``y`` attributes form the result.
    """
    center = MultiPoint(cluster).centroid
    coords = (center.x, center.y)
    print(coords)
    return coords
# Render the clustering result.
# NOTE(review): `clusters` is not defined in the visible code. The original
# indentation was lost; this assumes one scatter() call per cluster and a
# single show() after the loop -- confirm.
for border_detail in clusters:
    points = list(border_detail)
    # Split each cluster's points into parallel coordinate lists.
    x = [point[0] for point in points]
    y = [point[1] for point in points]
    plt.scatter(x, y, marker='o')
plt.show()
# coding=utf-8
import numpy as np
from scipy.spatial.distance import cdist
......@@ -51,4 +92,4 @@ for (label, color) in zip(unique_labels, colors):
plt.title("DBSCAN on beijing_users")
plt.xlabel("lat (scaled)")
plt.ylabel("lng (scaled)")
plt.savefig("results/(0.9,15)dbscan_wholesale.png", format="PNG")
# plt.savefig("results/(0.9,15)dbscan_wholesale.png", format="PNG")
import pandas as pd
import pymysql
import datetime
def con_sql(db, sql):
    """Run ``sql`` on ``db`` and return the first column of the first row.

    The connection is always closed before this function returns, including
    when the query raises, so callers must pass a fresh connection per call.

    Args:
        db: An open connection object providing ``cursor()`` and ``close()``.
        sql: The statement to execute.

    Returns:
        ``cursor.fetchone()[0]`` for the executed statement.

    Raises:
        TypeError: If the statement yields no row (``fetchone()`` is None).
        Any exception raised by ``cursor.execute`` is propagated unchanged.
    """
    try:
        cursor = db.cursor()
        cursor.execute(sql)
        return cursor.fetchone()[0]
    finally:
        # Release the connection even when the query fails.
        db.close()
def get_ctr():
    """Print yesterday's exposure count, click count and their ratio.

    Each count is fetched through ``con_sql``, which closes the connection it
    is given; a new connection is therefore opened for every query instead of
    reusing one that has already been closed.
    """

    def connect():
        # NOTE(review): credentials are hard-coded in source; consider moving
        # them to configuration or environment variables.
        return pymysql.connect(host='172.16.40.158', port=4000, user='root',
                               passwd='3SYz54LS9#^9sBvC', db='jerry_prod')

    yesterday = (datetime.date.today() - datetime.timedelta(days=1)).strftime("%Y-%m-%d")
    print(yesterday)

    sql = "select count(*) from data_feed_exposure_precise where stat_date = '{}'".format(yesterday)
    exposures = con_sql(connect(), sql)

    # NOTE(review): this query is identical to the exposure query above, so
    # `clicks` always equals `exposures`. It presumably should read from a
    # click table -- confirm the intended table name.
    sql = "select count(*) from data_feed_exposure_precise where stat_date = '{}'".format(yesterday)
    clicks = con_sql(connect(), sql)

    print(exposures)
    print(clicks)
    # NOTE(review): a click-through rate is conventionally clicks / exposures;
    # the ratio below is kept as originally written -- confirm the intent.
    if clicks:
        print(exposures/clicks)
    else:
        # Avoid ZeroDivisionError when there were no clicks for the day.
        print("no clicks recorded for {}; ratio undefined".format(yesterday))


if __name__ == "__main__":
    get_ctr()
\ No newline at end of file
import pandas as pd
import pymysql
from sklearn.preprocessing import MinMaxScaler
from sqlalchemy import create_engine
def con_sql(db, sql):
    """Run ``sql`` on ``db`` and return all rows as a DataFrame.

    This is a best-effort helper: any exception raised while executing or
    fetching is printed and an empty DataFrame is returned instead. The
    connection is closed in every case.

    Args:
        db: An open connection object providing ``cursor()`` and ``close()``.
        sql: The statement to execute.

    Returns:
        A DataFrame built from ``cursor.fetchall()`` (columns are labelled
        0..n-1), or an empty DataFrame if the query failed.
    """
    cursor = db.cursor()
    try:
        cursor.execute(sql)
        result = cursor.fetchall()
        df = pd.DataFrame(list(result))
    except Exception as e:
        # Report the caught exception itself, not the Exception class.
        print("发生异常", e)
        df = pd.DataFrame()
    finally:
        db.close()
    return df
def normal():
    """Normalise each device's read-time columns to fractions of its row total.

    Reads every row of ``device_read_time``, divides each non-``device_id``
    column by the row-wise sum of those columns, and writes the result to
    ``jerry_test.device_read_time_normal``. The write uses
    ``if_exists='fail'``, so it does not overwrite an existing table.
    """
    # NOTE(review): credentials are hard-coded in source; consider moving
    # them to configuration or environment variables.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_test')
    sql = "select * from device_read_time"
    df = con_sql(db, sql)
    if df.empty:
        # con_sql returns an empty frame when the query fails; there is no
        # "device_id" column to work with in that case.
        print("device_read_time returned no rows; nothing to normalise")
        return

    # NOTE(review): positional column 15 is not renamed and keeps its integer
    # label -- confirm whether that is intentional.
    df = df.rename(columns={0:"device_id",1:"kongbai",2:"eye",3:"simi",4:"zitizhifang",5:"banyongjiu",6:"teeth",
                            7:"kouchun",8:"ear",9:"nose",10:"banyongjiuzhuang",11:"qita",12:"lunkuo",
                            13:"shoushen",14:"skin",16:"shenghuo",
                            17:"breast",18:"hair",19:"kangshuai",20:"shili",21:"chanhou",22:"zhushe"})

    feature_cols = [col for col in df.columns if col != "device_id"]
    # Sum only the feature columns: including device_id would either raise
    # (string ids on current pandas) or distort the total (numeric ids).
    row_total = df[feature_cols].sum(axis=1)
    df[feature_cols] = df[feature_cols].div(row_total, axis=0)

    yconnect = create_engine('mysql+pymysql://root:3SYz54LS9#^9sBvC@10.66.157.22:4000/jerry_test?charset=utf8')
    df.to_sql("device_read_time_normal", yconnect, schema='jerry_test', if_exists='fail', index=False)


if __name__ == "__main__":
    normal()
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment