Commit fcf2dcac authored by 王志伟
parents d1d43b4f 752910be
# -*- coding: UTF-8 -*-
import pymysql
import datetime
import pandas as pd
DIRECTORY_PATH="/data2/ffm/"
def get_yesterday_date():
    """Return yesterday's date as a string such as "2018-08-08".

    The date is derived from ``datetime.date.today()`` minus one day and is
    printed to stdout before being returned.

    :rtype: str
    """
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    # Format exactly once: ``strftime`` returns a ``str``, and ``str`` has no
    # ``strftime`` method, so formatting the result again raises
    # AttributeError.
    formatted = yesterday.strftime("%Y-%m-%d")
    print(formatted)
    return formatted
def get_data():
    """Fetch the selected device ids from ``eagle.ffm_diary_queue_temp``.

    Runs a single SELECT that keeps rows whose ``device_id`` matches the
    regular expression ``[5|6]$`` and returns the first column of every row.

    :return: the matching device ids; an empty tuple when no row matches
    :rtype: tuple
    """
    # NOTE(review): the connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
    try:
        cursor = conn2db.cursor()
        try:
            # NOTE(review): inside a character class "|" is a literal, so this
            # pattern also matches ids ending in "|"; "[56]$" is probably what
            # was intended -- confirm before changing the query.
            sql = "select device_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'"
            cursor.execute(sql)
            result = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when the query fails.
        conn2db.close()
    # Flatten the one-column rows; unlike indexing a DataFrame built from the
    # rows, this also works when the query returns nothing.
    return tuple(row[0] for row in result)
def result2file(data):
    """Write ``data`` to ``<DIRECTORY_PATH>ffm_get_data_<yesterday>.csv``.

    Every item of ``data`` becomes one line. A tuple or list item is written
    as its fields converted with ``str`` and joined by commas; any other item
    is written as a single-field line.

    :param data: iterable of rows (tuples/lists) or of scalar values
    """
    # ``get_yesterday_date`` has to be called: concatenating the function
    # object itself to a string raises TypeError.
    output = DIRECTORY_PATH + "ffm_get_data_" + get_yesterday_date() + ".csv"
    with open(output, "w") as f:
        for row in data:
            # Do not assume two columns; indexing ``row[1]`` fails on
            # single-column rows.
            fields = row if isinstance(row, (tuple, list)) else (row,)
            f.write(",".join(str(field) for field in fields) + "\n")
def ctr(date):
    """Print the click count, exposure count and click-through rate for a day.

    Counts the ``diary`` rows of ``data_feed_click`` and
    ``data_feed_exposure`` whose ``stat_date`` equals ``date`` and whose
    ``device_id`` is one of the ids returned by ``get_data()``. The ratio is
    printed only when the exposure count is non-zero.

    :param date: value compared against the ``stat_date`` column
    """
    # Accept either a flat sequence of ids or database rows whose first
    # column is the id.
    device_id = [row[0] if isinstance(row, (tuple, list)) else row for row in get_data()]
    if not device_id:
        # "IN ()" is not valid SQL; with no devices both counts are zero.
        print("点击数:0")
        print("曝光数:0")
        return

    # One placeholder per id, filled in by the driver. Formatting a Python
    # tuple into the statement breaks for a single id ("('a',)") and leaves
    # the values unescaped.
    placeholders = ",".join(["%s"] * len(device_id))
    sql_click = "select count(cid) from data_feed_click " \
                "where cid_type = 'diary' " \
                "and stat_date = %s and device_id in ({});".format(placeholders)
    sql_exp = "select count(cid) from data_feed_exposure " \
              "where cid_type = 'diary' and stat_date = %s and " \
              "device_id in ({})".format(placeholders)
    params = [date] + device_id

    # NOTE(review): the connection credentials are hard-coded here; consider
    # loading them from configuration or the environment instead.
    db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
    try:
        cursor = db.cursor()
        try:
            cursor.execute(sql_click, params)
            click = cursor.fetchone()[0]
            print("点击数:" + str(click))
            cursor.execute(sql_exp, params)
            exp = cursor.fetchone()[0]
            print("曝光数:" + str(exp))
        finally:
            cursor.close()
    finally:
        # Always release the connection, even when a query fails.
        db.close()

    if exp != 0:
        print("点击率:" + str(click / exp))
if __name__ == "__main__":
    # Write the queried rows to the CSV file first ...
    result2file(get_data())
    # ... then print the click statistics for yesterday.
    ctr(get_yesterday_date())
# -*- coding: UTF-8 -*-
import pymysql
import datetime
import pandas as pd
DIRECTORY_PATH="/data2/ffm/"
def get_yesterday_date():
    """Return yesterday's date as a string such as "2018-08-08".

    The date is derived from ``datetime.date.today()`` minus one day and is
    printed to stdout before being returned.

    :rtype: str
    """
    yesterday = datetime.date.today() - datetime.timedelta(days=1)
    # Format exactly once: ``strftime`` returns a ``str``, and ``str`` has no
    # ``strftime`` method, so formatting the result again raises
    # AttributeError.
    formatted = yesterday.strftime("%Y-%m-%d")
    print(formatted)
    return formatted
def get_data():
conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
......@@ -21,18 +25,10 @@ def get_data():
sql = "select device_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'"
cursor.execute(sql)
result = cursor.fetchall()
pd.DataFrame(list(result)).to_csv("/data2/ffm/yesterday.csv")
cursor.close()
return result
def result2file(data):
    """Write ``data`` to ``<DIRECTORY_PATH>ffm_get_data_<yesterday>.csv``.

    Every item of ``data`` becomes one line. A tuple or list item is written
    as its fields converted with ``str`` and joined by commas; any other item
    is written as a single-field line.

    :param data: iterable of rows (tuples/lists) or of scalar values
    """
    # ``get_yesterday_date`` has to be called: concatenating the function
    # object itself to a string raises TypeError.
    output = DIRECTORY_PATH + "ffm_get_data_" + get_yesterday_date() + ".csv"
    with open(output, "w") as f:
        for row in data:
            # Do not assume two columns; indexing ``row[1]`` fails on
            # single-column rows.
            fields = row if isinstance(row, (tuple, list)) else (row,)
            f.write(",".join(str(field) for field in fields) + "\n")
if __name__ == "__main__":
    # Query once and reuse the rows. The stray diff marker line is dropped,
    # and so is the second ``get_data()`` call, which repeated the same query
    # and discarded its result.
    result = get_data()
    result2file(result)
......@@ -96,12 +96,13 @@ object Main {
ti.tidbMapTable(dbName = GmeiConfig.config.getString("tidb.database"),tableName = "data_feed_click")
// val date8 = GmeiConfig.getMinusNDate(8)
val date8 = GmeiConfig.getMinusNDate(30)
val tidb_inupt = sc.sql(
s"""
|SELECT
| service_id,cid
|FROM nd_data_meigou_cid
|where stat_date > '${date8}'
""".stripMargin
)
......@@ -184,29 +185,27 @@ object Main {
GmeiConfig.writeToJDBCTable(similar_result, table="nd_cid_similarity_matrix", SaveMode.Overwrite)
//3. cids queue map to device_id
ti.tidbMapTable(dbName = GmeiConfig.config.getString("tidb.database"),tableName = "nd_cid_similarity_matrix")
val device_id = sc.sql(
s"""
|select a.device_id device_id,a.city_id city_id ,b.similarity_cid similarity_cid from
|(select device_id,city_id,first(cid) as cid from data_feed_click
|(select device_id,first(city_id) as city_id,first(cid) as cid from data_feed_click
|where cid in (select cid from nd_cid_similarity_matrix)
|group by device_id order by time) a left join
|group by device_id) a left join
|nd_cid_similarity_matrix b
|on a.cid = b.cid
|where b.similarity_cid is not null
""".stripMargin
)
device_id.na.fill(Map("city_id" -> "beijing"))
).na.fill(Map("city_id"->"beijing"))
device_id.show()
val device_queue = device_id.rdd.map {item =>
val parts = (item.getAs[String](fieldName = "device_id"),item.getAs[String](fieldName = "city_id"),item.getAs[String](fieldName = "similarity_cid"))
Try {
(parts._1,Try(parts._2.toString.replace("worldwide","beijing")),Try(parts._3.toString.replace("diary|","")).getOrElse(null))
(parts._1,Try(parts._2.toString.replace("worldwide","beijing")).getOrElse(null),Try(parts._3.toString.replace("diary|","")).getOrElse(null))
}.getOrElse(null)
}.filter(_!=null).toDF("device_id","city_id","similarity_cid")
......@@ -221,6 +220,3 @@ object Main {
sys.exit(1)
}
}
import pymysql
import pandas as pd
# 从一个数据库读数据,把读到的数据写到另外一个数据库
def get_data():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
cursor = db.cursor()
sql = "select native_queue,nearby_queue,nation_queue,megacity_queue,device_id,city_id from ffm_diary_queue;"
cursor.execute(sql)
result = cursor.fetchall()
pd.DataFrame(list(result)).to_csv("/home/gmuser/ffm.csv", index=None)
df = pd.read_csv("/home/gmuser/ffm.csv")
df = pd.DataFrame(list(result))
for i in range(df.shape[0]):
a = df.loc[i, :].values
insert(a)
......@@ -19,6 +18,10 @@ def insert(a):
db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
passwd='o5gbA27hXHHm',
db='doris_prod')
# List("AB20292B-5D15-4C44-9429-1C2FF5ED26F6", "802C5FDC-5DC6-42D0-8F6F-2DBE200BB21B",
# "358035085192742", "B2F0665E-4375-4169-8FE3-8A26A1CFE248", "863455037703008",
# "65EC6C14-1AD6-44C2-AED2-C41452284E91", "29548727-8242-4D58-8151-F603F975BB98")
# sql_delete = ""
sql = "INSERT INTO device_diary_queue (native_queue, nearby_queue, nation_queue, " \
"megacity_queue,device_id,city_id) VALUES ('{}','{}','{}','{}','{}','{}');".format \
(a[0], a[1], a[2],a[3],a[4], a[5])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment