Commit 41328a9b authored by 王志伟

fix conflict

parents 3b477572 fcf2dcac
...@@ -12,28 +12,43 @@ def get_yesterday_date(): ...@@ -12,28 +12,43 @@ def get_yesterday_date():
""" """
today = datetime.date.today() today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1) yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y%m%d") yesterday = yesterday.strftime("%Y-%m-%d")
print(yesterday)
return yesterday return yesterday
#today = datetime.date.today().strftime("%Y%m%d") #today = datetime.date.today().strftime("%Y%m%d")
#return today #return today
def get_data(): def get_data():
conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle') conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
cursor = conn2db.cursor() cursor = conn2db.cursor()
sql = "select device_id,city_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'" sql = "select device_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'"
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
device_id = tuple(pd.DataFrame(list(result))[0].values.tolist())
cursor.close() cursor.close()
return result return device_id
def result2file(data): def ctr(date):
output = DIRECTORY_PATH + "ffm_get_data_" + get_yesterday_date() + ".csv" device_id = get_data()
with open(output,"w") as f: sql_click = "select count(cid) from data_feed_click " \
for i in data: "where cid_type = 'diary' " \
line = str(i[0]) + "," + str(i[1]) + "\n" "and stat_date = '{}' and device_id in {};".format(date,device_id)
f.write(line) db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='jerry_prod')
cursor = db.cursor()
cursor.execute(sql_click)
click = cursor.fetchone()[0]
print("点击数:"+str(click))
sql_exp = "select count(cid) from data_feed_exposure " \
"where cid_type = 'diary' and stat_date = '{}' and " \
"device_id in {}".format(date,device_id)
cursor.execute(sql_exp)
exp = cursor.fetchone()[0]
print("曝光数:"+str(exp))
if exp != 0:
print("点击率:"+str(click/exp))
if __name__ == "__main__": if __name__ == "__main__":
result = get_data() date = get_yesterday_date()
result2file(result) ctr(date)
# -*- coding: UTF-8 -*-
import pymysql import pymysql
import datetime import datetime
import pandas as pd
DIRECTORY_PATH="/data2/ffm/" DIRECTORY_PATH="/data2/ffm/"
...@@ -10,29 +12,23 @@ def get_yesterday_date(): ...@@ -10,29 +12,23 @@ def get_yesterday_date():
""" """
today = datetime.date.today() today = datetime.date.today()
yesterday = today - datetime.timedelta(days=1) yesterday = today - datetime.timedelta(days=1)
yesterday = yesterday.strftime("%Y%m%d") yesterday = yesterday.strftime("%Y-%m-%d")
print(yesterday)
return yesterday return yesterday
#today = datetime.date.today().strftime("%Y%m%d") #today = datetime.date.today().strftime("%Y%m%d")
#return today #return today
def get_data(): def get_data():
conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle') conn2db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
cursor = conn2db.cursor() cursor = conn2db.cursor()
sql = "select device_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'" sql = "select device_id from eagle.ffm_diary_queue_temp where device_id regexp '[5|6]$'"
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
pd.DataFrame(list(result)).to_csv("/data2/ffm/yesterday.csv")
cursor.close() cursor.close()
return result
def result2file(data):
output = DIRECTORY_PATH + "ffm_get_data_" + get_yesterday_date + ".csv"
with open(output,"w") as f:
for i in data:
line = str(i[0]) + "," + str(i[1]) + "\n"
f.write(line)
if __name__ == "__main__": if __name__ == "__main__":
result = get_data() get_data()
result2file(result)
\ No newline at end of file
...@@ -96,12 +96,13 @@ object Main { ...@@ -96,12 +96,13 @@ object Main {
ti.tidbMapTable(dbName = GmeiConfig.config.getString("tidb.database"),tableName = "data_feed_click") ti.tidbMapTable(dbName = GmeiConfig.config.getString("tidb.database"),tableName = "data_feed_click")
// val date8 = GmeiConfig.getMinusNDate(8) val date8 = GmeiConfig.getMinusNDate(30)
val tidb_inupt = sc.sql( val tidb_inupt = sc.sql(
s""" s"""
|SELECT |SELECT
| service_id,cid | service_id,cid
|FROM nd_data_meigou_cid |FROM nd_data_meigou_cid
|where stat_date > '${date8}'
""".stripMargin """.stripMargin
) )
...@@ -184,29 +185,27 @@ object Main { ...@@ -184,29 +185,27 @@ object Main {
GmeiConfig.writeToJDBCTable(similar_result, table="nd_cid_similarity_matrix", SaveMode.Overwrite) GmeiConfig.writeToJDBCTable(similar_result, table="nd_cid_similarity_matrix", SaveMode.Overwrite)
//3. cids queue map to device_id //3. cids queue map to device_id
ti.tidbMapTable(dbName = GmeiConfig.config.getString("tidb.database"),tableName = "nd_cid_similarity_matrix") ti.tidbMapTable(dbName = GmeiConfig.config.getString("tidb.database"),tableName = "nd_cid_similarity_matrix")
val device_id = sc.sql( val device_id = sc.sql(
s""" s"""
|select a.device_id device_id,a.city_id city_id ,b.similarity_cid similarity_cid from |select a.device_id device_id,a.city_id city_id ,b.similarity_cid similarity_cid from
|(select device_id,city_id,first(cid) as cid from data_feed_click |(select device_id,first(city_id) as city_id,first(cid) as cid from data_feed_click
|where cid in (select cid from nd_cid_similarity_matrix) |where cid in (select cid from nd_cid_similarity_matrix)
|group by device_id order by time) a left join |group by device_id) a left join
|nd_cid_similarity_matrix b |nd_cid_similarity_matrix b
|on a.cid = b.cid |on a.cid = b.cid
|where b.similarity_cid is not null |where b.similarity_cid is not null
""".stripMargin """.stripMargin
) ).na.fill(Map("city_id"->"beijing"))
device_id.na.fill(Map("city_id" -> "beijing"))
device_id.show() device_id.show()
val device_queue = device_id.rdd.map {item => val device_queue = device_id.rdd.map {item =>
val parts = (item.getAs[String](fieldName = "device_id"),item.getAs[String](fieldName = "city_id"),item.getAs[String](fieldName = "similarity_cid")) val parts = (item.getAs[String](fieldName = "device_id"),item.getAs[String](fieldName = "city_id"),item.getAs[String](fieldName = "similarity_cid"))
Try { Try {
(parts._1,Try(parts._2.toString.replace("worldwide","beijing")),Try(parts._3.toString.replace("diary|","")).getOrElse(null)) (parts._1,Try(parts._2.toString.replace("worldwide","beijing")).getOrElse(null),Try(parts._3.toString.replace("diary|","")).getOrElse(null))
}.getOrElse(null) }.getOrElse(null)
}.filter(_!=null).toDF("device_id","city_id","similarity_cid") }.filter(_!=null).toDF("device_id","city_id","similarity_cid")
...@@ -221,6 +220,3 @@ object Main { ...@@ -221,6 +220,3 @@ object Main {
sys.exit(1) sys.exit(1)
} }
} }
import pymysql import pymysql
import pandas as pd import pandas as pd
# 从一个数据库读数据,把读到的数据写到另外一个数据库
def get_data(): def get_data():
db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle') db = pymysql.connect(host='10.66.157.22', port=4000, user='root', passwd='3SYz54LS9#^9sBvC', db='eagle')
cursor = db.cursor() cursor = db.cursor()
sql = "select native_queue,nearby_queue,nation_queue,megacity_queue,device_id,city_id from ffm_diary_queue;" sql = "select native_queue,nearby_queue,nation_queue,megacity_queue,device_id,city_id from ffm_diary_queue;"
cursor.execute(sql) cursor.execute(sql)
result = cursor.fetchall() result = cursor.fetchall()
pd.DataFrame(list(result)).to_csv("/home/gmuser/ffm.csv", index=None) df = pd.DataFrame(list(result))
df = pd.read_csv("/home/gmuser/ffm.csv")
for i in range(df.shape[0]): for i in range(df.shape[0]):
a = df.loc[i, :].values a = df.loc[i, :].values
insert(a) insert(a)
...@@ -19,6 +18,10 @@ def insert(a): ...@@ -19,6 +18,10 @@ def insert(a):
db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris', db = pymysql.connect(host='rm-m5e842126ng59jrv6.mysql.rds.aliyuncs.com', port=3306, user='doris',
passwd='o5gbA27hXHHm', passwd='o5gbA27hXHHm',
db='doris_prod') db='doris_prod')
# List("AB20292B-5D15-4C44-9429-1C2FF5ED26F6", "802C5FDC-5DC6-42D0-8F6F-2DBE200BB21B",
# "358035085192742", "B2F0665E-4375-4169-8FE3-8A26A1CFE248", "863455037703008",
# "65EC6C14-1AD6-44C2-AED2-C41452284E91", "29548727-8242-4D58-8151-F603F975BB98")
# sql_delete = ""
sql = "INSERT INTO device_diary_queue (native_queue, nearby_queue, nation_queue, " \ sql = "INSERT INTO device_diary_queue (native_queue, nearby_queue, nation_queue, " \
"megacity_queue,device_id,city_id) VALUES ('{}','{}','{}','{}','{}','{}');".format \ "megacity_queue,device_id,city_id) VALUES ('{}','{}','{}','{}','{}','{}');".format \
(a[0], a[1], a[2],a[3],a[4], a[5]) (a[0], a[1], a[2],a[3],a[4], a[5])
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment