黎涛 / meta_base_code / Commits

Commit 285e11fa authored Jan 29, 2021 by litaolemo
update

parent 18778568
Showing 33 changed files with 176 additions and 176 deletions (+176 -176)
new_user_analysis/new_user_behavior_analysis.py   +3 -3
new_user_analysis/new_user_has_protratit_rate.py  +9 -9
new_user_analysis/new_user_project_analysis.py    +3 -3
new_user_analysis/new_user_project_protratit.py   +5 -5
new_user_analysis/new_user_word_count.py          +3 -3
new_user_analysis/user_behavior_path.py           +7 -7
output/meigou_huidu_huisu.py                      +5 -5
output/out_put_diary_0923.py                      +2 -2
output/out_put_user_post_each_strategy.py         +5 -5
output/output_article_distribution_0917.py        +5 -5
task/advertisement_strategy_d.py                  +7 -7
task/conent_detail_page_grayscale_ctr.py          +7 -7
task/core_indicators_monitoring.py                +5 -5
task/daily_search_word_count.py                   +5 -5
task/daily_search_word_count_fix.py               +5 -5
task/daily_search_word_count_last_two_year.py     +5 -5
task/ecommerce_income_report.py                   +7 -7
task/high_quality_diary_analysis.py               +5 -5
task/meigou_data.py                               +7 -7
task/portary_article_distribution.py              +6 -7
task/recommend_strategy_d.py                      +7 -7
task/recommend_strategy_fix.py                    +7 -6
task/search_answer_ctr.py                         +7 -7
task/search_diary_ctr.py                          +7 -7
task/search_meigou_ctr.py                         +7 -7
task/search_strategy_d.py                         +5 -5
task/search_tractate_ctr.py                       +7 -7
task/spark_temp.py                                +5 -5
task/spark_test.py                                +7 -7
task/tractate_analysis_in_7000.py                 +5 -5
task/内容灰度数据.sql                              +1 -1
utils/func_from_es_get_article.py                 +2 -2
utils/func_get_pv_card_id.py                      +3 -3
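Every file below receives the same substitution: a PyMySQL connection to TiDB (jerry_prod on 172.16.40.158:4000) is paired with a connection to Doris (doris_olap on 172.16.50.175:3306). A minimal sketch of the recurring pattern, assembled from the hunks below — con_sql is the repo's own helper; the close and return at the end are assumptions, since every hunk truncates after fetchall():

import pymysql

def con_sql(sql):
    # 从数据库的表里获取数据 (fetch rows from a database table)
    # Before this commit the scripts pointed at TiDB:
    #   pymysql.connect(host='172.16.40.158', port=4000, user='st_user',
    #                   passwd='aqpuBLYzEV7tML5RPsN1pntUzFy', db='jerry_prod')
    # After it they point at Doris:
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris',
                         passwd='o5gbA27hXHHm', db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
    db.close()     # assumed cleanup; not visible in the truncated hunks
    return result  # assumed for the same reason

In a few files the addition and deletion counts differ (task/recommend_strategy_fix.py is +7 -6, task/portary_article_distribution.py is +6 -7), which suggests the old connect line was left in place and shadowed there rather than replaced.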
new_user_analysis/new_user_behavior_analysis.py

@@ -71,11 +71,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("new_user_project_protratit")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
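The other half of the recurring change is the sparkConf block above: each hunk pairs the old PD address 172.16.40.158:2379 with 172.16.40.170:4000 and pairs the active prod.tidb.database line with a commented-out copy, suggesting the address was repointed and the database default disabled. A minimal sketch of how these settings feed the session builder the scripts share — the appName value and the final getOrCreate() are placeholders/assumptions, the rest is taken from the hunks:

from pyspark import SparkConf
from pyspark.sql import SparkSession

sparkConf = SparkConf()
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")  # value paired with 172.16.40.158:2379 in each hunk
# sparkConf.set("prod.tidb.database", "jerry_prod")               # shown commented out in the new revision
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")  # as in task/advertisement_strategy_d.py
         .appName("example_app")  # placeholder; each script sets its own name
         .getOrCreate())          # assumed; the hunks truncate inside the builder chain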
new_user_analysis/new_user_has_protratit_rate.py

@@ -29,8 +29,8 @@ from meta_base_code.utils.func_from_redis_get_portrait import *
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -57,11 +57,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("new_user_has_protratit_rate")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -214,8 +214,8 @@ WHERE spam_pv.device_id IS NULL
    )
    print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
@@ -268,8 +268,8 @@ WHERE spam_pv.device_id IS NULL
    )
    print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
new_user_analysis/new_user_project_analysis.py

@@ -71,11 +71,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("new_user_project_protratit")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
new_user_analysis/new_user_project_protratit.py

@@ -72,11 +72,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("new_user_project_protratit")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -326,8 +326,8 @@ WHERE spam_pv.device_id IS NULL
    )
    print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
new_user_analysis/new_user_word_count.py

@@ -58,11 +58,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("test")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
new_user_analysis/user_behavior_path.py

@@ -29,8 +29,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -57,11 +57,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("user_behavior_path")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -980,8 +980,8 @@ on t1.device_id = t2.cl_id""".format(partition_date=today_str)
    )
    print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
output/meigou_huidu_huisu.py

@@ -28,8 +28,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -56,11 +56,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("meigou_huidu_huisu")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
output/out_put_diary_0923.py

@@ -70,8 +70,8 @@ task_days = 2
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
output/out_put_user_post_each_strategy.py

@@ -64,8 +64,8 @@ from meta_base_code.utils.func_from_es_get_article import get_es_article_num, ge
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -92,11 +92,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("out_put_user_post_each_strategy")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
output/output_article_distribution_0917.py

@@ -44,11 +44,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("output_article_distribution_0917")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -67,8 +67,8 @@ task_days = 2
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
task/advertisement_strategy_d.py

@@ -23,16 +23,16 @@ from pyspark.sql import SparkSession, DataFrame
from pyspark.sql.functions import lit
import pytispark.pytispark as pti

db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
cursor = db.cursor()

def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -59,11 +59,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .appName(
...
task/conent_detail_page_grayscale_ctr.py

@@ -26,8 +26,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -54,11 +54,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .appName(
...
@@ -87,8 +87,8 @@ device_df = spark.sql(sql)
device_df.show(1, False)
sql_res = device_df.collect()
print("-----------------------------------------------------------------------------")
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
cursor = db.cursor()
for res in sql_res:
    # print(res)
...
task/core_indicators_monitoring.py

@@ -43,11 +43,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .appName(
...
@@ -542,8 +542,8 @@ FROM
sql_res = device_df.collect()
for res in sql_res:
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod', )
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    print(res)
    partition_date = res.partition_date
...
task/daily_search_word_count.py

@@ -45,11 +45,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("test")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -196,8 +196,8 @@ for t in range(0, task_days):
        partition_date = str(now + datetime.timedelta(days=-1))
        tag_names_list_week.append((word, nums, uv, partition_date))
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
cursor = db.cursor()
insert_sql = "replace into daily_search_word_count(word, nums, uv,pid,partition_day) VALUES(%s,%s,%s,%s,%s)"
insert_list = []
...
task/daily_search_word_count_fix.py

@@ -52,11 +52,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("test")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -214,8 +214,8 @@ for t in range(0, task_days):
        partition_date = str(now + datetime.timedelta(days=-1))
        tag_names_list_week.append((word, nums, uv, partition_date))
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
cursor = db.cursor()
insert_sql = "replace into daily_search_word_count_fix(word, nums, uv,pid,partition_day) VALUES(%s,%s,%s,%s,%s)"
insert_list = []
...
task/daily_search_word_count_last_two_year.py

@@ -97,11 +97,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("test")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -305,8 +305,8 @@ for t in range(1, task_days):
    # pid = hashlib.md5((today_str + keywords).encode("utf8")).hexdigest()
    tag_names_list_week.append((key_count, keywords, today_str, is_delete, servise_num, pv, is_tag))
db = pymysql.connect(host='172.16.30.136', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
# db = pymysql.connect(host='bj-cdb-6slgqwlc.sql.tencentcdb.com', port=62120, user='work', passwd='Gengmei1',
#                      db='doris_prod')
cursor = db.cursor()
...
task/ecommerce_income_report.py

@@ -35,8 +35,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -63,11 +63,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("ecommerce_income_report")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .appName(
...
@@ -422,8 +422,8 @@ group by partition_day
cpc_item_pricing = cpc_proportion_expend_recharge_amount / cpc_click_num
cpc_flow_rat = cpc_click_num / welfare_pv
# tol_proportion_expend_amount
db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
cursor = db.cursor()
partition_date = yesterday_str
pid = hashlib.md5(partition_date.encode("utf8")).hexdigest()
...
task/high_quality_diary_analysis.py

@@ -46,8 +46,8 @@ from meta_base_code.utils.func_from_es_get_article import get_es_article_num, ge
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -74,11 +74,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("high_quality_diary_analysis")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
task/meigou_data.py

@@ -28,8 +28,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -56,11 +56,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("meigou_data")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -195,8 +195,8 @@ order by 1
    pid = hashlib.md5((partition_date + device_os_type + active_type + grey_type).encode("utf8")).hexdigest()
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    commit_sql = """replace into meigou_data(partition_date,device_os_type,active_type,grey_type,clickpv_div_exposurepv,
ertiaopv_div_exposurepv,ertiaopv_div_clickpv,cpcpv_div_exposurepv,click_pv,exp_pv,two_click_pv,cpc_click_pv,cpc_exp_pv,
...
task/portary_article_distribution.py

@@ -31,8 +31,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -190,9 +190,8 @@ for redis_count,res in enumerate(user_portrait_scan):
    )
    # print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy', db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm', db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
@@ -216,8 +215,8 @@ for page_type in range(3):
    )
    # print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
task/recommend_strategy_d.py

@@ -22,16 +22,16 @@ from pyspark.sql import SparkSession, DataFrame
# from pyspark.sql.functions import lit
# import pytispark.pytispark as pti

db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
cursor = db.cursor()

def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -58,11 +58,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .appName(
...
task/recommend_strategy_fix.py

@@ -22,14 +22,15 @@ from pyspark.sql import SparkSession, DataFrame
# from pyspark.sql.functions import lit
# import pytispark.pytispark as pti

db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
cursor = db.cursor()

def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy', db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm', db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -56,11 +57,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .appName(
...
task/search_answer_ctr.py

@@ -28,8 +28,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -56,11 +56,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("search_answer_ctr")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -207,8 +207,8 @@ for t in range(0, task_days):
    )
    print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
task/search_diary_ctr.py

@@ -28,8 +28,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -56,11 +56,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("search_diary_ctr")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .appName(
...
@@ -198,8 +198,8 @@ group by t1.partition_date,device_os_type,active_type,channel
    print(instert_sql_diary)
    print(instert_sql_qa)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql_diary)
    res = cursor.execute(instert_sql_qa)
...
task/search_meigou_ctr.py

@@ -28,8 +28,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -56,11 +56,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
         .config("spark.tispark.pd.addresses", "172.16.40.170:2379")
         .appName(
...
@@ -207,8 +207,8 @@ for t in range(0, task_days):
    )
    print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
task/search_strategy_d.py

@@ -24,8 +24,8 @@ from pyspark.sql import SparkSession, DataFrame
# from pyspark.sql.functions import lit
# import pytispark.pytispark as pti

db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                     db='jerry_prod')
db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                     db='doris_olap')
cursor = db.cursor()

startTime = time.time()
...
@@ -50,12 +50,12 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("spark.sql.parquet.compression.codec", "snappy")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("spark.executor.extraJavaOptions", "-Djava.library.path=HADOOP_HOME/lib/native")
sparkConf.set("spark.driver.extraLibraryPath", "/opt/hadoop/lib/native")
# sparkConf.set("spark.driver.extraJavaOptions", "-Djava.library.path=HADOOP_HOME/lib/native")
...
task/search_tractate_ctr.py

@@ -28,8 +28,8 @@ from pyspark.sql import SparkSession, DataFrame
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -56,11 +56,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("search_tractate_ctr")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -206,8 +206,8 @@ for t in range(0, task_days):
    )
    print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
task/spark_temp.py

@@ -35,8 +35,8 @@ from meta_base_code.utils.func_from_redis_get_portrait import *
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -63,11 +63,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("new_user_has_protratit_rate")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
task/spark_test.py

@@ -31,8 +31,8 @@ def con_sql(sql):
    # db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
    #                      db='jerry_prod')
    db = pymysql.connect(host='172.16.30.136', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -59,11 +59,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("new_user_has_protratit_rate")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
@@ -233,8 +233,8 @@ WHERE spam_pv.device_id IS NULL
    )
    print(instert_sql)
    # cursor.execute("set names 'UTF8'")
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    res = cursor.execute(instert_sql)
    db.commit()
...
task/tractate_analysis_in_7000.py

@@ -44,8 +44,8 @@ from meta_base_code.utils.func_from_es_get_article import get_es_article_num, ge
def con_sql(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
@@ -72,11 +72,11 @@ sparkConf.set("prod.gaia.jdbcuri",
              "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
sparkConf.set("prod.tidb.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
# sparkConf.set("prod.jerry.jdbcuri",
#               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
sparkConf.set("prod.tidb.database", "jerry_prod")
# sparkConf.set("prod.tidb.database", "jerry_prod")
sparkConf.setAppName("tractate_analysis_in_7000")
spark = (SparkSession.builder.config(conf=sparkConf)
         .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...
task/内容灰度数据.sql

@@ -279,7 +279,7 @@ FROM pm.tl_pm_contentpage_ctr
WHERE partition_day >= '20200730'
  and partition_day <= regexp_replace(DATE_SUB(current_date, 1), '-', '')
order by `日期` desc, `设备类型`, `活跃类型`, `灰度`, `页面`
sparkConf.set("prod.jerry.jdbcuri",
              "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
-- sparkConf.set("prod.jerry.jdbcuri", "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
CREATE TABLE conent_detail_page_grayscale_ctr (
    day_id varchar(100),
...
utils/func_from_es_get_article.py

@@ -19,8 +19,8 @@ es = Elasticsearch([
def con_sql_jerry_prod(sql):
    # 从数据库的表里获取数据
    db = pymysql.connect(host='172.16.40.158', port=4000, user='st_user', passwd='aqpuBLYzEV7tML5RPsN1pntUzFy',
                         db='jerry_prod')
    db = pymysql.connect(host='172.16.50.175', port=3306, user='doris', passwd='o5gbA27hXHHm',
                         db='doris_olap')
    cursor = db.cursor()
    cursor.execute(sql)
    result = cursor.fetchall()
...
utils/func_get_pv_card_id.py

@@ -45,11 +45,11 @@ def get_card_id():
                  "jdbc:mysql://172.16.30.143/zhengxing?user=work&password=BJQaT9VzDcuPBqkd&rewriteBatchedStatements=true")
    sparkConf.set("prod.tidb.jdbcuri",
                  "jdbc:mysql://172.16.40.158:4000/eagle?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
    sparkConf.set("prod.jerry.jdbcuri",
                  "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
    # sparkConf.set("prod.jerry.jdbcuri",
    #               "jdbc:mysql://172.16.40.158:4000/jerry_prod?user=st_user&password=aqpuBLYzEV7tML5RPsN1pntUzFy&rewriteBatchedStatements=true")
    sparkConf.set("prod.tispark.pd.addresses", "172.16.40.158:2379")
    sparkConf.set("prod.tispark.pd.addresses", "172.16.40.170:4000")
    sparkConf.set("prod.tidb.database", "jerry_prod")
    # sparkConf.set("prod.tidb.database", "jerry_prod")
    spark = (SparkSession.builder.config(conf=sparkConf)
             .config("spark.sql.extensions", "org.apache.spark.sql.TiExtensions")
...