Commit 94339d50 authored by 胡凯旋

Merge branch 'hkx/feature/multi-platform-support' into 'master'

Hkx/feature/multi platform support

See merge request hukaixuan/gm-aliyun-rds-exporter!1
parents 6180d06b fb5fb180
@@ -63,4 +63,6 @@ target/
.vscode/
.env/
.env2/
.python-version
\ No newline at end of file
.python-version
config.yaml
\ No newline at end of file
### GM-MYSQL_EXPORTER
\ No newline at end of file
# coding=utf-8
import yaml
with open('config.yaml', 'r') as f:
config = yaml.safe_load(f)  # safe_load avoids executing arbitrary YAML tags
############################ aliyun configuration ############################################
# Database instances to monitor
rds_instances:
rdsfewzdmf0jfjp9un8xj:
name: prod.zhengxing
aliyun_instances:
instance_id:
name: instance_name
# Connection usage factor: connection usage * this factor gives a roughly normalized value across instances; default 1
connection_usage_factor: 1
# IOPS usage factor, default 1
@@ -10,55 +11,14 @@ rds_instances:
cpu_usage_factor: 1
# Memory usage factor
memory_usage_factor: 1
# Database type, primary or replica, default master
# Database type: master (primary) or slave (replica), default master
type: master
rdsg2mpi623x6wioo796k:
name: prod.readonly2.zhengxing
type: slave
rm-m5e63m5z6o4re1p49:
name: mars.prod
connection_usage_factor: 0.1
rm-m5e842126ng59jrv6:
name: data.prod
connection_usage_factor: 0.1
rm-m5eu1wmu0zt6j5n72:
name: sms.prod
rr-m5e2vy0ke9q35kst5:
name: prod.readonly.zhengxing
type: slave
rm-m5e90y907sw3bnz88:
name: common.prod
rr-m5e2n024w0z293r5f:
name: common.readonly
type: slave
rr-m5efx2m67mi002jq9:
name: prod.readonly3.zhengxing
type: slave
rm-m5emg41za2w7l6au3:
name: mimas_prod
rm-m5ey2s823bq0lc616:
name: bran_prod
rr-m5et21lafq1677pid:
name: mimas_readonly
type: slave
rr-m5e3036a5l18ym4t6:
name: mimas_readonly01
type: slave
instance_id2:
name: instance_name2
# Database metrics to monitor
metrics:
aliyun_metrics:
# Aliyun SDK query field
CpuUsage:
# prometheus metric name
@@ -80,7 +40,55 @@ metrics:
# aliyun key and secret
aliyun_settings:
key:
secret:
key: xxx
secret: xxx
############################ aliyun configuration ############################################
############################ tencent configuration ############################################
# Database instances to monitor
tencent_instances:
instance_id:
name: instance_name
# Connection usage factor: connection usage * this factor gives a roughly normalized value across instances; default 1
connection_usage_factor: 1
# IOPS usage factor, default 1
iops_usage_factor: 1
# CPU usage factor
cpu_usage_factor: 1
# Memory usage factor
memory_usage_factor: 1
# Database type: master (primary) or slave (replica), default master
type: master
instance_id2:
name: instance_name2
# Database metrics to monitor
tencent_metrics:
# Tencent Cloud SDK query field (metric name)
CPUUseRate:
# prometheus metric name
name: rds_cpu_usage_ratio
# CPU usage factor: CPU usage * this factor gives a roughly normalized value
factor: cpu_usage_factor
MemoryUse:
name: rds_memory_usage_ratio
factor: memory_usage_factor
ThreadsConnected:
name: rds_connection_usage_ratio
factor: connection_usage_factor
# IOPSUsage:
# name: rds_iops_usage_ratio
# factor: iops_usage_factor
# tencent key and secret
tencent_settings:
secret_id: xxx
secret_key: xxx
data_interval: 60
############################ tencent configuration ############################################
\ No newline at end of file
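For clarity, a minimal sketch (not part of this merge request) of how the `*_usage_factor` values above are applied: the exporter multiplies each raw utilisation datapoint by the per-instance factor, so differently sized instances become roughly comparable in the `*_with_factor` series.

```python
# Minimal sketch, assuming a raw utilisation percentage from the cloud API
# and a per-instance factor taken from config.yaml (e.g. connection_usage_factor: 0.1).
raw_connection_usage = 80.0        # percent reported for a very large instance
connection_usage_factor = 0.1      # per-instance factor from config.yaml
normalized = raw_connection_usage * connection_usage_factor
print(normalized)                  # 8.0 -> exported as the *_with_factor metric series
```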
# coding=utf-8
import abc
import math
import pprint
from prometheus_client.core import GaugeMetricFamily
from conf import config
from utils.tencent import Monitor
from utils.aliyun import CMS
class Exporter(object):
__metaclass__ = abc.ABCMeta
@abc.abstractmethod
def export_prometheus_data(self):
pass
class TencentExporter(Exporter):
def __init__(self):
self.monitor = Monitor()
self.instances = config['tencent_instances']
self.metrics = config['tencent_metrics']
self.data_interval = config['tencent_settings']['data_interval']
def export_prometheus_data(self):
data = self._fetch_monitor_data()
return self._transfer_to_prometheus_format(data)
def _fetch_monitor_data(self):
data_from_tencent = {}
instance_id_list = list(self.instances.keys())  # list() so the batch slicing below also works on Python 3
# iterate over all configured metrics
for label in self.metrics.keys():
data_from_tencent[label] = []
# fetch monitoring data for at most 10 instances per API call
for i in range(0, int(math.ceil(len(instance_id_list)/10.0))):
instance_to_get = instance_id_list[10*i : 10*(i+1)]
try:
res = self.monitor.get_cdb_metric(instance_to_get, label, self.data_interval)
# print(res)
data_from_tencent[label].extend(res['DataPoints'])
except Exception as e:
print(e)
return data_from_tencent
def _transfer_to_prometheus_format(self, data):
# print(data)
metrics_to_export = []
service_provider = "tencent"
for label, instance_data_list in data.items():
metric = self.metrics[label]
metricname = metric['name']
factor = metric.get('factor', None)
avg_metric_family = GaugeMetricFamily(metricname, metricname, labels=[
'rds_name', 'type', 'service_provider'
])
with_factor_metricname = '{}_with_factor'.format(metricname)
with_factor_metric_family = GaugeMetricFamily(with_factor_metricname, with_factor_metricname, labels=[
'rds_name', 'type', 'service_provider'
])
for instance_data in instance_data_list:
timestamp_value_dict = dict(zip(instance_data['Timestamps'], instance_data['Values']))
instance_id = instance_data['Dimensions'][0]['Value']
instance = self.instances[instance_id]
instance_name = instance['name']
instance_type = instance.get('type', 'master')
# factor for the current query name and this RDS instance; defaults to 1 if not configured
rds_factor = instance.get(factor, 1)
for timestamp, value in timestamp_value_dict.items():
avg_metric_family.add_metric([instance_name, instance_type, service_provider], value, timestamp)
with_factor_metric_family.add_metric(
['{}*{}'.format(instance_name,rds_factor), instance_type, service_provider],
value*rds_factor, timestamp)
metrics_to_export.extend([avg_metric_family, with_factor_metric_family])
return metrics_to_export
class AliyunExporter(Exporter):
def __init__(self):
self.monitor = CMS()
self.instances = config['aliyun_instances']
self.metrics = config['aliyun_metrics']
def export_prometheus_data(self):
data = self._fetch_monitor_data()
return self._transfer_to_prometheus_format(data)
def _fetch_monitor_data(self):
metrics_from_aliyun = {}
all_rds_id = list(self.instances.keys())  # list() so the batch slicing below also works on Python 3
for label in self.metrics.keys():
metrics_from_aliyun[label] = []
for i in range(0, int(math.ceil(len(all_rds_id)/10.0))):
instance_id_list = all_rds_id[10*i : 10*(i+1)]
try:
res = self.monitor.get_rds_metric(instance_id_list, label)
metrics_from_aliyun[label].extend(res['Datapoints'])
except Exception as e:
print(e)
return metrics_from_aliyun
def _transfer_to_prometheus_format(self, data):
metrics_to_export = []
service_provider = "aliyun"
for queryname, datapoints in data.items():
metric = self.metrics[queryname]
metricname = metric['name']
factor = metric.get('factor', None)
avg_metric_family = GaugeMetricFamily(metricname, metricname,
labels=['rds_name', 'type', 'service_provider'])
max_metricname = '{}_max'.format(metricname)
max_metric_family = GaugeMetricFamily(max_metricname,
max_metricname, labels=['rds_name', 'type', 'service_provider'])
with_factor_metricname = '{}_with_factor'.format(metricname)
with_factor_metric_family = GaugeMetricFamily(with_factor_metricname,
with_factor_metricname, labels=['rds_name', 'type', 'service_provider'])
for point in datapoints:
rds = self.instances[point['instanceId']]
rds_name = rds['name']
rds_type = rds.get('type', 'master')
# factor for the current query name and this RDS instance; defaults to 1 if not configured
rds_factor = rds.get(factor, 1)
avg_metric_family.add_metric([rds_name, rds_type, service_provider],
point['Average']/100.0, point['timestamp']/1000)
max_metric_family.add_metric([rds_name, rds_type, service_provider],
point['Maximum']/100.0, point['timestamp']/1000)
with_factor_metric_family.add_metric(['{}*{}'.format(rds_name,rds_factor), rds_type, service_provider],
point['Average']*rds_factor/100.0, point['timestamp']/1000)
metrics_to_export.extend([avg_metric_family, max_metric_family, with_factor_metric_family])
return metrics_to_export
if __name__ == "__main__":
# tencent_exporter = TencentExporter()
# data = tencent_exporter.export_prometheus_data()
# # print(data)
aliyun_exporter = AliyunExporter()
data = aliyun_exporter._fetch_monitor_data()
# print(data)
@@ -6,76 +6,21 @@ import math
import yaml
from prometheus_client import start_http_server
from prometheus_client.core import GaugeMetricFamily, REGISTRY
from prometheus_client.core import REGISTRY
from aliyun import CMS
aliyun_cms = CMS()
with open('config.yaml', 'r') as f:
config = yaml.load(f)
# rds_instances = settings.RDS_INSTANCES
rds_instances = config['rds_instances']
queryname_metricname = config['metrics']
from exporter import TencentExporter, AliyunExporter
class RDSCollector(object):
def __init__(self):
self.exporter_list = [
TencentExporter(),
# AliyunExporter(),
]
def collect(self):
metrics = []
# mapping from Aliyun SDK query fields to prometheus metric names
# queryname_metricname = {
# 'CpuUsage': 'rds_cpu_usage_ratio',
# # 'DiskUsage': 'rds_disk_usage_ratio',
# 'MemoryUsage': 'rds_memory_usage_radio',
# 'ConnectionUsage': 'rds_connection_usage_radio',
# 'IOPSUsage': 'rds_iops_usage_radio',
# }
metrics_from_aliyun = {}
# all_rds_id = list(rds_instances.keys())
all_rds_id = rds_instances.keys()
# get metrics from aliyun
for label in queryname_metricname.keys():
metrics_from_aliyun[label] = []
for i in range(0, int(math.ceil(len(all_rds_id)/10.0))):
instance_id_list = all_rds_id[10*i : 10*(i+1)]
try:
res = aliyun_cms.get_rds_metric(instance_id_list, label)
metrics_from_aliyun[label].extend(res['Datapoints'])
except Exception as e:
print(e)
# transform to prometheus metric format
for queryname, datapoints in metrics_from_aliyun.items():
metric = queryname_metricname[queryname]
metricname = metric['name']
factor = metric.get('factor', None)
avg_metric_family = GaugeMetricFamily(metricname, metricname,
labels=['rds_name', 'type'])
max_metricname = '{}_max'.format(metricname)
max_metric_family = GaugeMetricFamily(max_metricname,
max_metricname, labels=['rds_name', 'type'])
with_factor_metricname = '{}_with_factor'.format(metricname)
with_factor_metric_family = GaugeMetricFamily(with_factor_metricname,
with_factor_metricname, labels=['rds_name', 'type'])
for point in datapoints:
# rds_name = rds_instances[point['instanceId']]
rds = rds_instances[point['instanceId']]
rds_name = rds['name']
rds_type = rds.get('type', 'master')
# factor for the current query name and this RDS instance; defaults to 1 if not configured
rds_factor = rds.get(factor, 1)
avg_metric_family.add_metric([rds_name, rds_type],
point['Average']/100.0, point['timestamp']/1000)
max_metric_family.add_metric([rds_name, rds_type],
point['Maximum']/100.0, point['timestamp']/1000)
with_factor_metric_family.add_metric(['{}*{}'.format(rds_name,rds_factor), rds_type],
point['Average']*rds_factor/100.0, point['timestamp']/1000)
metrics.extend([avg_metric_family, max_metric_family,
with_factor_metric_family])
for exporter in self.exporter_list:
metrics.extend(exporter.export_prometheus_data())
for m in metrics:
yield m
......
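For context, a minimal sketch of how `RDSCollector` is typically registered and served. The actual entrypoint is outside this diff, and the port number is an assumption; the file above already imports `start_http_server` and `REGISTRY`.

```python
# Hypothetical entrypoint sketch -- the real startup code is not shown in this diff.
import time
from prometheus_client import start_http_server
from prometheus_client.core import REGISTRY

if __name__ == '__main__':
    REGISTRY.register(RDSCollector())   # collect() runs on every Prometheus scrape
    start_http_server(9099)             # port is an assumption, not taken from the repo
    while True:
        time.sleep(60)
```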
# -*- coding: utf-8 -*-
import abc
import json
import time
import pprint
import yaml
from tencentcloud.common import credential
from tencentcloud.common.exception.tencent_cloud_sdk_exception import \
TencentCloudSDKException
from tencentcloud.common.profile.client_profile import ClientProfile
from tencentcloud.common.profile.http_profile import HttpProfile
from tencentcloud.monitor.v20180724 import models, monitor_client
from conf import config
from utils.time_tools import timestamp2isoformat
class Tencent(object):
def __init__(self):
self.secret_id = config['tencent_settings']['secret_id']
self.secret_key = config['tencent_settings']['secret_key']
self.cred = credential.Credential(self.secret_id, self.secret_key)
@abc.abstractmethod
def _get_client(self):
""" get client
"""
def json_format(self, response):
if isinstance(response, bytes):
response = response.decode()
return json.loads(response)
class Monitor(Tencent):
def __init__(self, region="ap-beijing", endpoint="monitor.tencentcloudapi.com"):
super(Monitor, self).__init__()
self.client = self._get_client(region, endpoint)
def _get_client(self, region, endpoint):
httpProfile = HttpProfile()
httpProfile.endpoint = endpoint
clientProfile = ClientProfile()
clientProfile.httpProfile = httpProfile
client = monitor_client.MonitorClient(self.cred, region, clientProfile)
return client
def _describe_base_metrics(self, params_dict):
assert isinstance(params_dict, dict)
req = models.DescribeBaseMetricsRequest()
req.from_json_string(json.dumps(params_dict))
resp = self.client.DescribeBaseMetrics(req)
return self.json_format(resp.to_json_string())
def _get_monitor_data(self, params_dict):
assert isinstance(params_dict, dict)
req = models.GetMonitorDataRequest()
req.from_json_string(json.dumps(params_dict))
resp = self.client.GetMonitorData(req)
return self.json_format(resp.to_json_string())
def get_cdb_metric(self, instance_id_list, metric, duration=60*5):
""" 批量获取腾讯云mysql实例监控指标
:param instance_id_list: mysql实例列表
:type instance_id_list: list
:param metric: 指标名称,详见腾讯云文档 https://cloud.tencent.com/document/api/248/30386#2.2-.E6.8C.87.E6.A0.87.E5.90.8D.E7.A7.B0
:type metric: string
:param duration: 过去多长时间监控数据的时间段时长, 单位(s)
:type duration: int
"""
now = int(time.time())
end_time_str = timestamp2isoformat(now)
start_time_str = timestamp2isoformat(now - duration)
params_dict = {
"Namespace": "QCE/CDB",
"MetricName": metric,
"Period": 5,
"Instances": [
{
"Dimensions": [
{
"Name": "InstanceId",
"Value": instance_id
}
]
} for instance_id in instance_id_list
],
"StartTime": start_time_str,
"EndTime": end_time_str
}
resp = self._get_monitor_data(params_dict)
return resp
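A usage sketch for `Monitor.get_cdb_metric` (the instance IDs below are placeholders; this snippet is not part of the merge request):

```python
# Usage sketch -- instance IDs are placeholders.
if __name__ == '__main__':
    monitor = Monitor()  # defaults: region ap-beijing, endpoint monitor.tencentcloudapi.com
    resp = monitor.get_cdb_metric(
        ['cdb-xxxxxxxx', 'cdb-yyyyyyyy'],  # exporter.py batches at most 10 IDs per call
        'CPUUseRate',                      # a key from tencent_metrics in config.yaml
        duration=60,                       # matches tencent_settings.data_interval
    )
    pprint.pprint(resp['DataPoints'])      # pprint is already imported at the top of this file
```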
# coding=utf-8
import arrow
def timestamp2isoformat(timestamp, tz="local"):
return arrow.get(timestamp).to(tz).isoformat()
\ No newline at end of file
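A quick usage sketch of the helper above (the printed value is illustrative):

```python
# Usage sketch: convert a Unix timestamp into a local-timezone ISO 8601 string.
import time
print(timestamp2isoformat(int(time.time())))  # e.g. '2019-06-01T12:00:00+08:00'
```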