目录
输入和输出集成概述
VMware vSphere Telegraf 插件提供了一种从 VMware vCenter 服务器收集指标的方法,从而可以全面监控和管理 vSphere 环境中的虚拟资源。
Telegraf Elasticsearch 插件无缝地将指标发送到 Elasticsearch 服务器。该插件处理模板创建和动态索引管理,并支持各种 Elasticsearch 特定的功能,以确保数据格式正确,以便存储和检索。
集成详情
VMware vSphere
该插件连接到 VMware vSphere 服务器,以收集来自虚拟环境的各种指标,从而实现对虚拟资源的高效监控和管理。它与 vSphere API 接口,收集关于集群、主机、资源池、虚拟机、数据存储和 vSAN 实体的统计信息,并以适合分析和可视化的格式呈现它们。该插件对于管理基于 VMware 的基础设施的管理员尤其有价值,因为它有助于实时跟踪系统性能、资源使用情况和操作问题。通过聚合来自多个来源的数据,该插件使用户能够获得洞察力,从而促进关于资源分配、故障排除和确保最佳系统性能的明智决策。此外,对 secret-store 集成的支持允许安全地处理敏感凭据,从而促进安全和合规性评估方面的最佳实践。
Elasticsearch
此插件将指标写入 Elasticsearch,Elasticsearch 是一种分布式、RESTful 搜索和分析引擎,能够以近乎实时的速度存储大量数据。它旨在处理 Elasticsearch 5.x 到 7.x 版本,并利用其动态模板功能来正确管理数据类型映射。该插件支持高级功能,例如模板管理、动态索引命名以及与 OpenSearch 的集成。它还允许配置 Elasticsearch 节点的身份验证和运行状况监控。
配置
VMware vSphere
[[inputs.vsphere]]
## vCenter SDK endpoint(s) to poll and the credentials used to log in.
vcenters = [ "https://vcenter.local/sdk" ]
## Login name in user@domain form; replace with a real vCenter account.
username = "user@corp.local"
password = "secret"
## Metric names included for virtual machines.
vm_metric_include = [
  # cpu
  "cpu.demand.average",
  "cpu.idle.summation",
  "cpu.latency.average",
  "cpu.readiness.average",
  "cpu.ready.summation",
  "cpu.run.summation",
  "cpu.usagemhz.average",
  "cpu.used.summation",
  "cpu.wait.summation",
  # mem
  "mem.active.average",
  "mem.granted.average",
  "mem.latency.average",
  "mem.swapin.average",
  "mem.swapinRate.average",
  "mem.swapout.average",
  "mem.swapoutRate.average",
  "mem.usage.average",
  "mem.vmmemctl.average",
  # net
  "net.bytesRx.average",
  "net.bytesTx.average",
  "net.droppedRx.summation",
  "net.droppedTx.summation",
  "net.usage.average",
  # power
  "power.power.average",
  # virtualDisk
  "virtualDisk.numberReadAveraged.average",
  "virtualDisk.numberWriteAveraged.average",
  "virtualDisk.read.average",
  "virtualDisk.readOIO.latest",
  "virtualDisk.throughput.usage.average",
  "virtualDisk.totalReadLatency.average",
  "virtualDisk.totalWriteLatency.average",
  "virtualDisk.write.average",
  "virtualDisk.writeOIO.latest",
  # sys
  "sys.uptime.latest",
]
## Metric names included for hosts.
host_metric_include = [
  # cpu
  "cpu.coreUtilization.average",
  "cpu.costop.summation",
  "cpu.demand.average",
  "cpu.idle.summation",
  "cpu.latency.average",
  "cpu.readiness.average",
  "cpu.ready.summation",
  "cpu.swapwait.summation",
  "cpu.usage.average",
  "cpu.usagemhz.average",
  "cpu.used.summation",
  "cpu.utilization.average",
  "cpu.wait.summation",
  # disk
  "disk.deviceReadLatency.average",
  "disk.deviceWriteLatency.average",
  "disk.kernelReadLatency.average",
  "disk.kernelWriteLatency.average",
  "disk.numberReadAveraged.average",
  "disk.numberWriteAveraged.average",
  "disk.read.average",
  "disk.totalReadLatency.average",
  "disk.totalWriteLatency.average",
  "disk.write.average",
  # mem
  "mem.active.average",
  "mem.latency.average",
  "mem.state.latest",
  "mem.swapin.average",
  "mem.swapinRate.average",
  "mem.swapout.average",
  "mem.swapoutRate.average",
  "mem.totalCapacity.average",
  "mem.usage.average",
  "mem.vmmemctl.average",
  # net
  "net.bytesRx.average",
  "net.bytesTx.average",
  "net.droppedRx.summation",
  "net.droppedTx.summation",
  "net.errorsRx.summation",
  "net.errorsTx.summation",
  "net.usage.average",
  # power
  "power.power.average",
  # storageAdapter
  "storageAdapter.numberReadAveraged.average",
  "storageAdapter.numberWriteAveraged.average",
  "storageAdapter.read.average",
  "storageAdapter.write.average",
  # sys
  "sys.uptime.latest",
]
datacenter_metric_include = [] ## if omitted or empty, all metrics are collected
datacenter_metric_exclude = [ "*" ] ## Datacenters are not collected by default.
vsan_metric_include = [] ## if omitted or empty, all metrics are collected
vsan_metric_exclude = [ "*" ] ## vSAN metrics are not collected by default.
separator = "_"
max_query_objects = 256
max_query_metrics = 256
collect_concurrency = 1
discover_concurrency = 1
object_discovery_interval = "300s"
timeout = "60s"
use_int_samples = true
custom_attribute_include = []
custom_attribute_exclude = ["*"]
metric_lookback = 3
## Optional TLS files. The paths below are placeholders, so they are left
## commented out; uncomment them and point them at real files to enable.
# ssl_ca = "/path/to/cafile"
# ssl_cert = "/path/to/certfile"
# ssl_key = "/path/to/keyfile"
insecure_skip_verify = false
historical_interval = "5m"
disconnected_servers_behavior = "error"
use_system_proxy = true
http_proxy_url = ""
Elasticsearch
[[outputs.elasticsearch]]
## The full HTTP endpoint URL for your Elasticsearch instance.
## Multiple urls can be specified as part of the same cluster;
## this means that only ONE of the urls will be written to each interval
urls = [ "http://node1.es.example.com:9200" ] # required.
## Elasticsearch client timeout, defaults to "5s" if not set.
timeout = "5s"
## Set to true to ask Elasticsearch for a list of all cluster nodes,
## so that it is not necessary to list all nodes in the urls config option
enable_sniffer = false
## Set to true to enable gzip compression
enable_gzip = false
## Set the interval to check if the Elasticsearch nodes are available
## Setting to "0s" will disable the health check (not recommended in production)
health_check_interval = "10s"
## Set the timeout for periodic health checks.
# health_check_timeout = "1s"
## HTTP basic authentication details
# username = "telegraf"
# password = "mypassword"
## HTTP bearer token authentication details
# auth_bearer_token = "eyJhbGciOiJIUzI1NiIsInR5cCI6IkpXVCJ9"
## Index Config
## The target index for metrics (Elasticsearch will create it if it does not exist).
## You can use the date specifiers below to create indexes per time frame.
## The metric timestamp will be used to decide the destination index name
# %Y - year (2016)
# %y - last two digits of year (00..99)
# %m - month (01..12)
# %d - day of month (e.g., 01)
# %H - hour (00..23)
# %V - week of the year (ISO week) (01..53)
## Additionally, you can specify a tag name using the notation {{tag_name}}
## which will be used as part of the index name. If the tag does not exist,
## the default tag value will be used.
# index_name = "telegraf-{{host}}-%Y.%m.%d"
# default_tag_value = "none"
index_name = "telegraf-%Y.%m.%d" # required.
## Optional Index Config
## Set to true if Telegraf should use the "create" OpType while indexing
# use_optype_create = false
## Optional TLS Config
# tls_ca = "/etc/telegraf/ca.pem"
# tls_cert = "/etc/telegraf/cert.pem"
# tls_key = "/etc/telegraf/key.pem"
## Use TLS but skip chain & host verification
# insecure_skip_verify = false
## Template Config
## Set to true if you want telegraf to manage its index template.
## If enabled, it will create a recommended index template for telegraf indexes
manage_template = true
## The template name used for telegraf indexes
template_name = "telegraf"
## Set to true if you want telegraf to overwrite an existing template
overwrite_template = false
## If set to true, a unique ID hash will be sent as a
## sha256(concat(timestamp,measurement,series-hash)) string.
## This enables data resend and updates metric points, avoiding duplicated metrics with different IDs
force_document_id = false
## Specifies the handling of NaN and Inf values.
## This option can have the following values:
## none -- do not modify field-values (default); will produce an error if NaNs or infs are encountered
## drop -- drop fields containing NaNs or infs
## replace -- replace with the value in "float_replacement_value" (default: 0.0)
## NaNs and inf will be replaced with the given number, -inf with the negative of that number
# float_handling = "none"
# float_replacement_value = 0.0
## Pipeline Config
## To use an ingest pipeline, set this to the name of the pipeline you want to use.
# use_pipeline = "my_pipeline"
## Additionally, you can specify a tag name using the notation {{tag_name}}
## which will be used as part of the pipeline name. If the tag does not exist,
## the default pipeline will be used as the pipeline. If no default pipeline is set,
## no pipeline is used for the metric.
# use_pipeline = "{{es_pipeline}}"
# default_pipeline = "my_pipeline"
#
# Custom HTTP headers
# To pass custom HTTP headers, define them in the section below
# [outputs.elasticsearch.headers]
# "X-Custom-Header" = "custom-value"
## Template Index Settings
## Overrides the template settings.index section with any provided options.
## Defaults provided here in the config
## NOTE(review): a multi-line inline table is not valid TOML 1.0 -- confirm the
## consumer's parser accepts this form before uncommenting it as written.
# template_index_settings = {
# refresh_interval = "10s",
# mapping.total_fields.limit = 5000,
# auto_expand_replicas = "0-1",
# codec = "best_compression"
# }
输入和输出集成示例
VMware vSphere
- 动态资源分配:利用此插件来监控虚拟机群的资源使用情况,并根据性能指标自动调整资源分配。这种情况可能涉及根据从 vSphere API 收集的 CPU 和内存使用率指标实时触发扩展操作,从而确保最佳性能和成本效率。
- 容量规划和预测:利用从 vSphere 收集的历史指标来进行容量规划。分析 CPU、内存和存储使用量随时间变化的趋势,有助于管理员预测何时需要额外资源,从而避免中断并确保虚拟基础设施能够应对增长。
- 自动化警报和事件响应:将此插件与警报工具集成,以根据收集的指标设置自动通知。例如,如果主机上的 CPU 使用率超过指定阈值,则可能会触发警报并自动启动预定义的补救步骤,例如将虚拟机迁移到利用率较低的主机。
- 跨集群的性能基准测试:使用收集的指标来比较不同 vCenter 中集群的性能。此基准测试提供了关于哪些集群配置产生最佳资源效率的见解,并可以指导未来的基础设施增强。
Elasticsearch
- 基于时间的索引:使用此插件将指标存储在 Elasticsearch 中,以根据收集时间为每个指标建立索引。例如,CPU 指标可以存储在名为 `telegraf-2023.01.01` 的每日索引中,从而方便基于时间的查询和保留策略。
- 动态模板管理:利用模板管理功能自动创建针对您的指标定制的自定义模板。这允许您定义如何索引和分析不同的字段,而无需手动配置 Elasticsearch,从而确保最佳的查询数据结构。
- OpenSearch 兼容性:如果您正在使用 AWS OpenSearch,则可以通过激活兼容模式来配置此插件以无缝工作,从而确保您现有的 Elasticsearch 客户端保持功能并与较新的集群设置兼容。
反馈
感谢您成为我们社区的一份子!如果您有任何一般性反馈或在这些页面上发现了任何错误,我们欢迎并鼓励您提出意见。请在 InfluxDB 社区 Slack 中提交您的反馈。