# Kafka 巡检关键命令
# 1. Basic information collection
# Expects KAFKA_BIN (Kafka bin directory) and BROKERS (bootstrap server list)
# to be set by the caller. ZK_CONNECT is optional and selects the
# ZooKeeper-mode controller lookup below.

# API versions supported by the brokers reachable through BROKERS.
"$KAFKA_BIN/kafka-broker-api-versions.sh" \
  --bootstrap-server "$BROKERS"

# Controller / metadata overview.
# Kafka does not ship a kafka-controller-info.sh tool, and
# kafka-metadata-shell.sh works on local metadata files rather than on a
# --bootstrap-server connection, so ask for the controller through the
# interface that matches the deployment mode instead.
if [ -n "${ZK_CONNECT:-}" ]; then
  # ZooKeeper mode: the active controller is recorded in the /controller znode.
  "$KAFKA_BIN/zookeeper-shell.sh" "$ZK_CONNECT" get /controller
else
  # KRaft mode: the quorum status reports the current leader.
  "$KAFKA_BIN/kafka-metadata-quorum.sh" \
    --bootstrap-server "$BROKERS" \
    describe --status
fi
# 2. Broker node information

# Print the broker ids to inspect, one per line.
# Globals:
#   BROKER_IDS   (optional) explicit ids, separated by spaces and/or commas;
#                use this when ids are not the contiguous range starting at 0.
#   BROKER_COUNT fallback: ids 0 .. BROKER_COUNT-1 (nothing if unset or 0).
list_broker_ids() {
  local next_id=0
  if [ -n "${BROKER_IDS:-}" ]; then
    # Word splitting is intentional: it yields one id per output line.
    # shellcheck disable=SC2046
    printf '%s\n' $(printf '%s' "$BROKER_IDS" | tr ',' ' ')
  else
    # Counting loop instead of `seq 0 N-1`: some seq implementations count
    # downwards when the end is below the start, which would yield bogus ids.
    while [ "$next_id" -lt "${BROKER_COUNT:-0}" ]; do
      printf '%s\n' "$next_id"
      next_id=$((next_id + 1))
    done
  fi
}

# Broker ids currently registered under /brokers/ids in ZooKeeper.
"$KAFKA_BIN/zookeeper-shell.sh" "$ZK_CONNECT" ls /brokers/ids

# Describe the broker entity type as a whole.
"$KAFKA_BIN/kafka-configs.sh" \
  --bootstrap-server "$BROKERS" \
  --entity-type brokers \
  --describe

# Describe each broker individually.
for id in $(list_broker_ids); do
  echo "检查 Broker $id ..."
  "$KAFKA_BIN/kafka-configs.sh" \
    --bootstrap-server "$BROKERS" \
    --entity-type brokers \
    --entity-name "$id" --describe
done
# 3. Topic checks
# Every expansion is quoted so that an empty or unusual value cannot be
# word-split or glob-expanded into extra arguments.

# Names of all topics.
"$KAFKA_BIN/kafka-topics.sh" \
  --bootstrap-server "$BROKERS" \
  --list

# Full description of the single topic named by TOPIC.
"$KAFKA_BIN/kafka-topics.sh" \
  --bootstrap-server "$BROKERS" \
  --describe \
  --topic "$TOPIC"

# Description of all topics, reduced to the lines that mention Leader or Isr.
"$KAFKA_BIN/kafka-topics.sh" \
  --bootstrap-server "$BROKERS" \
  --describe \
  | grep -E 'Leader|Isr'
# 4. Cluster state and health indicators

# Keep only partition rows that list at least one offline replica.
# NOTE(review): assumes the describe output carries an "Offline: <ids>" column,
# which not every Kafka build prints — confirm against the deployed version.
# A bare `grep Offline` would match every row (or none) and say nothing.
rows_with_offline_replicas() {
  grep -E 'Offline: *[0-9]'
}

# Active controller.
# The kafka-broker-api-versions.sh output lists API names per broker and does
# not mark which broker is the controller, so query it directly instead.
if [ -n "${ZK_CONNECT:-}" ]; then
  # ZooKeeper mode: the active controller is recorded in the /controller znode.
  "$KAFKA_BIN/zookeeper-shell.sh" "$ZK_CONNECT" get /controller
else
  # KRaft mode: the quorum status reports the current leader.
  "$KAFKA_BIN/kafka-metadata-quorum.sh" \
    --bootstrap-server "$BROKERS" \
    describe --status
fi

# Under-replicated partitions. The describe output never contains the text
# "UnderReplicated"; the tool has a dedicated filter flag for this.
"$KAFKA_BIN/kafka-topics.sh" \
  --bootstrap-server "$BROKERS" \
  --describe \
  --under-replicated-partitions

# Partitions whose leader is not available. Using the tool's own filter avoids
# depending on how a missing leader is rendered ("-1" vs "none").
"$KAFKA_BIN/kafka-topics.sh" \
  --bootstrap-server "$BROKERS" \
  --describe \
  --unavailable-partitions

# Partitions that report offline replicas.
"$KAFKA_BIN/kafka-topics.sh" \
  --bootstrap-server "$BROKERS" \
  --describe \
  | rows_with_offline_replicas
# 5. Consumer groups and lag

# Filter `kafka-consumer-groups.sh --describe` output read from stdin:
# drop the rows whose LAG column is exactly 0 and pass everything else
# through (header rows, lagging rows, rows with "-" as lag, other messages).
# The LAG column position is taken from the most recent header row, so the
# filter does not depend on a fixed column layout.
# Caveat: columns are split on whitespace, so a group id containing spaces
# shifts the columns of its rows.
drop_zero_lag_rows() {
  awk '
    {
      is_header = 0
      for (i = 1; i <= NF; i++) {
        if ($i == "LAG") { lag_col = i; is_header = 1 }
      }
    }
    is_header || !(lag_col && $lag_col == "0")
  '
}

# Names of all consumer groups.
"$KAFKA_BIN/kafka-consumer-groups.sh" \
  --bootstrap-server "$BROKERS" \
  --list

# Offsets and lag of the single group named by GROUP.
"$KAFKA_BIN/kafka-consumer-groups.sh" \
  --bootstrap-server "$BROKERS" \
  --describe \
  --group "$GROUP"

# All groups, without the zero-lag rows. The previous `grep -v 'LAG *0'`
# never matched a data row (the text "LAG" only occurs in the header), so it
# filtered nothing.
"$KAFKA_BIN/kafka-consumer-groups.sh" \
  --bootstrap-server "$BROKERS" \
  --all-groups \
  --describe \
  | drop_zero_lag_rows
# 6. JMX and OS-level monitoring (optional)

# JVM processes (with their JVM arguments) whose jps line mentions Kafka.
jps -v | grep Kafka

# Connect jmxterm to the broker's JMX endpoint on BROKER_HOST.
# The port defaults to 9999 and can be overridden through JMX_PORT.
# NOTE(review): with -n (non-interactive) and no input given here, jmxterm
# presumably reads its commands from stdin — confirm and feed it the desired
# `get`/`bean` commands when running this unattended.
jmxterm -l "service:jmx:rmi:///jndi/rmi://${BROKER_HOST}:${JMX_PORT:-9999}/jmxrmi" -n -v silent

# Single batch-mode iteration of top, first 20 lines only.
top -b -n1 | head -n 20
# 7. Log and error analysis
# The broker log defaults to /var/log/kafka/server.log; set KAFKA_SERVER_LOG
# to inspect a log stored elsewhere.
server_log=${KAFKA_SERVER_LOG:-/var/log/kafka/server.log}

# Last 50 lines that contain ERROR or WARN.
grep -E "ERROR|WARN" -- "$server_log" | tail -n 50

# Last 20 lines that mention the controller.
grep "Controller" -- "$server_log" | tail -n 20

# Content of the /zookeeper/config znode.
"$KAFKA_BIN/zookeeper-shell.sh" "$ZK_CONNECT" get /zookeeper/config
# 8. Cluster version and environment information

# Kafka version as reported by the command-line tools.
"$KAFKA_BIN/kafka-topics.sh" --version

# `java -version` prints to stderr; merge it into stdout so the version is
# kept when the inspection output is captured or piped.
java -version 2>&1

# Operating system identification.
cat /etc/os-release
# 原创不易,转载时请标明作者及出处。
# 作者:打个小肥鸡
# 转自:https://www.sretalk.com/?p=147