Loki、promtail、Grafana、Prometheus日志监控安装与配置

Loki,promtail配置文件下载
wget https://raw.githubusercontent.com/grafana/loki/master/cmd/loki/loki-local-config.yaml
wget https://raw.githubusercontent.com/grafana/loki/master/cmd/promtail/promtail-local-config.yaml


二进制包下载:
wget https://github.com/grafana/loki/releases/download/v1.6.0/loki-linux-amd64.zip
wget https://github.com/grafana/loki/releases/download/v1.6.0/promtail-linux-amd64.zip
wget https://mirrors.huaweicloud.com/grafana/7.1.5/grafana-7.1.5.linux-amd64.tar.gz
wget https://github.com/prometheus/prometheus/releases/download/v2.13.1/prometheus-2.13.1.linux-amd64.tar.gz

1

2

3

4

5

6

7

8

9

10

Loki,promtail配置文件下载

wget https://raw.githubusercontent.com/grafana/loki/master/cmd/loki/loki-local-config.yaml

wget https://raw.githubusercontent.com/grafana/loki/master/cmd/promtail/promtail-local-config.yaml

二进制包下载:

wget https://github.com/grafana/loki/releases/download/v1.6.0/loki-linux-amd64.zip

wget https://github.com/grafana/loki/releases/download/v1.6.0/promtail-linux-amd64.zip

wget https://mirrors.huaweicloud.com/grafana/7.1.5/grafana-7.1.5.linux-amd64.tar.gz

wget https://github.com/prometheus/prometheus/releases/download/v2.13.1/prometheus-2.13.1.linux-amd64.tar.gz

# Loki configuration for a single instance: in-memory ring, replication
# factor 1, BoltDB index and chunks stored on the local filesystem.
auth_enabled: false

server:
  http_listen_port: 3100
  grpc_listen_port: 39095 # gRPC listen port (default is 9095)
  grpc_server_max_recv_msg_size: 15728640  # max gRPC receive message size, 15 MiB here (default 4 MB)
  grpc_server_max_send_msg_size: 15728640  # max gRPC send message size, 15 MiB here (default 4 MB)

ingester:
  lifecycler:
    address: 192.168.66.178 # IP address of this host; replace with your own
    ring:
      kvstore:
        store: inmemory
      replication_factor: 1
    final_sleep: 0s
  chunk_idle_period: 5m
  chunk_retain_period: 30s
  max_transfer_retries: 0
  max_chunk_age: 20m  # maximum time a timeseries chunk may stay in memory; once exceeded, the chunk is flushed to storage and a new one is started

schema_config:
  configs:
    - from: 2018-04-15
      store: boltdb
      object_store: filesystem
      schema: v11
      index:
        prefix: index_
        period: 168h

storage_config:
  boltdb:
    directory: /opt/loki/loki_data/index

  filesystem:
    directory: /opt/loki/loki_data/chunks

limits_config:
  enforce_metric_name: false
  reject_old_samples: true
  reject_old_samples_max_age: 168h
  ingestion_rate_mb: 30  # per-user ingestion rate limit in MB per second (default 4)
  ingestion_burst_size_mb: 15  # per-user allowed ingestion burst size in MB (default 6); NOTE(review): lower than ingestion_rate_mb here - confirm this is intended

chunk_store_config:
  max_look_back_period: 168h   # maximum look-back period for queried log lines; same value as retention_period below (original note: applies only to live logs - TODO confirm)

table_manager:
  retention_deletes_enabled: true # enables deletion of logs past the retention period (default false)
  retention_period: 168h  # how long logs are retained

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

auth_enabled: false

server:

http_listen_port: 3100

grpc_listen_port: 39095 #grpc监听端口，默认为9095

grpc_server_max_recv_msg_size: 15728640 #grpc最大接收消息值，默认4m

grpc_server_max_send_msg_size: 15728640 #grpc最大发送消息值，默认4m

ingester:

lifecycler:

address: 192.168.66.178 #IP地址

ring:

kvstore:

store: inmemory

replication_factor: 1

final_sleep: 0s

chunk_idle_period: 5m

chunk_retain_period: 30s

max_transfer_retries: 0

max_chunk_age: 20m #一个timeseries块在内存中的最大持续时间。如果timeseries运行的时间超过此时间，则当前块将刷新到存储并创建一个新块

schema_config:

configs:

- from: 2018-04-15

store: boltdb

object_store: filesystem

schema: v11

index:

prefix: index_

period: 168h

storage_config:

boltdb:

directory: /opt/loki/loki_data/index

filesystem:

directory: /opt/loki/loki_data/chunks

limits_config:

enforce_metric_name: false

reject_old_samples: true

reject_old_samples_max_age: 168h

ingestion_rate_mb: 30 #修改每用户摄入速率限制，即每秒样本量，默认值为4M

ingestion_burst_size_mb: 15 #修改每用户摄入速率限制，即每秒样本量，默认值为6M

chunk_store_config:

max_look_back_period: 168h #回看日志行的最大时间，只适用于即时日志

table_manager:

retention_deletes_enabled: true #日志保留周期开关，默认为false

retention_period: 168h #日志保留周期

# Promtail configuration: tails local log files and pushes them to the
# Loki push API listed under `clients`.
server:
  http_listen_port: 9080
  grpc_listen_port: 0
  grpc_server_max_recv_msg_size: 15728640
  grpc_server_max_send_msg_size: 15728640

# File in which promtail records how far it has read each log file.
# It must be in a directory the promtail service user can write to: the
# install steps create and chown /opt/promtail, not /opt/loki/promtail.
positions:
  filename: /opt/promtail/positions.yaml

clients:
  - url: http://192.168.66.178:3100/loki/api/v1/push

scrape_configs:
  - job_name: app_log
    static_configs:
      - targets:
          - localhost
        labels:
          job: varlogs_178
          # Glob of files to tail; the promtail service user needs read
          # access to every file it matches.
          __path__: /var/log/*log
          # __path__: /ceph_root/{dev,test}/apps_data/*/log/*log

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

server:

http_listen_port: 9080

grpc_listen_port: 0

grpc_server_max_recv_msg_size: 15728640

grpc_server_max_send_msg_size: 15728640

positions:

filename: /opt/promtail/positions.yaml

clients:

- url: http://192.168.66.178:3100/loki/api/v1/push

scrape_configs:

- job_name: app_log

static_configs:

- targets:

- localhost

labels:

job: varlogs_178

__path__: /var/log/*log

#__path__: /ceph_root/{dev,test}/apps_data/*/log/*log

loki:
mkdir -p /opt/loki
mkdir -p /opt/loki/loki_data
mv loki-linux-amd64 loki-local-config.yaml /opt/loki

promtail：
mkdir -p /opt/promtail
mv promtail-linux-amd64 promtail-local-config.yaml /opt/promtail

1

2

3

4

5

6

7

8

loki:

mkdir -p /opt/loki

mkdir -p /opt/loki/loki_data

mv loki-linux-amd64 loki-local-config.yaml /opt/loki

promtail：

mkdir -p /opt/promtail

mv promtail-linux-amd64 promtail-local-config.yaml /opt/promtail

useradd  -s /sbin/nologin -M loki
chown -R loki:loki /opt/loki/

1 2	useradd -s /sbin/nologin -M loki chown -R loki:loki /opt/loki/

vim /etc/systemd/system/loki.service

1	vim /etc/systemd/system/loki.service

[Unit]
Description=loki
Documentation=https://grafana.com/oss/loki/
After=network.target
[Service]
User=loki
Group=loki
Type=simple
ExecStart=/opt/loki/loki-linux-amd64 --config.file=/opt/loki/loki-local-config.yaml 
Restart=on-failure
[Install]
WantedBy=multi-user.target

1

2

3

4

5

6

7

8

9

10

11

12

[Unit]

Description=loki

Documentation=https://grafana.com/oss/loki/

After=network.target

[Service]

User=loki

Group=loki

Type=simple

ExecStart=/opt/loki/loki-linux-amd64 --config.file=/opt/loki/loki-local-config.yaml

Restart=on-failure

[Install]

WantedBy=multi-user.target

systemctl start loki
systemctl status loki
systemctl enable loki

1

2

3

systemctl start loki

systemctl status loki

systemctl enable loki

useradd  -s /sbin/nologin -M promtail
chown -R promtail:promtail /opt/promtail/

1 2	useradd -s /sbin/nologin -M promtail chown -R promtail:promtail /opt/promtail/

vim /etc/systemd/system/promtail.service

1	vim /etc/systemd/system/promtail.service

[Unit]
Description=promtail
Documentation=https://github.com/topics/promtail
After=network.target
[Service]
User=promtail
Group=promtail
Type=simple
ExecStart=/opt/promtail/promtail-linux-amd64 --config.file=/opt/promtail/promtail-local-config.yaml
Restart=on-failure
[Install]
WantedBy=multi-user.target

1

2

3

4

5

6

7

8

9

10

11

12

[Unit]

Description=promtail

Documentation=https://github.com/topics/promtail

After=network.target

[Service]

User=promtail

Group=promtail

Type=simple

ExecStart=/opt/promtail/promtail-linux-amd64 --config.file=/opt/promtail/promtail-local-config.yaml

Restart=on-failure

[Install]

WantedBy=multi-user.target

systemctl start promtail
systemctl status promtail
systemctl enable promtail

1

2

3

systemctl start promtail

systemctl status promtail

systemctl enable promtail

    Prometheus基于Golang编写，编译后的软件包，不依赖于任何的第三方依赖。用户只需要下载对应平台的二进制包，解压并且添加基本的配置即可正常启动Prometheus Server。

1	Prometheus基于Golang编写，编译后的软件包，不依赖于任何的第三方依赖。用户只需要下载对应平台的二进制包，解压并且添加基本的配置即可正常启Prometheus Server。

# Minimal Prometheus server configuration: global intervals, an empty
# Alertmanager target list, no rule files, and a single self-scrape job.
global:
  scrape_interval:     15s # how often targets are scraped (default is 1 minute)
  evaluation_interval: 15s # how often rules are evaluated; every 15 seconds here (default is 1 minute)
  # scrape_timeout is left at its default (10s per the original note)

# Alertmanager configuration
alerting:
  alertmanagers:
  - static_configs:
    - targets:
      # - alertmanager:9093

# Rule files to load; rules are evaluated periodically according to the
# global 'evaluation_interval'.
rule_files:
  # - "first_rules.yml"
  # - "second_rules.yml"

# List of scrape configurations
scrape_configs:
  - job_name: 'prometheus'
    static_configs:
    - targets: ['localhost:9090']

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

global:

scrape_interval: 15s # 设置抓取间隔，默认为1分钟

evaluation_interval: 15s #估算规则的默认周期，每15秒计算一次规则。默认1分钟

# scrape_timeout #默认抓取超时，默认为10s

# Alertmanager相关配置

alerting:

alertmanagers:

- static_configs:

- targets:

# - alertmanager:9093

# 规则文件列表，使用'evaluation_interval' 参数去抓取

rule_files:

# - "first_rules.yml"

# - "second_rules.yml"

# 抓取配置列表

scrape_configs:

- job_name: 'prometheus'

static_configs:

- targets: ['localhost:9090']

为了安全，使用普通用户来启动prometheus服务。作为一个时序型的数据库产品，prometheus的数据默认会存放在应用所在目录下

1	为了安全，使用普通用户来启动prometheus服务。作为一个时序型的数据库产品，prometheus的数据默认会存放在应用所在目录下

# Create a no-login system user to run Prometheus
useradd -s /sbin/nologin -M prometheus
# Create the data directory; it must match --storage.tsdb.path in
# prometheus.service (/opt/prometheus/data)
mkdir -p /opt/prometheus/data
# Give the prometheus user ownership of the install and data directories
chown -R prometheus:prometheus /opt/prometheus/

1

2

3

4

5

useradd -s /sbin/nologin -M prometheus

#创建数据目录

mkdir -p /opt/prometheus/data

#修改目录属主

chown -R prometheus:prometheus /opt/prometheus/

prometheus的启动很简单，只需要直接启动解压目录的二进制文件prometheus即可，但是为了更加方便对prometheus进行管理，这里使用systemd来启停prometheus

1	prometheus的启动很简单，只需要直接启动解压目录的二进制文件prometheus即可，但是为了更加方便对prometheus进行管理，这里使用systemd来启停prometheus

vim /etc/systemd/system/prometheus.service

1	vim /etc/systemd/system/prometheus.service

[Unit]
Description=Prometheus
Documentation=https://prometheus.io/
After=network.target
[Service]
Type=simple
User=prometheus
ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --storage.tsdb.path=/opt/prometheus/data
Restart=on-failure
[Install]
WantedBy=multi-user.target

1

2

3

4

5

6

7

8

9

10

11

[Unit]

Description=Prometheus

Documentation=https://prometheus.io/

After=network.target

[Service]

Type=simple

User=prometheus

ExecStart=/opt/prometheus/prometheus --config.file=/opt/prometheus/prometheus.yml --storage.tsdb.path=/opt/prometheus/data

Restart=on-failure

[Install]

WantedBy=multi-user.target

备注：在service文件里面，我们定义了启动的命令，可以定义数据存储路径，否则默认会在prometheus二进制的目录的data下

1	备注：在service文件里面，我们定义了启动的命令，可以定义数据存储路径，否则默认会在prometheus二进制的目录的data下

systemctl start prometheus
systemctl status prometheus
systemctl enable prometheus

1

2

3

systemctl start prometheus

systemctl status prometheus

systemctl enable prometheus

grafana:
useradd -s /sbin/nologin -M grafana
mkdir /opt/grafana/data
chown -R grafana:grafana /opt/grafana/
修改grafana配置文件：
vim /opt/grafana/conf/defaults.ini
修改内容如下:(一般保持默认即可)
data = /opt/grafana/data
logs = /opt/grafana/log
plugins = /opt/grafana/plugins
provisioning = /opt/grafana/conf/provisioning

1

2

3

4

5

6

7

8

9

10

11

grafana:

useradd -s /sbin/nologin -M grafana

mkdir /opt/grafana/data

chown -R grafana:grafana /opt/grafana/

修改grafana配置文件：

vim /opt/grafana/conf/defaults.ini

修改内容如下:(一般保持默认即可)

data = /opt/grafana/data

logs = /opt/grafana/log

plugins = /opt/grafana/plugins

provisioning = /opt/grafana/conf/provisioning

vim /etc/systemd/system/grafana-server.service

1	vim /etc/systemd/system/grafana-server.service

[Unit]
Description=Grafana
After=network.target

[Service]
User=grafana
Group=grafana
Type=notify
ExecStart=/opt/grafana/bin/grafana-server -homepath /opt/grafana
Restart=on-failure

[Install]
WantedBy=multi-user.target

1

2

3

4

5

6

7

8

9

10

11

12

13

[Unit]

Description=Grafana

After=network.target

[Service]

User=grafana

Group=grafana

Type=notify

ExecStart=/opt/grafana/bin/grafana-server -homepath /opt/grafana

Restart=on-failure

[Install]

WantedBy=multi-user.target

systemctl start  grafana-server
systemctl status  grafana-server
systemctl enable  grafana-server

1

2

3

systemctl start grafana-server

systemctl status grafana-server

systemctl enable grafana-server

grafana已经安装完毕。默认情况下，grafana-server会启动3000端口，使用浏览器打开grafana页面:http://IP:3000/login，输入默认的账号密码 admin/admin登录,第一次登录需要修改密码。

1	grafana已经安装完毕。默认情况下，grafana-server会启动3000端口，使用浏览器打开grafana页面:http://IP:3000/login，输入默认的账号密码 admin/admin登录,第一次登陆需要修改密码。

   grafana虽然已经安装好了，但是这个时候还没有数据，没办法作图。下面我们把grafana和loki 或者和 prometheus关联起来，也就是在grafana中添加数据源。

1	grafana虽然已经安装好了，但是这个时候还没有数据，没办法作图。下面我们把grafana和loki 或者和 prometheus关联起来，也就是在grafana中添加添加数据源。

  在配置页面点击添加Data Sources，然后选择loki，输入loki 服务的参数然后点Save & Test即可。

1	在配置页面点击添加Data Sources，然后选择loki，输入loki 服务的参数然后点Save & Test即可。

左侧菜单栏选择 Explore

详细查看方法 ，这位大佬写的很清晰：https://www.cnblogs.com/turingguo/p/13847003.html

1

2

3

左侧菜单栏选择 Explore

详细查看方法，这位大佬写的很清晰：https://www.cnblogs.com/turingguo/p/13847003.html

  在配置页面点击添加Data Sources，然后选择prometheus，输入prometheus服务的参数即可。

1	在配置页面点击添加Data Sources，然后选择prometheus，输入prometheus服务的参数即可。

    与传统的监控zabbix来对比的话，prometheus-server就像是mysql，负责存储数据。只不过这是时序数据库而不是关系型的数据库。数据的收集还需要其他的客户端，在prometheus中叫做exporter。针对不同的服务，有各种各样的exporter，就好比zabbix的zabbix-agent一样。

这里为了能够采集到主机的运行指标如CPU, 内存，磁盘等信息。我们可以使用Node Exporter。Node Exporter同样采用Golang编写，并且不存在任何的第三方依赖，只需要下载，解压即可运行。可以从https://prometheus.io/download/获取最新的node exporter版本的二进制包

1

2

3

与传统的监控zabbix来对比的话，prometheus-server就像是mysql，负责存储数据。只不过这是时序数据库而不是关系型的数据库。数据的收集还需要其他的客户端，在prometheus中叫做exporter。针对不同的服务，有各种各样的exporter，就好比zabbix的zabbix-agent一样。

这里为了能够采集到主机的运行指标如CPU, 内存，磁盘等信息。我们可以使用Node Exporter。Node Exporter同样采用Golang编写，并且不存在任何的第三方依赖，只需要下载，解压即可运行。可以从https://prometheus.io/download/获取最新的node exporter版本的二进制包

wget https://github.com/prometheus/node_exporter/releases/download/v0.18.1/node_exporter-0.18.1.linux-amd64.tar.gz
tar -xf node_exporter-0.18.1.linux-amd64.tar.gz
#新建一个目录专门安装各种exporter
mkdir -p /opt/prometheus/exporter
mv node_exporter-0.18.1.linux-amd64 /opt/prometheus/exporter/
cd /opt/prometheus/exporter/
mv node_exporter-0.18.1.linux-amd64 node_exporter

1

2

3

4

5

6

7

wget https://github.com/prometheus/node_exporter/releases/download/v0.18.1/node_exporter-0.18.1.linux-amd64.tar.gz

tar -xf node_exporter-0.18.1.linux-amd64.tar.gz

#新建一个目录专门安装各种exporter

mkdir -p /opt/prometheus/exporter

mv node_exporter-0.18.1.linux-amd64 /opt/prometheus/exporter/

cd /opt/prometheus/exporter/

mv node_exporter-0.18.1.linux-amd64 node_exporter

直接运行node_exporter的可执行文件即可启动 node exporter，默认会监听9100端口。建议使用systemctl来启动

1	直接打开node_exporter的可执行文件即可启动 node export，默认会启动9100端口。建议使用systemctl来启动

vim /etc/systemd/system/node_exporter.service

1	vim /etc/systemd/system/node_exporter.service

[Unit]
Description=node_exporter
After=network.target

[Service]
Restart=on-failure
ExecStart=/opt/prometheus/exporter/node_exporter/node_exporter

[Install]
WantedBy=multi-user.target

1

2

3

4

5

6

7

8

9

10

[Unit]

Description=node_exporter

After=network.target

[Service]

Restart=on-failure

ExecStart=/opt/prometheus/exporter/node_exporter/node_exporter

[Install]

WantedBy=multi-user.target

systemctl enable node_exporter
systemctl start node_exporter

1 2	systemctl enable node_exporter systemctl start node_exporter

可以看到node exporter启动后也就是暴露了9100端口，并没有把数据传到prometheus，我们还需要在prometheus中配置，让prometheus去pull这个接口的数据。

1	可以看到node exporter启动后也就是暴露了9100端口，并没有把数据传到prometheus，我们还需要在prometheus中配置，让prometheus去pull这个接口的数据。

编辑prometheus.yml文件，增加后面4行.

1	编辑prometheus.yml文件，增加后面4行.

# Scrape jobs: Prometheus itself plus the local node exporter.
scrape_configs:
  - job_name: 'prometheus'

    # metrics_path defaults to '/metrics'
    # scheme defaults to 'http'.
    static_configs:
    - targets: ['localhost:9090']

  # Collect host metrics exposed by node exporter
  - job_name: 'node'
    static_configs:
    - targets: ['localhost:9100']

1

2

3

4

5

6

7

8

9

10

11

12

13

scrape_configs:

- job_name: 'prometheus'

# metrics_path defaults to '/metrics'

# scheme defaults to 'http'.

static_configs:

- targets: ['localhost:9090']

#采集node exporter监控数据

- job_name: 'node'

static_configs:

- targets: ['localhost:9100']

在prometheus的web界面看到这个节点是up的状态了，接下来我们在grafana中添加对应的模板。

1	在prometheus的web界面看到这个节点是up的状态了，接下来我们在grafana中添加对应的模板。

    在导入界面，我们输入模板的编号，这里我使用的是9276号模板，如要使用其他的模板，请到grafana的官网去查找 https://grafana.com/dashboards

1	在导入界面，我们输入模板的编号，这里我使用的是9276号模板，如要使用其他的模板，请到grafana的官网去查找 https://grafana.com/dashboards

详情看大佬这篇
https://www.cnblogs.com/shawhe/p/11833368.html

1 2	详情看大佬这篇 https://www.cnblogs.com/shawhe/p/11833368.html

您可能还会对这些文章感兴趣！