feat: Metrics for Prometheus (#309)

* feat: prometheus metrics

* Added Prometheus resources support to helm chart
This commit is contained in:
Dmytro Bondar
2024-09-29 22:10:50 +02:00
committed by GitHub
parent ae1be0e367
commit f22a7e4a2e
19 changed files with 399 additions and 26 deletions

View File

@@ -0,0 +1,161 @@
package adapters
import (
"context"
"net/http"
"time"
"github.com/h44z/wg-portal/internal"
"github.com/h44z/wg-portal/internal/config"
"github.com/h44z/wg-portal/internal/domain"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promauto"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/sirupsen/logrus"
)
// MetricsServer is an HTTP server that exposes WireGuard interface and peer
// statistics as Prometheus gauges. The gauges are created and registered in
// NewMetricsServer and refreshed through UpdateInterfaceMetrics and
// UpdatePeerMetrics.
type MetricsServer struct {
// Embedded HTTP server that serves the "/metrics" endpoint.
*http.Server
// db is queried in UpdatePeerMetrics to resolve the peer details that are
// used as label values.
db *SqlRepo
// Interface gauges, labelled by interface identifier.
// ifaceInfo is always set to 1 and only carries the labels.
ifaceInfo *prometheus.GaugeVec
ifaceReceivedBytesTotal *prometheus.GaugeVec
ifaceSendBytesTotal *prometheus.GaugeVec
// Peer gauges, labelled by interface, addresses, peer id and display name.
// peerInfo is always set to 1 and only carries the labels.
peerInfo *prometheus.GaugeVec
// peerIsConnected is 1 while the peer is reported as connected, 0 otherwise.
peerIsConnected *prometheus.GaugeVec
// peerLastHandshakeSeconds holds the Unix timestamp of the last handshake.
peerLastHandshakeSeconds *prometheus.GaugeVec
peerReceivedBytesTotal *prometheus.GaugeVec
peerSendBytesTotal *prometheus.GaugeVec
}
// Wireguard metrics labels.
//
// Label names are positional: the values passed to WithLabelValues in
// UpdateInterfaceMetrics and UpdatePeerMetrics must be given in exactly the
// order in which the names are concatenated here (base labels first).
var (
// labels is the base label set shared by every interface and peer metric.
labels = []string{"interface"}
// ifaceLabels are the additional labels of interface metrics (currently none).
ifaceLabels = []string{}
// peerLabels are the additional labels of peer metrics.
peerLabels = []string{"addresses", "id", "name"}
)
// NewMetricsServer returns a new, not yet started, Prometheus metrics server.
//
// A fresh registry is created, all interface and peer gauges are registered
// on it, and the registry is exposed on the "/metrics" path of an HTTP server
// bound to cfg.Statistics.ListeningAddress. Call Run to start serving.
//
// db is stored on the returned server; UpdatePeerMetrics uses it to look up
// the peer data needed for the metric labels.
func NewMetricsServer(cfg *config.Config, db *SqlRepo) *MetricsServer {
	// Upper bound on how long a client may take to send its request headers.
	// Without it a slow or malicious client can keep connections open forever.
	const readHeaderTimeout = 10 * time.Second

	reg := prometheus.NewRegistry()
	factory := promauto.With(reg)

	// newGauge registers a gauge vector whose label names are the shared base
	// labels followed by extraLabels. The names are copied into a fresh slice
	// so the package-level label slices are never aliased by append.
	newGauge := func(name, help string, extraLabels []string) *prometheus.GaugeVec {
		names := make([]string, 0, len(labels)+len(extraLabels))
		names = append(names, labels...)
		names = append(names, extraLabels...)
		return factory.NewGaugeVec(prometheus.GaugeOpts{Name: name, Help: help}, names)
	}

	mux := http.NewServeMux()
	mux.Handle("/metrics", promhttp.HandlerFor(reg, promhttp.HandlerOpts{Registry: reg}))

	return &MetricsServer{
		Server: &http.Server{
			Addr:              cfg.Statistics.ListeningAddress,
			Handler:           mux,
			ReadHeaderTimeout: readHeaderTimeout,
		},
		db:                       db,
		ifaceInfo:                newGauge("wireguard_interface_info", "Interface info.", ifaceLabels),
		ifaceReceivedBytesTotal:  newGauge("wireguard_interface_received_bytes_total", "Bytes received through the interface.", ifaceLabels),
		ifaceSendBytesTotal:      newGauge("wireguard_interface_sent_bytes_total", "Bytes sent through the interface.", ifaceLabels),
		peerInfo:                 newGauge("wireguard_peer_info", "Peer info.", peerLabels),
		peerIsConnected:          newGauge("wireguard_peer_up", "Peer connection state (boolean: 1/0).", peerLabels),
		// The gauge stores LastHandshake.Unix(), i.e. an absolute timestamp,
		// not an elapsed duration; the help text states that explicitly.
		peerLastHandshakeSeconds: newGauge("wireguard_peer_last_handshake_seconds", "Unix timestamp (in seconds) of the last handshake with the peer.", peerLabels),
		peerReceivedBytesTotal:   newGauge("wireguard_peer_received_bytes_total", "Bytes received from the peer.", peerLabels),
		peerSendBytesTotal:       newGauge("wireguard_peer_sent_bytes_total", "Bytes sent to the peer.", peerLabels),
	}
}
// Run serves the metrics endpoint until ctx is cancelled.
//
// The HTTP server is started in a background goroutine. Run itself blocks
// until ctx is done and then shuts the server down, giving in-flight requests
// at most five seconds to complete.
func (m *MetricsServer) Run(ctx context.Context) {
	// serve blocks for the lifetime of the listener. A regular shutdown
	// (http.ErrServerClosed) is not treated as an error.
	serve := func() {
		err := m.ListenAndServe()
		if err == nil || err == http.ErrServerClosed {
			return
		}
		logrus.Errorf("metrics service on %s exited: %v", m.Addr, err)
	}
	go serve()

	// Logged right after the goroutine is launched, not after the listener
	// has been bound.
	logrus.Infof("started metrics service on %s", m.Addr)

	// Block until the caller asks us to stop.
	<-ctx.Done()

	// ctx is already cancelled at this point, so the shutdown deadline has to
	// be derived from a fresh background context.
	shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
	defer cancel()

	err := m.Shutdown(shutdownCtx)
	if err != nil {
		logrus.Errorf("metrics service on %s shutdown failed: %v", m.Addr, err)
		return
	}
	logrus.Infof("metrics service on %s shutdown gracefully", m.Addr)
}
// UpdateInterfaceMetrics refreshes the interface gauges from status.
//
// The info gauge is set to 1 and the traffic gauges are set to the byte
// counts carried by status. All series are labelled with the interface
// identifier only.
func (m *MetricsServer) UpdateInterfaceMetrics(status domain.InterfaceStatus) {
	iface := string(status.InterfaceId)
	rx := float64(status.BytesReceived)
	tx := float64(status.BytesTransmitted)

	m.ifaceInfo.WithLabelValues(iface).Set(1)
	m.ifaceReceivedBytesTotal.WithLabelValues(iface).Set(rx)
	m.ifaceSendBytesTotal.WithLabelValues(iface).Set(tx)
}
// UpdatePeerMetrics refreshes the peer gauges from status.
//
// The peer is loaded from the database to obtain the label values (interface,
// addresses, peer id, display name). If the lookup fails, a warning is logged
// and no gauge is touched. The last-handshake gauge is only written when
// status carries a handshake time.
func (m *MetricsServer) UpdatePeerMetrics(ctx context.Context, status domain.PeerStatus) {
	peer, err := m.db.GetPeer(ctx, status.PeerId)
	if err != nil {
		logrus.Warnf("failed to fetch peer data for labels %s: %v", status.PeerId, err)
		return
	}

	// Order matters: it must match the label names the gauges were created
	// with (interface, addresses, id, name).
	values := []string{
		string(peer.InterfaceIdentifier),
		string(peer.Interface.AddressStr()),
		string(status.PeerId),
		string(peer.DisplayName),
	}

	// set writes v to the series of gauge that belongs to this peer.
	set := func(gauge *prometheus.GaugeVec, v float64) {
		gauge.WithLabelValues(values...).Set(v)
	}

	set(m.peerInfo, 1)
	if handshake := status.LastHandshake; handshake != nil {
		set(m.peerLastHandshakeSeconds, float64(handshake.Unix()))
	}
	set(m.peerReceivedBytesTotal, float64(status.BytesReceived))
	set(m.peerSendBytesTotal, float64(status.BytesTransmitted))
	set(m.peerIsConnected, internal.BoolToFloat64(status.IsConnected()))
}

View File

@@ -95,8 +95,6 @@ func NewServer(cfg *config.Config, endpoints ...ApiEndpointSetupFunc) (*Server,
}
func (s *Server) Run(ctx context.Context, listenAddress string) {
logrus.Infof("starting web service on %s", listenAddress)
// Run web service
srv := &http.Server{
Addr: listenAddress,
@@ -116,6 +114,7 @@ func (s *Server) Run(ctx context.Context, listenAddress string) {
cancelFn()
}
}()
logrus.Infof("started web service on %s", listenAddress)
// Wait for the main context to end
<-srvContext.Done()

View File

@@ -2,6 +2,7 @@ package wireguard
import (
"context"
"github.com/h44z/wg-portal/internal/domain"
)
@@ -27,6 +28,7 @@ type InterfaceAndPeerDatabaseRepo interface {
type StatisticsDatabaseRepo interface {
GetAllInterfaces(ctx context.Context) ([]domain.Interface, error)
GetInterfacePeers(ctx context.Context, id domain.InterfaceIdentifier) ([]domain.Peer, error)
GetPeer(ctx context.Context, id domain.PeerIdentifier) (*domain.Peer, error)
UpdatePeerStatus(ctx context.Context, id domain.PeerIdentifier, updateFunc func(in *domain.PeerStatus) (*domain.PeerStatus, error)) error
UpdateInterfaceStatus(ctx context.Context, id domain.InterfaceIdentifier, updateFunc func(in *domain.InterfaceStatus) (*domain.InterfaceStatus, error)) error
@@ -48,3 +50,8 @@ type WgQuickController interface {
SetDNS(id domain.InterfaceIdentifier, dnsStr, dnsSearchStr string) error
UnsetDNS(id domain.InterfaceIdentifier) error
}
// MetricsServer is the sink to which the statistics collector hands freshly
// collected interface and peer status values.
type MetricsServer interface {
// UpdateInterfaceMetrics receives the latest status of one interface.
UpdateInterfaceMetrics(status domain.InterfaceStatus)
// UpdatePeerMetrics receives the latest status of one peer. ctx is passed
// through to the implementation.
UpdatePeerMetrics(ctx context.Context, status domain.PeerStatus)
}

View File

@@ -2,12 +2,13 @@ package wireguard
import (
"context"
"github.com/h44z/wg-portal/internal/config"
"github.com/h44z/wg-portal/internal/domain"
"github.com/prometheus-community/pro-bing"
"github.com/sirupsen/logrus"
"sync"
"time"
"github.com/h44z/wg-portal/internal/config"
"github.com/h44z/wg-portal/internal/domain"
probing "github.com/prometheus-community/pro-bing"
"github.com/sirupsen/logrus"
)
type StatisticsCollector struct {
@@ -18,14 +19,16 @@ type StatisticsCollector struct {
db StatisticsDatabaseRepo
wg InterfaceController
ms MetricsServer
}
func NewStatisticsCollector(cfg *config.Config, db StatisticsDatabaseRepo, wg InterfaceController) (*StatisticsCollector, error) {
// NewStatisticsCollector wires a StatisticsCollector with its configuration,
// database repository, WireGuard controller and metrics sink. The returned
// error is always nil.
func NewStatisticsCollector(cfg *config.Config, db StatisticsDatabaseRepo, wg InterfaceController, ms MetricsServer) (*StatisticsCollector, error) {
	collector := &StatisticsCollector{
		cfg: cfg,
		db:  db,
		wg:  wg,
		ms:  ms,
	}

	return collector, nil
}
@@ -70,11 +73,15 @@ func (c *StatisticsCollector) collectInterfaceData(ctx context.Context) {
i.UpdatedAt = time.Now()
i.BytesReceived = physicalInterface.BytesDownload
i.BytesTransmitted = physicalInterface.BytesUpload
// Update prometheus metrics
go c.ms.UpdateInterfaceMetrics(*i)
return i, nil
})
if err != nil {
logrus.Warnf("failed to update interface status for %s: %v", in.Identifier, err)
}
logrus.Tracef("updated interface status for %s", in.Identifier)
}
}
}
@@ -126,11 +133,15 @@ func (c *StatisticsCollector) collectPeerData(ctx context.Context) {
p.Endpoint = peer.Endpoint
p.LastHandshake = lastHandshake
// Update prometheus metrics
go c.ms.UpdatePeerMetrics(ctx, *p)
return p, nil
})
if err != nil {
logrus.Warnf("failed to update interface status for %s: %v", in.Identifier, err)
}
logrus.Tracef("updated peer status for %s", peer.Identifier)
}
}
}
@@ -234,7 +245,7 @@ func (c *StatisticsCollector) pingWorker(ctx context.Context) {
}
func (c *StatisticsCollector) isPeerPingable(ctx context.Context, peer domain.Peer) bool {
if c.cfg.Statistics.UsePingChecks == false {
if !c.cfg.Statistics.UsePingChecks {
return false
}

View File

@@ -48,6 +48,7 @@ type Config struct {
CollectInterfaceData bool `yaml:"collect_interface_data"`
CollectPeerData bool `yaml:"collect_peer_data"`
CollectAuditData bool `yaml:"collect_audit_data"`
ListeningAddress string `yaml:"listening_address"`
} `yaml:"statistics"`
Mail MailConfig `yaml:"mail"`
@@ -117,10 +118,11 @@ func defaultConfig() *Config {
cfg.Statistics.PingCheckWorkers = 10
cfg.Statistics.PingUnprivileged = false
cfg.Statistics.PingCheckInterval = 1 * time.Minute
cfg.Statistics.DataCollectionInterval = 10 * time.Second
cfg.Statistics.DataCollectionInterval = 1 * time.Minute
cfg.Statistics.CollectInterfaceData = true
cfg.Statistics.CollectPeerData = true
cfg.Statistics.CollectAuditData = true
cfg.Statistics.ListeningAddress = ":8787"
cfg.Mail = MailConfig{
Host: "127.0.0.1",

View File

@@ -126,3 +126,10 @@ func TruncateString(s string, max int) string {
}
return s[:max]
}
// BoolToFloat64 maps a boolean onto its numeric representation:
// true becomes 1 and false becomes 0.
func BoolToFloat64(b bool) float64 {
	var value float64 // zero value covers the false case
	if b {
		value = 1
	}
	return value
}