mirror of
https://github.com/fosrl/gerbil.git
synced 2026-05-07 04:09:58 -05:00
feat(prometheus): robust label validation and registration handling
This commit is contained in:
@@ -7,6 +7,7 @@ package prometheus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"log"
|
||||
"net/http"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
@@ -30,9 +31,10 @@ type Config struct {
|
||||
// in the backend-specific instrument types that implement the observability
|
||||
// instrument interfaces.
|
||||
type Backend struct {
|
||||
cfg Config
|
||||
registry *prometheus.Registry
|
||||
handler http.Handler
|
||||
cfg Config
|
||||
registry *prometheus.Registry
|
||||
handler http.Handler
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// New creates and initialises a Prometheus backend.
|
||||
@@ -48,6 +50,11 @@ func New(cfg Config) (*Backend, error) {
|
||||
}
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
droppedSamplesCounter := prometheus.NewCounter(prometheus.CounterOpts{
|
||||
Name: "gerbil_dropped_metric_samples_total",
|
||||
Help: "Total number of metric samples dropped due to invalid labels or unsupported label sets",
|
||||
})
|
||||
registry.MustRegister(droppedSamplesCounter)
|
||||
|
||||
// Include Go and process metrics by default.
|
||||
includeGo := cfg.IncludeGoMetrics == nil || *cfg.IncludeGoMetrics
|
||||
@@ -62,7 +69,7 @@ func New(cfg Config) (*Backend, error) {
|
||||
EnableOpenMetrics: false,
|
||||
})
|
||||
|
||||
return &Backend{cfg: cfg, registry: registry, handler: handler}, nil
|
||||
return &Backend{cfg: cfg, registry: registry, handler: handler, droppedSamplesCounter: droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// HTTPHandler returns the Prometheus /metrics HTTP handler.
|
||||
@@ -78,60 +85,107 @@ func (b *Backend) Shutdown(_ context.Context) error {
|
||||
}
|
||||
|
||||
// NewCounter creates a Prometheus CounterVec registered on the backend's registry.
|
||||
func (b *Backend) NewCounter(name, desc string, labelNames ...string) *Counter {
|
||||
func (b *Backend) NewCounter(name, desc string, labelNames ...string) (*Counter, error) {
|
||||
vec := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Counter{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.CounterVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &Counter{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &Counter{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// NewUpDownCounter creates a Prometheus GaugeVec (Prometheus gauges are
|
||||
// bidirectional) registered on the backend's registry.
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) *UpDownCounter {
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) (*UpDownCounter, error) {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &UpDownCounter{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.GaugeVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &UpDownCounter{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &UpDownCounter{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// NewInt64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) *Int64Gauge {
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) (*Int64Gauge, error) {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Int64Gauge{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.GaugeVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &Int64Gauge{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &Int64Gauge{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// NewFloat64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) *Float64Gauge {
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) (*Float64Gauge, error) {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Float64Gauge{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.GaugeVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &Float64Gauge{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &Float64Gauge{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// NewHistogram creates a Prometheus HistogramVec registered on the backend's registry.
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) *Histogram {
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (*Histogram, error) {
|
||||
vec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
Buckets: buckets,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Histogram{vec: vec}
|
||||
if err := b.registry.Register(vec); err != nil {
|
||||
if are, ok := err.(prometheus.AlreadyRegisteredError); ok {
|
||||
existing, ok := are.ExistingCollector.(*prometheus.HistogramVec)
|
||||
if !ok {
|
||||
return nil, err
|
||||
}
|
||||
return &Histogram{vec: existing, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
return nil, err
|
||||
}
|
||||
return &Histogram{vec: vec, labelNames: append([]string(nil), labelNames...), droppedSamplesCounter: b.droppedSamplesCounter}, nil
|
||||
}
|
||||
|
||||
// Counter is a native Prometheus counter instrument.
|
||||
type Counter struct {
|
||||
vec *prometheus.CounterVec
|
||||
vec *prometheus.CounterVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Add increments the counter by value for the given labels.
|
||||
@@ -139,47 +193,118 @@ type Counter struct {
|
||||
// value must be non-negative. Negative values are ignored.
|
||||
func (c *Counter) Add(_ context.Context, value int64, labels map[string]string) {
|
||||
if value < 0 {
|
||||
log.Printf("WARN: counter add called with negative value=%d labels=%v expected_labels=%v", value, labels, c.labelNames)
|
||||
return
|
||||
}
|
||||
c.vec.With(prometheus.Labels(labels)).Add(float64(value))
|
||||
normalized, ok := normalizeLabels(c.labelNames, labels, c.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("counter", c.labelNames, labels)
|
||||
c.vec.With(normalized).Add(float64(value))
|
||||
}
|
||||
|
||||
// UpDownCounter is a native Prometheus gauge used as a bidirectional counter.
|
||||
type UpDownCounter struct {
|
||||
vec *prometheus.GaugeVec
|
||||
vec *prometheus.GaugeVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Add adjusts the gauge by value for the given labels.
|
||||
func (u *UpDownCounter) Add(_ context.Context, value int64, labels map[string]string) {
|
||||
u.vec.With(prometheus.Labels(labels)).Add(float64(value))
|
||||
normalized, ok := normalizeLabels(u.labelNames, labels, u.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("updown", u.labelNames, labels)
|
||||
u.vec.With(normalized).Add(float64(value))
|
||||
}
|
||||
|
||||
// Int64Gauge is a native Prometheus gauge recording integer snapshot values.
|
||||
type Int64Gauge struct {
|
||||
vec *prometheus.GaugeVec
|
||||
vec *prometheus.GaugeVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Record sets the gauge to value for the given labels.
|
||||
func (g *Int64Gauge) Record(_ context.Context, value int64, labels map[string]string) {
|
||||
g.vec.With(prometheus.Labels(labels)).Set(float64(value))
|
||||
normalized, ok := normalizeLabels(g.labelNames, labels, g.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("int64-gauge", g.labelNames, labels)
|
||||
g.vec.With(normalized).Set(float64(value))
|
||||
}
|
||||
|
||||
// Float64Gauge is a native Prometheus gauge recording float snapshot values.
|
||||
type Float64Gauge struct {
|
||||
vec *prometheus.GaugeVec
|
||||
vec *prometheus.GaugeVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Record sets the gauge to value for the given labels.
|
||||
func (g *Float64Gauge) Record(_ context.Context, value float64, labels map[string]string) {
|
||||
g.vec.With(prometheus.Labels(labels)).Set(value)
|
||||
normalized, ok := normalizeLabels(g.labelNames, labels, g.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("float64-gauge", g.labelNames, labels)
|
||||
g.vec.With(normalized).Set(value)
|
||||
}
|
||||
|
||||
// Histogram is a native Prometheus histogram instrument.
|
||||
type Histogram struct {
|
||||
vec *prometheus.HistogramVec
|
||||
vec *prometheus.HistogramVec
|
||||
labelNames []string
|
||||
droppedSamplesCounter prometheus.Counter
|
||||
}
|
||||
|
||||
// Record observes value for the given labels.
|
||||
func (h *Histogram) Record(_ context.Context, value float64, labels map[string]string) {
|
||||
h.vec.With(prometheus.Labels(labels)).Observe(value)
|
||||
normalized, ok := normalizeLabels(h.labelNames, labels, h.droppedSamplesCounter)
|
||||
if !ok {
|
||||
return
|
||||
}
|
||||
defer guardMetricPanic("histogram", h.labelNames, labels)
|
||||
h.vec.With(normalized).Observe(value)
|
||||
}
|
||||
|
||||
func normalizeLabels(labelNames []string, labels map[string]string, droppedSamplesCounter prometheus.Counter) (prometheus.Labels, bool) {
|
||||
if len(labelNames) == 0 {
|
||||
if len(labels) > 0 {
|
||||
if droppedSamplesCounter != nil {
|
||||
droppedSamplesCounter.Inc()
|
||||
}
|
||||
log.Printf("WARN: dropping metric sample due to unexpected labels: got=%v expected=none", labels)
|
||||
return nil, false
|
||||
}
|
||||
return nil, true
|
||||
}
|
||||
|
||||
normalized := make(prometheus.Labels, len(labelNames))
|
||||
for _, name := range labelNames {
|
||||
normalized[name] = ""
|
||||
}
|
||||
|
||||
for k, v := range labels {
|
||||
if _, ok := normalized[k]; !ok {
|
||||
if droppedSamplesCounter != nil {
|
||||
droppedSamplesCounter.Inc()
|
||||
}
|
||||
log.Printf("WARN: dropping metric sample due to unexpected label key %q (expected=%v)", k, labelNames)
|
||||
return nil, false
|
||||
}
|
||||
normalized[k] = v
|
||||
}
|
||||
|
||||
return normalized, true
|
||||
}
|
||||
|
||||
func guardMetricPanic(kind string, expected []string, labels map[string]string) {
|
||||
if recovered := recover(); recovered != nil {
|
||||
log.Printf("WARN: dropped %s metric sample due to label panic: expected=%v got=%v err=%v", kind, expected, labels, recovered)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -36,7 +36,10 @@ func TestPrometheusBackendShutdown(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("test_counter_total", "A test counter", "result")
|
||||
c, err := b.NewCounter("test_counter_total", "A test counter", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
c.Add(context.Background(), 3, map[string]string{"result": "ok"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
@@ -45,7 +48,10 @@ func TestPrometheusBackendCounter(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendUpDownCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
u := b.NewUpDownCounter("test_gauge_total", "A test up-down counter", "state")
|
||||
u, err := b.NewUpDownCounter("test_gauge_total", "A test up-down counter", "state")
|
||||
if err != nil {
|
||||
t.Fatalf("NewUpDownCounter returned error: %v", err)
|
||||
}
|
||||
u.Add(context.Background(), 5, map[string]string{"state": "active"})
|
||||
u.Add(context.Background(), -2, map[string]string{"state": "active"})
|
||||
|
||||
@@ -55,7 +61,10 @@ func TestPrometheusBackendUpDownCounter(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendInt64Gauge(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
g := b.NewInt64Gauge("test_int_gauge", "An integer gauge", "ifname")
|
||||
g, err := b.NewInt64Gauge("test_int_gauge", "An integer gauge", "ifname")
|
||||
if err != nil {
|
||||
t.Fatalf("NewInt64Gauge returned error: %v", err)
|
||||
}
|
||||
g.Record(context.Background(), 42, map[string]string{"ifname": "wg0"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
@@ -64,7 +73,10 @@ func TestPrometheusBackendInt64Gauge(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendFloat64Gauge(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
g := b.NewFloat64Gauge("test_float_gauge", "A float gauge", "cert")
|
||||
g, err := b.NewFloat64Gauge("test_float_gauge", "A float gauge", "cert")
|
||||
if err != nil {
|
||||
t.Fatalf("NewFloat64Gauge returned error: %v", err)
|
||||
}
|
||||
g.Record(context.Background(), 7.5, map[string]string{"cert": "example.com"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
@@ -74,7 +86,10 @@ func TestPrometheusBackendFloat64Gauge(t *testing.T) {
|
||||
func TestPrometheusBackendHistogram(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
buckets := []float64{0.1, 0.5, 1.0, 5.0}
|
||||
h := b.NewHistogram("test_duration_seconds", "A test histogram", buckets, "method")
|
||||
h, err := b.NewHistogram("test_duration_seconds", "A test histogram", buckets, "method")
|
||||
if err != nil {
|
||||
t.Fatalf("NewHistogram returned error: %v", err)
|
||||
}
|
||||
h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
@@ -85,7 +100,10 @@ func TestPrometheusBackendHistogram(t *testing.T) {
|
||||
|
||||
func TestPrometheusBackendMultipleLabels(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("multi_label_total", "Multi-label counter", "method", "route", "status_code")
|
||||
c, err := b.NewCounter("multi_label_total", "Multi-label counter", "method", "route", "status_code")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
c.Add(context.Background(), 1, map[string]string{
|
||||
"method": "POST",
|
||||
"route": "/api/peers",
|
||||
@@ -122,23 +140,29 @@ func TestPrometheusBackendNoGoMetrics(t *testing.T) {
|
||||
func TestPrometheusBackendNilLabels(t *testing.T) {
|
||||
// Adding with nil labels should not panic (treated as empty map).
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("nil_labels_total", "counter with no labels")
|
||||
c, err := b.NewCounter("nil_labels_total", "counter with no labels")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
// nil labels with no label names declared should be safe
|
||||
c.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendConcurrentAdd(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("concurrent_total", "concurrent counter", "worker")
|
||||
c, err := b.NewCounter("concurrent_total", "concurrent counter", "worker")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
|
||||
done := make(chan struct{})
|
||||
for i := 0; i < 10; i++ {
|
||||
go func(_ int) {
|
||||
go func() {
|
||||
for j := 0; j < 100; j++ {
|
||||
c.Add(context.Background(), 1, map[string]string{"worker": "w"})
|
||||
}
|
||||
done <- struct{}{}
|
||||
}(i)
|
||||
}()
|
||||
}
|
||||
for i := 0; i < 10; i++ {
|
||||
<-done
|
||||
@@ -148,6 +172,40 @@ func TestPrometheusBackendConcurrentAdd(t *testing.T) {
|
||||
assertMetricPresent(t, body, `concurrent_total{worker="w"} 1000`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendAlreadyRegisteredCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c1, err := b.NewCounter("dupe_counter_total", "duplicate counter", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("first NewCounter returned error: %v", err)
|
||||
}
|
||||
c2, err := b.NewCounter("dupe_counter_total", "duplicate counter", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("second NewCounter returned error: %v", err)
|
||||
}
|
||||
|
||||
c1.Add(context.Background(), 1, map[string]string{"result": "ok"})
|
||||
c2.Add(context.Background(), 2, map[string]string{"result": "ok"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `dupe_counter_total{result="ok"} 3`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendInvalidLabelsNoPanic(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c, err := b.NewCounter("invalid_labels_total", "invalid labels test", "result")
|
||||
if err != nil {
|
||||
t.Fatalf("NewCounter returned error: %v", err)
|
||||
}
|
||||
|
||||
// Extra label key should be dropped and must not panic.
|
||||
c.Add(context.Background(), 5, map[string]string{"result": "ok", "unexpected": "x"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
if strings.Contains(body, `invalid_labels_total{result="ok"}`) {
|
||||
t.Error("invalid label sample should have been dropped")
|
||||
}
|
||||
}
|
||||
|
||||
// --- helpers ---
|
||||
|
||||
func scrapeMetrics(t *testing.T, b *obsprom.Backend) string {
|
||||
|
||||
Reference in New Issue
Block a user