diff --git a/internal/observability/config.go b/internal/observability/config.go index 9643727..7ac9ba2 100644 --- a/internal/observability/config.go +++ b/internal/observability/config.go @@ -60,6 +60,10 @@ type OTelConfig struct { // ExportInterval is how often metrics are pushed to the collector. // Defaults to 60 s. ExportInterval time.Duration + + // Timeout bounds OTLP exporter construction calls. + // Defaults to 10 s. + Timeout time.Duration } // DefaultMetricsConfig returns a MetricsConfig with sensible defaults. @@ -75,6 +79,7 @@ func DefaultMetricsConfig() MetricsConfig { Endpoint: "localhost:4317", Insecure: true, ExportInterval: 60 * time.Second, + Timeout: 10 * time.Second, }, ServiceName: "gerbil", ServiceVersion: "1.0.0", @@ -88,8 +93,10 @@ func (c *MetricsConfig) Validate() error { } switch c.Backend { - case "prometheus", "none", "": + case "prometheus", "none": // valid + case "": + return fmt.Errorf("metrics: enabled requires a non-empty backend") case "otel": if c.OTel.Endpoint == "" { return fmt.Errorf("metrics: backend=otel requires a non-empty OTel endpoint") @@ -100,6 +107,9 @@ func (c *MetricsConfig) Validate() error { if c.OTel.ExportInterval <= 0 { return fmt.Errorf("metrics: otel export interval must be positive") } + if c.OTel.Timeout <= 0 { + return fmt.Errorf("metrics: otel timeout must be positive") + } default: return fmt.Errorf("metrics: unknown backend %q (must be \"prometheus\", \"otel\", or \"none\")", c.Backend) } diff --git a/internal/observability/metrics.go b/internal/observability/metrics.go index ff2e2ae..9ea0013 100644 --- a/internal/observability/metrics.go +++ b/internal/observability/metrics.go @@ -43,20 +43,20 @@ type Histogram interface { type Backend interface { // NewCounter creates a counter metric. // labelNames declares the set of label keys that will be passed at observation time. - NewCounter(name, desc string, labelNames ...string) Counter + NewCounter(name, desc string, labelNames ...string) (Counter, error) // NewUpDownCounter creates an up-down counter metric. - NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter + NewUpDownCounter(name, desc string, labelNames ...string) (UpDownCounter, error) // NewInt64Gauge creates an integer gauge metric. - NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge + NewInt64Gauge(name, desc string, labelNames ...string) (Int64Gauge, error) // NewFloat64Gauge creates a float gauge metric. - NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge + NewFloat64Gauge(name, desc string, labelNames ...string) (Float64Gauge, error) // NewHistogram creates a histogram metric. // buckets are the explicit upper-bound bucket boundaries. - NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram + NewHistogram(name, desc string, buckets []float64, labelNames ...string) (Histogram, error) // HTTPHandler returns the /metrics HTTP handler. // Implementations that do not expose an HTTP endpoint return nil. @@ -88,6 +88,7 @@ func New(cfg MetricsConfig) (Backend, error) { Endpoint: cfg.OTel.Endpoint, Insecure: cfg.OTel.Insecure, ExportInterval: cfg.OTel.ExportInterval, + Timeout: cfg.OTel.Timeout, ServiceName: cfg.ServiceName, ServiceVersion: cfg.ServiceVersion, DeploymentEnvironment: cfg.DeploymentEnvironment, @@ -110,19 +111,19 @@ type promAdapter struct { b *obsprom.Backend } -func (a *promAdapter) NewCounter(name, desc string, labelNames ...string) Counter { +func (a *promAdapter) NewCounter(name, desc string, labelNames ...string) (Counter, error) { return a.b.NewCounter(name, desc, labelNames...) } -func (a *promAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter { +func (a *promAdapter) NewUpDownCounter(name, desc string, labelNames ...string) (UpDownCounter, error) { return a.b.NewUpDownCounter(name, desc, labelNames...) } -func (a *promAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge { +func (a *promAdapter) NewInt64Gauge(name, desc string, labelNames ...string) (Int64Gauge, error) { return a.b.NewInt64Gauge(name, desc, labelNames...) } -func (a *promAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge { +func (a *promAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) (Float64Gauge, error) { return a.b.NewFloat64Gauge(name, desc, labelNames...) } -func (a *promAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram { +func (a *promAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (Histogram, error) { return a.b.NewHistogram(name, desc, buckets, labelNames...) } func (a *promAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() } @@ -133,19 +134,19 @@ type otelAdapter struct { b *obsotel.Backend } -func (a *otelAdapter) NewCounter(name, desc string, labelNames ...string) Counter { +func (a *otelAdapter) NewCounter(name, desc string, labelNames ...string) (Counter, error) { return a.b.NewCounter(name, desc, labelNames...) } -func (a *otelAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter { +func (a *otelAdapter) NewUpDownCounter(name, desc string, labelNames ...string) (UpDownCounter, error) { return a.b.NewUpDownCounter(name, desc, labelNames...) } -func (a *otelAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge { +func (a *otelAdapter) NewInt64Gauge(name, desc string, labelNames ...string) (Int64Gauge, error) { return a.b.NewInt64Gauge(name, desc, labelNames...) } -func (a *otelAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge { +func (a *otelAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) (Float64Gauge, error) { return a.b.NewFloat64Gauge(name, desc, labelNames...) } -func (a *otelAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram { +func (a *otelAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (Histogram, error) { return a.b.NewHistogram(name, desc, buckets, labelNames...) } func (a *otelAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() } diff --git a/internal/observability/metrics_test.go b/internal/observability/metrics_test.go index 91a048c..cf3eccb 100644 --- a/internal/observability/metrics_test.go +++ b/internal/observability/metrics_test.go @@ -2,6 +2,8 @@ package observability_test import ( "context" + "net" + "os" "testing" "time" @@ -39,20 +41,19 @@ func TestValidateValidConfigs(t *testing.T) { }{ {name: "disabled", cfg: observability.MetricsConfig{Enabled: false}}, {name: "backend none", cfg: observability.MetricsConfig{Enabled: true, Backend: "none"}}, - {name: "backend empty", cfg: observability.MetricsConfig{Enabled: true, Backend: ""}}, {name: "prometheus", cfg: observability.MetricsConfig{Enabled: true, Backend: "prometheus"}}, { name: "otel grpc", cfg: observability.MetricsConfig{ Enabled: true, Backend: "otel", - OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second}, + OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second, Timeout: 2 * time.Second}, }, }, { name: "otel http", cfg: observability.MetricsConfig{ Enabled: true, Backend: "otel", - OTel: observability.OTelConfig{Protocol: "http", Endpoint: "localhost:4318", ExportInterval: 30 * time.Second}, + OTel: observability.OTelConfig{Protocol: "http", Endpoint: "localhost:4318", ExportInterval: 30 * time.Second, Timeout: 2 * time.Second}, }, }, } @@ -71,25 +72,36 @@ func TestValidateInvalidConfigs(t *testing.T) { cfg observability.MetricsConfig }{ {name: "unknown backend", cfg: observability.MetricsConfig{Enabled: true, Backend: "datadog"}}, + { + name: "backend empty while enabled", + cfg: observability.MetricsConfig{Enabled: true, Backend: ""}, + }, { name: "otel missing endpoint", cfg: observability.MetricsConfig{ Enabled: true, Backend: "otel", - OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: "", ExportInterval: 10 * time.Second}, + OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: "", ExportInterval: 10 * time.Second, Timeout: 2 * time.Second}, }, }, { name: "otel invalid protocol", cfg: observability.MetricsConfig{ Enabled: true, Backend: "otel", - OTel: observability.OTelConfig{Protocol: "tcp", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second}, + OTel: observability.OTelConfig{Protocol: "tcp", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second, Timeout: 2 * time.Second}, }, }, { name: "otel zero interval", cfg: observability.MetricsConfig{ Enabled: true, Backend: "otel", - OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 0}, + OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 0, Timeout: 2 * time.Second}, + }, + }, + { + name: "otel zero timeout", + cfg: observability.MetricsConfig{ + Enabled: true, Backend: "otel", + OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second, Timeout: 0}, }, }, } @@ -157,11 +169,32 @@ func TestPrometheusAdapterAllInstruments(t *testing.T) { ctx := context.Background() labels := observability.Labels{"k": "v"} - b.NewCounter("prom_adapter_counter_total", "desc", "k").Add(ctx, 1, labels) - b.NewUpDownCounter("prom_adapter_updown", "desc", "k").Add(ctx, 2, labels) - b.NewInt64Gauge("prom_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels) - b.NewFloat64Gauge("prom_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels) - b.NewHistogram("prom_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels) + c, err := b.NewCounter("prom_adapter_counter_total", "desc", "k") + if err != nil { + t.Fatalf("NewCounter error: %v", err) + } + u, err := b.NewUpDownCounter("prom_adapter_updown", "desc", "k") + if err != nil { + t.Fatalf("NewUpDownCounter error: %v", err) + } + ig, err := b.NewInt64Gauge("prom_adapter_int_gauge", "desc", "k") + if err != nil { + t.Fatalf("NewInt64Gauge error: %v", err) + } + fg, err := b.NewFloat64Gauge("prom_adapter_float_gauge", "desc", "k") + if err != nil { + t.Fatalf("NewFloat64Gauge error: %v", err) + } + h, err := b.NewHistogram("prom_adapter_histogram", "desc", []float64{0.1, 1.0}, "k") + if err != nil { + t.Fatalf("NewHistogram error: %v", err) + } + + c.Add(ctx, 1, labels) + u.Add(ctx, 2, labels) + ig.Record(ctx, 99, labels) + fg.Record(ctx, 1.23, labels) + h.Record(ctx, 0.5, labels) if b.HTTPHandler() == nil { t.Error("prometheus adapter HTTPHandler should not be nil") @@ -172,9 +205,20 @@ func TestPrometheusAdapterAllInstruments(t *testing.T) { } func TestOtelAdapterAllInstruments(t *testing.T) { + if os.Getenv("SKIP_OTEL_INTEGRATION") != "" { + t.Skip("skipping OTel integration test because SKIP_OTEL_INTEGRATION is set") + } + + dialTimeout := 300 * time.Millisecond + conn, err := net.DialTimeout("tcp", otelGRPCEndpoint, dialTimeout) + if err != nil { + t.Skipf("skipping OTel integration test; collector %s not reachable: %v", otelGRPCEndpoint, err) + } + _ = conn.Close() + b, err := observability.New(observability.MetricsConfig{ Enabled: true, Backend: "otel", - OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, Insecure: true, ExportInterval: 100 * time.Millisecond}, + OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, Insecure: true, ExportInterval: 100 * time.Millisecond, Timeout: 2 * time.Second}, }) if err != nil { t.Fatalf("failed to create otel backend: %v", err) @@ -182,11 +226,32 @@ func TestOtelAdapterAllInstruments(t *testing.T) { ctx := context.Background() labels := observability.Labels{"k": "v"} - b.NewCounter("otel_adapter_counter_total", "desc", "k").Add(ctx, 1, labels) - b.NewUpDownCounter("otel_adapter_updown", "desc", "k").Add(ctx, 2, labels) - b.NewInt64Gauge("otel_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels) - b.NewFloat64Gauge("otel_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels) - b.NewHistogram("otel_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels) + c, err := b.NewCounter("otel_adapter_counter_total", "desc", "k") + if err != nil { + t.Fatalf("NewCounter error: %v", err) + } + u, err := b.NewUpDownCounter("otel_adapter_updown", "desc", "k") + if err != nil { + t.Fatalf("NewUpDownCounter error: %v", err) + } + ig, err := b.NewInt64Gauge("otel_adapter_int_gauge", "desc", "k") + if err != nil { + t.Fatalf("NewInt64Gauge error: %v", err) + } + fg, err := b.NewFloat64Gauge("otel_adapter_float_gauge", "desc", "k") + if err != nil { + t.Fatalf("NewFloat64Gauge error: %v", err) + } + h, err := b.NewHistogram("otel_adapter_histogram", "desc", []float64{0.1, 1.0}, "k") + if err != nil { + t.Fatalf("NewHistogram error: %v", err) + } + + c.Add(ctx, 1, labels) + u.Add(ctx, 2, labels) + ig.Record(ctx, 99, labels) + fg.Record(ctx, 1.23, labels) + h.Record(ctx, 0.5, labels) if b.HTTPHandler() != nil { t.Error("OTel adapter HTTPHandler should be nil") diff --git a/internal/observability/noop.go b/internal/observability/noop.go index 47acc07..cfd8280 100644 --- a/internal/observability/noop.go +++ b/internal/observability/noop.go @@ -13,38 +13,31 @@ type NoopBackend struct{} // Compile-time interface check. var _ Backend = (*NoopBackend)(nil) -func (n *NoopBackend) NewCounter(_ string, _ string, _ ...string) Counter { - _ = n - return noopCounter{} +func (n *NoopBackend) NewCounter(_ string, _ string, _ ...string) (Counter, error) { + return noopCounter{}, nil } -func (n *NoopBackend) NewUpDownCounter(_ string, _ string, _ ...string) UpDownCounter { - _ = n - return noopUpDownCounter{} +func (n *NoopBackend) NewUpDownCounter(_ string, _ string, _ ...string) (UpDownCounter, error) { + return noopUpDownCounter{}, nil } -func (n *NoopBackend) NewInt64Gauge(_ string, _ string, _ ...string) Int64Gauge { - _ = n - return noopInt64Gauge{} +func (n *NoopBackend) NewInt64Gauge(_ string, _ string, _ ...string) (Int64Gauge, error) { + return noopInt64Gauge{}, nil } -func (n *NoopBackend) NewFloat64Gauge(_ string, _ string, _ ...string) Float64Gauge { - _ = n - return noopFloat64Gauge{} +func (n *NoopBackend) NewFloat64Gauge(_ string, _ string, _ ...string) (Float64Gauge, error) { + return noopFloat64Gauge{}, nil } -func (n *NoopBackend) NewHistogram(_ string, _ string, _ []float64, _ ...string) Histogram { - _ = n - return noopHistogram{} +func (n *NoopBackend) NewHistogram(_ string, _ string, _ []float64, _ ...string) (Histogram, error) { + return noopHistogram{}, nil } func (n *NoopBackend) HTTPHandler() http.Handler { - _ = n return nil } func (n *NoopBackend) Shutdown(_ context.Context) error { - _ = n return nil } diff --git a/internal/observability/noop_test.go b/internal/observability/noop_test.go index 9496a0a..037ceb2 100644 --- a/internal/observability/noop_test.go +++ b/internal/observability/noop_test.go @@ -13,32 +13,32 @@ func TestNoopBackendAllInstruments(t *testing.T) { ctx := context.Background() labels := observability.Labels{"k": "v"} - t.Run("Counter", func(_ *testing.T) { - c := n.NewCounter("test_counter", "desc") + t.Run("Counter", func(t *testing.T) { + c, _ := n.NewCounter("test_counter", "desc") c.Add(ctx, 1, labels) c.Add(ctx, 0, nil) }) - t.Run("UpDownCounter", func(_ *testing.T) { - u := n.NewUpDownCounter("test_updown", "desc") + t.Run("UpDownCounter", func(t *testing.T) { + u, _ := n.NewUpDownCounter("test_updown", "desc") u.Add(ctx, 1, labels) u.Add(ctx, -1, nil) }) - t.Run("Int64Gauge", func(_ *testing.T) { - g := n.NewInt64Gauge("test_int64gauge", "desc") + t.Run("Int64Gauge", func(t *testing.T) { + g, _ := n.NewInt64Gauge("test_int64gauge", "desc") g.Record(ctx, 42, labels) g.Record(ctx, 0, nil) }) - t.Run("Float64Gauge", func(_ *testing.T) { - g := n.NewFloat64Gauge("test_float64gauge", "desc") + t.Run("Float64Gauge", func(t *testing.T) { + g, _ := n.NewFloat64Gauge("test_float64gauge", "desc") g.Record(ctx, 3.14, labels) g.Record(ctx, 0, nil) }) - t.Run("Histogram", func(_ *testing.T) { - h := n.NewHistogram("test_histogram", "desc", []float64{1, 5, 10}) + t.Run("Histogram", func(t *testing.T) { + h, _ := n.NewHistogram("test_histogram", "desc", []float64{1, 5, 10}) h.Record(ctx, 2.5, labels) h.Record(ctx, 0, nil) }) @@ -56,12 +56,47 @@ func TestNoopBackendAllInstruments(t *testing.T) { }) } -func TestNoopBackendLabelNames(_ *testing.T) { +func TestNoopBackendLabelNames(t *testing.T) { // Verify that label names passed at creation time are accepted without panic. n := &observability.NoopBackend{} - n.NewCounter("c", "d", "label1", "label2") - n.NewUpDownCounter("u", "d", "l1") - n.NewInt64Gauge("g1", "d", "l1", "l2", "l3") - n.NewFloat64Gauge("g2", "d") - n.NewHistogram("h", "d", []float64{0.1, 1.0}, "l1") + + assertNoPanic := func(t *testing.T, constructor string, fn func()) { + t.Helper() + defer func() { + if r := recover(); r != nil { + t.Fatalf("%s panicked: %v", constructor, r) + } + }() + fn() + } + + t.Run("NewCounter", func(t *testing.T) { + assertNoPanic(t, "NewCounter", func() { + _, _ = n.NewCounter("c", "d", "label1", "label2") + }) + }) + + t.Run("NewUpDownCounter", func(t *testing.T) { + assertNoPanic(t, "NewUpDownCounter", func() { + _, _ = n.NewUpDownCounter("u", "d", "l1") + }) + }) + + t.Run("NewInt64Gauge", func(t *testing.T) { + assertNoPanic(t, "NewInt64Gauge", func() { + _, _ = n.NewInt64Gauge("g1", "d", "l1", "l2", "l3") + }) + }) + + t.Run("NewFloat64Gauge", func(t *testing.T) { + assertNoPanic(t, "NewFloat64Gauge", func() { + _, _ = n.NewFloat64Gauge("g2", "d") + }) + }) + + t.Run("NewHistogram", func(t *testing.T) { + assertNoPanic(t, "NewHistogram", func() { + _, _ = n.NewHistogram("h", "d", []float64{0.1, 1.0}, "l1") + }) + }) } diff --git a/internal/observability/otel/backend.go b/internal/observability/otel/backend.go index d3e3a23..7f49579 100644 --- a/internal/observability/otel/backend.go +++ b/internal/observability/otel/backend.go @@ -9,7 +9,10 @@ package otel import ( "context" "fmt" + "log" "net/http" + "regexp" + "strings" "time" "go.opentelemetry.io/otel/attribute" @@ -17,6 +20,8 @@ import ( sdkmetric "go.opentelemetry.io/otel/sdk/metric" ) +var metricLabelNameRE = regexp.MustCompile(`^[a-zA-Z_][a-zA-Z0-9_]*$`) + // Config holds OTel backend configuration. type Config struct { // Protocol is "grpc" (default) or "http". @@ -31,6 +36,9 @@ type Config struct { // ExportInterval is the period between pushes to the collector. ExportInterval time.Duration + // Timeout bounds exporter construction calls. + Timeout time.Duration + ServiceName string ServiceVersion string DeploymentEnvironment string @@ -57,9 +65,15 @@ func New(cfg Config) (*Backend, error) { if cfg.Protocol == "" { cfg.Protocol = "grpc" } + if strings.TrimSpace(cfg.Endpoint) == "" { + return nil, fmt.Errorf("otel backend: empty cfg.Endpoint") + } if cfg.ExportInterval <= 0 { cfg.ExportInterval = 60 * time.Second } + if cfg.Timeout <= 0 { + cfg.Timeout = 10 * time.Second + } if cfg.ServiceName == "" { cfg.ServiceName = "gerbil" } @@ -100,111 +114,196 @@ func (b *Backend) Shutdown(ctx context.Context) error { } // NewCounter creates an OTel Int64Counter. -func (b *Backend) NewCounter(name, desc string, _ ...string) *Counter { +func (b *Backend) NewCounter(name, desc string, labelNames ...string) (*Counter, error) { + normalizedLabelNames, err := validateLabelNames(labelNames) + if err != nil { + return nil, fmt.Errorf("otel: create counter %q: %w", name, err) + } c, err := b.meter.Int64Counter(name, metric.WithDescription(desc)) if err != nil { - panic(fmt.Sprintf("otel: create counter %q: %v", name, err)) + return nil, fmt.Errorf("otel: create counter %q: %w", name, err) } - return &Counter{c: c} + return &Counter{c: c, labelNames: normalizedLabelNames}, nil } // NewUpDownCounter creates an OTel Int64UpDownCounter. -func (b *Backend) NewUpDownCounter(name, desc string, _ ...string) *UpDownCounter { +func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) (*UpDownCounter, error) { + normalizedLabelNames, err := validateLabelNames(labelNames) + if err != nil { + return nil, fmt.Errorf("otel: create up-down counter %q: %w", name, err) + } c, err := b.meter.Int64UpDownCounter(name, metric.WithDescription(desc)) if err != nil { - panic(fmt.Sprintf("otel: create up-down counter %q: %v", name, err)) + return nil, fmt.Errorf("otel: create up-down counter %q: %w", name, err) } - return &UpDownCounter{c: c} + return &UpDownCounter{c: c, labelNames: normalizedLabelNames}, nil } // NewInt64Gauge creates an OTel Int64Gauge. -func (b *Backend) NewInt64Gauge(name, desc string, _ ...string) *Int64Gauge { +func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) (*Int64Gauge, error) { + normalizedLabelNames, err := validateLabelNames(labelNames) + if err != nil { + return nil, fmt.Errorf("otel: create int64 gauge %q: %w", name, err) + } g, err := b.meter.Int64Gauge(name, metric.WithDescription(desc)) if err != nil { - panic(fmt.Sprintf("otel: create int64 gauge %q: %v", name, err)) + return nil, fmt.Errorf("otel: create int64 gauge %q: %w", name, err) } - return &Int64Gauge{g: g} + return &Int64Gauge{g: g, labelNames: normalizedLabelNames}, nil } // NewFloat64Gauge creates an OTel Float64Gauge. -func (b *Backend) NewFloat64Gauge(name, desc string, _ ...string) *Float64Gauge { +func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) (*Float64Gauge, error) { + normalizedLabelNames, err := validateLabelNames(labelNames) + if err != nil { + return nil, fmt.Errorf("otel: create float64 gauge %q: %w", name, err) + } g, err := b.meter.Float64Gauge(name, metric.WithDescription(desc)) if err != nil { - panic(fmt.Sprintf("otel: create float64 gauge %q: %v", name, err)) + return nil, fmt.Errorf("otel: create float64 gauge %q: %w", name, err) } - return &Float64Gauge{g: g} + return &Float64Gauge{g: g, labelNames: normalizedLabelNames}, nil } // NewHistogram creates an OTel Float64Histogram with explicit bucket boundaries. -func (b *Backend) NewHistogram(name, desc string, buckets []float64, _ ...string) *Histogram { +func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) (*Histogram, error) { + normalizedLabelNames, err := validateLabelNames(labelNames) + if err != nil { + return nil, fmt.Errorf("otel: create histogram %q: %w", name, err) + } h, err := b.meter.Float64Histogram(name, metric.WithDescription(desc), metric.WithExplicitBucketBoundaries(buckets...), ) if err != nil { - panic(fmt.Sprintf("otel: create histogram %q: %v", name, err)) + return nil, fmt.Errorf("otel: create histogram %q: %w", name, err) } - return &Histogram{h: h} + return &Histogram{h: h, labelNames: normalizedLabelNames}, nil } -// labelsToAttrs converts a Labels map to OTel attribute key-value pairs. -func labelsToAttrs(labels map[string]string) []attribute.KeyValue { - if len(labels) == 0 { - return nil +func validateLabelNames(labelNames []string) ([]string, error) { + if len(labelNames) == 0 { + return nil, nil } - attrs := make([]attribute.KeyValue, 0, len(labels)) - for k, v := range labels { - attrs = append(attrs, attribute.String(k, v)) + + normalized := make([]string, len(labelNames)) + seen := make(map[string]struct{}, len(labelNames)) + for i, name := range labelNames { + if !metricLabelNameRE.MatchString(name) { + return nil, fmt.Errorf("invalid label name %q", name) + } + if _, exists := seen[name]; exists { + return nil, fmt.Errorf("duplicate label name %q", name) + } + seen[name] = struct{}{} + normalized[i] = name } + + return normalized, nil +} + +func labelsToAttrs(labelNames []string, labels map[string]string) []attribute.KeyValue { + if len(labelNames) == 0 { + if len(labels) > 0 { + log.Printf("WARN: dropping otel metric sample due to unexpected labels: got=%v expected=none", labels) + return nil + } + return []attribute.KeyValue{} + } + + attrs := make([]attribute.KeyValue, 0, len(labelNames)) + for _, labelName := range labelNames { + attrs = append(attrs, attribute.String(labelName, labels[labelName])) + } + + for got := range labels { + found := false + for _, expected := range labelNames { + if got == expected { + found = true + break + } + } + if !found { + log.Printf("WARN: dropping otel metric sample due to unexpected label key %q (expected=%v)", got, labelNames) + return nil + } + } + return attrs } // Counter wraps an OTel Int64Counter. type Counter struct { - c metric.Int64Counter + c metric.Int64Counter + labelNames []string } // Add increments the counter by value. func (c *Counter) Add(ctx context.Context, value int64, labels map[string]string) { - c.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...)) + attrs := labelsToAttrs(c.labelNames, labels) + if attrs == nil { + return + } + c.c.Add(ctx, value, metric.WithAttributes(attrs...)) } // UpDownCounter wraps an OTel Int64UpDownCounter. type UpDownCounter struct { - c metric.Int64UpDownCounter + c metric.Int64UpDownCounter + labelNames []string } // Add adjusts the up-down counter by value. func (u *UpDownCounter) Add(ctx context.Context, value int64, labels map[string]string) { - u.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...)) + attrs := labelsToAttrs(u.labelNames, labels) + if attrs == nil { + return + } + u.c.Add(ctx, value, metric.WithAttributes(attrs...)) } // Int64Gauge wraps an OTel Int64Gauge. type Int64Gauge struct { - g metric.Int64Gauge + g metric.Int64Gauge + labelNames []string } // Record sets the gauge to value. func (g *Int64Gauge) Record(ctx context.Context, value int64, labels map[string]string) { - g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...)) + attrs := labelsToAttrs(g.labelNames, labels) + if attrs == nil { + return + } + g.g.Record(ctx, value, metric.WithAttributes(attrs...)) } // Float64Gauge wraps an OTel Float64Gauge. type Float64Gauge struct { - g metric.Float64Gauge + g metric.Float64Gauge + labelNames []string } // Record sets the gauge to value. func (g *Float64Gauge) Record(ctx context.Context, value float64, labels map[string]string) { - g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...)) + attrs := labelsToAttrs(g.labelNames, labels) + if attrs == nil { + return + } + g.g.Record(ctx, value, metric.WithAttributes(attrs...)) } // Histogram wraps an OTel Float64Histogram. type Histogram struct { - h metric.Float64Histogram + h metric.Float64Histogram + labelNames []string } // Record observes value in the histogram. func (h *Histogram) Record(ctx context.Context, value float64, labels map[string]string) { - h.h.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...)) + attrs := labelsToAttrs(h.labelNames, labels) + if attrs == nil { + return + } + h.h.Record(ctx, value, metric.WithAttributes(attrs...)) } diff --git a/internal/observability/otel/backend_test.go b/internal/observability/otel/backend_test.go index e527678..ef753e8 100644 --- a/internal/observability/otel/backend_test.go +++ b/internal/observability/otel/backend_test.go @@ -55,7 +55,10 @@ func TestOtelBackendCounter(t *testing.T) { b := newInMemoryBackend(t) defer b.Shutdown(context.Background()) //nolint:errcheck - c := b.NewCounter("gerbil_test_counter_total", "test counter", "result") + c, err := b.NewCounter("gerbil_test_counter_total", "test counter", "result") + if err != nil { + t.Fatalf("NewCounter returned error: %v", err) + } // Should not panic c.Add(context.Background(), 1, map[string]string{"result": "ok"}) c.Add(context.Background(), 5, nil) @@ -65,7 +68,10 @@ func TestOtelBackendUpDownCounter(t *testing.T) { b := newInMemoryBackend(t) defer b.Shutdown(context.Background()) //nolint:errcheck - u := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state") + u, err := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state") + if err != nil { + t.Fatalf("NewUpDownCounter returned error: %v", err) + } u.Add(context.Background(), 3, map[string]string{"state": "active"}) u.Add(context.Background(), -1, map[string]string{"state": "active"}) } @@ -74,7 +80,10 @@ func TestOtelBackendInt64Gauge(t *testing.T) { b := newInMemoryBackend(t) defer b.Shutdown(context.Background()) //nolint:errcheck - g := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge") + g, err := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge") + if err != nil { + t.Fatalf("NewInt64Gauge returned error: %v", err) + } g.Record(context.Background(), 42, nil) } @@ -82,7 +91,10 @@ func TestOtelBackendFloat64Gauge(t *testing.T) { b := newInMemoryBackend(t) defer b.Shutdown(context.Background()) //nolint:errcheck - g := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge") + g, err := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge") + if err != nil { + t.Fatalf("NewFloat64Gauge returned error: %v", err) + } g.Record(context.Background(), 3.14, nil) } @@ -90,8 +102,11 @@ func TestOtelBackendHistogram(t *testing.T) { b := newInMemoryBackend(t) defer b.Shutdown(context.Background()) //nolint:errcheck - h := b.NewHistogram("gerbil_test_duration_seconds", "test histogram", + h, err := b.NewHistogram("gerbil_test_duration_seconds", "test histogram", []float64{0.1, 0.5, 1.0}, "method") + if err != nil { + t.Fatalf("NewHistogram returned error: %v", err) + } h.Record(context.Background(), 0.3, map[string]string{"method": "GET"}) } @@ -139,3 +154,22 @@ func TestOtelBackendDeploymentEnvironment(t *testing.T) { } defer b.Shutdown(context.Background()) //nolint:errcheck } + +func TestOtelBackendRejectsInvalidLabelNames(t *testing.T) { + b := newInMemoryBackend(t) + defer b.Shutdown(context.Background()) //nolint:errcheck + + t.Run("duplicate labels", func(t *testing.T) { + _, err := b.NewCounter("gerbil_test_invalid_labels_total", "test counter", "result", "result") + if err == nil { + t.Fatal("expected error for duplicate label names") + } + }) + + t.Run("invalid label name", func(t *testing.T) { + _, err := b.NewHistogram("gerbil_test_invalid_histogram", "test histogram", []float64{0.1, 1.0}, "status-code") + if err == nil { + t.Fatal("expected error for invalid label name") + } + }) +} diff --git a/internal/observability/otel/exporter.go b/internal/observability/otel/exporter.go index 44fe1e2..89950fa 100644 --- a/internal/observability/otel/exporter.go +++ b/internal/observability/otel/exporter.go @@ -3,6 +3,8 @@ package otel import ( "context" "fmt" + "net/url" + "strings" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc" "go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp" @@ -11,6 +13,10 @@ import ( // newExporter creates the appropriate OTLP exporter based on cfg.Protocol. func newExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) { + if strings.TrimSpace(cfg.Endpoint) == "" { + return nil, fmt.Errorf("otel: cfg.Endpoint is empty") + } + switch cfg.Protocol { case "grpc", "": return newGRPCExporter(ctx, cfg) @@ -36,8 +42,20 @@ func newGRPCExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error } func newHTTPExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) { - opts := []otlpmetrichttp.Option{ - otlpmetrichttp.WithEndpoint(cfg.Endpoint), + endpoint := strings.TrimSpace(cfg.Endpoint) + + opts := make([]otlpmetrichttp.Option, 0, 3) + if strings.Contains(endpoint, "://") { + parsed, err := url.Parse(endpoint) + if err != nil { + return nil, fmt.Errorf("otlp http exporter: parse endpoint URL %q: %w", endpoint, err) + } + opts = append(opts, otlpmetrichttp.WithEndpointURL(parsed.String())) + } else { + opts = append(opts, + otlpmetrichttp.WithEndpoint(endpoint), + otlpmetrichttp.WithURLPath("/v1/metrics"), + ) } if cfg.Insecure { opts = append(opts, otlpmetrichttp.WithInsecure())