Adding OpenTelemetry Metrics and Tracing

This commit is contained in:
Marc Schäfer
2025-10-11 18:19:51 +02:00
parent b383cec0b0
commit c086e69dd0
30 changed files with 3457 additions and 158 deletions

5
.env.example Normal file
View File

@@ -0,0 +1,5 @@
# Copy this file to .env and fill in your values
# Required for connecting to Pangolin service
PANGOLIN_ENDPOINT=https://example.com
NEWT_ID=changeme-id
NEWT_SECRET=changeme-secret

View File

@@ -1,19 +1,29 @@
#ghcr.io/marcschaeferger/newt-private:1.0.0-otel
#tademsh/newt:1.0.0-otel
FROM golang:1.25-alpine AS builder
# Install git and ca-certificates
RUN apk --no-cache add ca-certificates git tzdata
# Set the working directory inside the container
WORKDIR /app
# Copy go mod and sum files
COPY go.mod go.sum ./
# Coolify specific Test - set Go proxy to direct to avoid issues
# ENV GOSUMDB=off
ENV GOPROXY=https://goproxy.io,https://proxy.golang.org,direct
RUN go env | grep -E 'GOPROXY|GOSUMDB|GOPRIVATE' && go mod download
# Download all dependencies
RUN go mod download
#RUN go mod download
# Copy the source code into the container
COPY . .
# Build the application
RUN CGO_ENABLED=0 GOOS=linux go build -o /newt
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /newt
FROM alpine:3.22 AS runner
@@ -22,6 +32,9 @@ RUN apk --no-cache add ca-certificates tzdata
COPY --from=builder /newt /usr/local/bin/
COPY entrypoint.sh /
# Admin/metrics endpoint (Prometheus scrape)
EXPOSE 2112
RUN chmod +x /entrypoint.sh
ENTRYPOINT ["/entrypoint.sh"]
CMD ["newt"]
CMD ["newt"]

View File

@@ -0,0 +1,41 @@
services:
newt:
build: .
image: newt:dev
env_file:
- .env
environment:
- NEWT_METRICS_PROMETHEUS_ENABLED=false # important: disable direct /metrics scraping
- NEWT_METRICS_OTLP_ENABLED=true # OTLP to the Collector
# optional:
# - NEWT_METRICS_INCLUDE_TUNNEL_ID=false
# When using the Collector pattern, do NOT map the Newt admin/metrics port
# (2112) on the application service. Mapping 2112 here can cause port
# conflicts and may result in duplicated Prometheus scraping (app AND
# collector being scraped for the same metrics). Instead either:
# - leave ports unset on the app service (recommended), or
# - map 2112 only on a dedicated metrics/collector service that is
# responsible for exposing metrics to Prometheus.
# Example: do NOT map here
# ports: []
# Example: map 2112 only on a collector service
# collector:
# ports:
# - "2112:2112" # collector's prometheus exporter (scraped by Prometheus)
otel-collector:
image: otel/opentelemetry-collector-contrib:latest
command: ["--config=/etc/otelcol/config.yaml"]
volumes:
- ./examples/otel-collector.yaml:/etc/otelcol/config.yaml:ro
ports:
- "4317:4317" # OTLP gRPC
- "8889:8889" # Prometheus Exporter (scraped by Prometheus)
prometheus:
image: prom/prometheus:latest
volumes:
- ./examples/prometheus.with-collector.yml:/etc/prometheus/prometheus.yml:ro
ports:
- "9090:9090"

View File

@@ -0,0 +1,56 @@
name: Newt-Metrics
services:
# Recommended Variant A: Direct Prometheus scrape of Newt (/metrics)
# Optional: You may add the Collector service and enable OTLP export, but do NOT
# scrape both Newt and the Collector for the same process.
newt:
build: .
image: newt:dev
env_file:
- .env
environment:
OTEL_SERVICE_NAME: newt
NEWT_METRICS_PROMETHEUS_ENABLED: "true"
NEWT_METRICS_OTLP_ENABLED: "false" # avoid double-scrape by default
NEWT_ADMIN_ADDR: ":2112"
# Base NEWT configuration
PANGOLIN_ENDPOINT: ${PANGOLIN_ENDPOINT}
NEWT_ID: ${NEWT_ID}
NEWT_SECRET: ${NEWT_SECRET}
LOG_LEVEL: "DEBUG"
ports:
- "2112:2112"
# Optional Variant B: Enable the Collector and switch Prometheus scrape to it.
# collector:
# image: otel/opentelemetry-collector-contrib:0.136.0
# command: ["--config=/etc/otelcol/config.yaml"]
# volumes:
# - ./examples/otel-collector.yaml:/etc/otelcol/config.yaml:ro
# ports:
# - "4317:4317" # OTLP gRPC in
# - "8889:8889" # Prometheus scrape out
prometheus:
image: prom/prometheus:v3.6.0
volumes:
- ./examples/prometheus.yml:/etc/prometheus/prometheus.yml:ro
ports:
- "9090:9090"
grafana:
image: grafana/grafana:12.2.0
container_name: newt-metrics-grafana
restart: unless-stopped
environment:
- GF_SECURITY_ADMIN_USER=admin
- GF_SECURITY_ADMIN_PASSWORD=admin
ports:
- "3005:3000"
depends_on:
- prometheus
volumes:
- ./examples/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
- ./examples/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
- ./examples/grafana/dashboards:/var/lib/grafana/dashboards:ro

View File

@@ -0,0 +1,898 @@
{
"annotations": {
"list": [
{
"builtIn": 1,
"datasource": {
"type": "grafana",
"uid": "-- Grafana --"
},
"enable": true,
"hide": true,
"iconColor": "rgba(0, 211, 255, 1)",
"name": "Annotations & Alerts",
"type": "dashboard"
}
]
},
"editable": true,
"fiscalYearStartMonth": 0,
"graphTooltip": 0,
"id": null,
"links": [],
"liveNow": false,
"panels": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 0,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "red",
"value": 500
}
]
}
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 0,
"y": 0
},
"id": 1,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "go_goroutine_count",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"title": "Goroutines",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 1,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 256
},
{
"color": "red",
"value": 512
}
]
},
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 6,
"y": 0
},
"id": 2,
"options": {
"colorMode": "background",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "go_memory_gc_goal_bytes / 1024 / 1024",
"format": "time_series",
"instant": true,
"legendFormat": "",
"refId": "A"
}
],
"title": "GC Target Heap (MiB)",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 2,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 10
},
{
"color": "red",
"value": 25
}
]
},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 12,
"y": 0
},
"id": 3,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(http_server_request_duration_seconds_count[$__rate_interval]))",
"instant": false,
"legendFormat": "req/s",
"refId": "A"
}
],
"title": "HTTP Requests / s",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"color": {
"mode": "thresholds"
},
"decimals": 3,
"mappings": [],
"thresholds": {
"mode": "absolute",
"steps": [
{
"color": "green",
"value": null
},
{
"color": "orange",
"value": 0.1
},
{
"color": "red",
"value": 0.5
}
]
},
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 7,
"w": 6,
"x": 18,
"y": 0
},
"id": 4,
"options": {
"colorMode": "value",
"graphMode": "area",
"justifyMode": "auto",
"orientation": "horizontal",
"reduceOptions": {
"calcs": [
"lastNotNull"
],
"fields": "",
"values": false
},
"textMode": "value_and_name"
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(newt_connection_errors_total{site_id=~\"$site_id\"}[$__rate_interval]))",
"instant": false,
"legendFormat": "errors/s",
"refId": "A"
}
],
"title": "Connection Errors / s",
"transformations": [],
"type": "stat"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "bytes"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 7
},
"id": 5,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(go_memory_used_bytes)",
"legendFormat": "Used",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "go_memory_gc_goal_bytes",
"legendFormat": "GC Goal",
"refId": "B"
}
],
"title": "Go Heap Usage vs GC Goal",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"decimals": 0,
"mappings": [],
"unit": "short"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 7
},
"id": 6,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "rate(go_memory_allocations_total[$__rate_interval])",
"legendFormat": "Allocations/s",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "rate(go_memory_allocated_bytes_total[$__rate_interval])",
"legendFormat": "Allocated bytes/s",
"refId": "B"
}
],
"title": "Allocation Activity",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 16
},
"id": 7,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"title": "HTTP Request Duration Quantiles",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 16
},
"id": 8,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(http_server_request_duration_seconds_count[$__rate_interval])) by (http_response_status_code)",
"legendFormat": "{{http_response_status_code}}",
"refId": "A"
}
],
"title": "HTTP Requests by Status",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 25
},
"id": 9,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(newt_connection_attempts_total{site_id=~\"$site_id\"}[$__rate_interval])) by (transport, result)",
"legendFormat": "{{transport}} • {{result}}",
"refId": "A"
}
],
"title": "Connection Attempts by Transport/Result",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "ops"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 25
},
"id": 10,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(newt_connection_errors_total{site_id=~\"$site_id\"}[$__rate_interval])) by (transport, error_type)",
"legendFormat": "{{transport}} • {{error_type}}",
"refId": "A"
}
],
"title": "Connection Errors by Type",
"type": "timeseries"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"decimals": 3,
"mappings": [],
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 0,
"y": 34
},
"id": 11,
"options": {
"legend": {
"calcs": [],
"displayMode": "table",
"placement": "right"
},
"tooltip": {
"mode": "multi",
"sort": "desc"
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.5, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
"legendFormat": "p50",
"refId": "A"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.95, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
"legendFormat": "p95",
"refId": "B"
},
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "histogram_quantile(0.99, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
"legendFormat": "p99",
"refId": "C"
}
],
"title": "Tunnel Latency Quantiles",
"type": "timeseries"
},
{
"cards": {},
"color": {
"cardColor": "#b4ff00",
"colorScale": "sqrt",
"colorScheme": "interpolateTurbo"
},
"dataFormat": "tsbuckets",
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"fieldConfig": {
"defaults": {
"custom": {},
"mappings": [],
"unit": "s"
},
"overrides": []
},
"gridPos": {
"h": 9,
"w": 12,
"x": 12,
"y": 34
},
"heatmap": {},
"hideZeroBuckets": true,
"id": 12,
"legend": {
"show": false
},
"options": {
"calculate": true,
"cellGap": 2,
"cellSize": "auto",
"color": {
"exponent": 0.5
},
"exemplars": {
"color": "rgba(255,255,255,0.7)"
},
"filterValues": {
"le": 1e-9
},
"legend": {
"show": false
},
"tooltip": {
"mode": "single",
"show": true
},
"xAxis": {
"show": true
},
"yAxis": {
"decimals": 3,
"show": true
}
},
"pluginVersion": "11.1.0",
"targets": [
{
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"editorMode": "code",
"expr": "sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le)",
"format": "heatmap",
"legendFormat": "{{le}}",
"refId": "A"
}
],
"title": "Tunnel Latency Bucket Rate",
"type": "heatmap"
}
],
"refresh": "30s",
"schemaVersion": 39,
"style": "dark",
"tags": [
"newt",
"otel"
],
"templating": {
"list": [
{
"current": {
"selected": false,
"text": "Prometheus",
"value": "prometheus"
},
"hide": 0,
"label": "Datasource",
"name": "DS_PROMETHEUS",
"options": [],
"query": "prometheus",
"refresh": 1,
"type": "datasource"
},
{
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(target_info, site_id)",
"hide": 0,
"includeAll": true,
"label": "Site",
"multi": true,
"name": "site_id",
"options": [],
"query": {
"query": "label_values(target_info, site_id)",
"refId": "SiteIdVar"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
},
{
"current": {
"selected": false,
"text": "All",
"value": "$__all"
},
"datasource": {
"type": "prometheus",
"uid": "prometheus"
},
"definition": "label_values(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\"}, tunnel_id)",
"hide": 0,
"includeAll": true,
"label": "Tunnel",
"multi": true,
"name": "tunnel_id",
"options": [],
"query": {
"query": "label_values(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\"}, tunnel_id)",
"refId": "TunnelVar"
},
"refresh": 2,
"regex": "",
"skipUrlSync": false,
"sort": 1,
"tagValuesQuery": "",
"tags": [],
"tagsQuery": "",
"type": "query",
"useTags": false
}
]
},
"time": {
"from": "now-6h",
"to": "now"
},
"timepicker": {
"refresh_intervals": [
"10s",
"30s",
"1m",
"5m",
"15m",
"30m",
"1h",
"2h",
"1d"
],
"time_options": [
"5m",
"15m",
"1h",
"6h",
"12h",
"24h",
"2d",
"7d",
"30d"
]
},
"timezone": "browser",
"title": "Newt Overview",
"uid": "newt-overview",
"version": 1,
"weekStart": ""
}

View File

@@ -0,0 +1,9 @@
apiVersion: 1
providers:
- name: "newt"
folder: "Newt"
type: file
disableDeletion: false
allowUiUpdates: true
options:
path: /var/lib/grafana/dashboards

View File

@@ -0,0 +1,9 @@
apiVersion: 1
datasources:
- name: Prometheus
type: prometheus
access: proxy
url: http://prometheus:9090
uid: prometheus
isDefault: true
editable: true

View File

@@ -0,0 +1,61 @@
# Variant A: Direct scrape of Newt (/metrics) via Prometheus (no Collector needed)
# Note: Newt already exposes labels like site_id, protocol, direction. Do not promote
# resource attributes into labels when scraping Newt directly.
#
# Example Prometheus scrape config:
# global:
# scrape_interval: 15s
# scrape_configs:
# - job_name: newt
# static_configs:
# - targets: ["newt:2112"]
#
# Variant B: Use OTEL Collector (Newt -> OTLP -> Collector -> Prometheus)
# This pipeline scrapes metrics from the Collector's Prometheus exporter.
# Labels are already on datapoints; promotion from resource is OPTIONAL and typically NOT required.
# If you enable transform/promote below, ensure you do not duplicate labels.
receivers:
otlp:
protocols:
grpc:
endpoint: ":4317"
processors:
memory_limiter:
check_interval: 5s
limit_percentage: 80
spike_limit_percentage: 25
resourcedetection:
detectors: [env, system]
timeout: 5s
batch: {}
# OPTIONAL: Only enable if you need to promote resource attributes to labels.
# WARNING: Newt already provides site_id as a label; avoid double-promotion.
# transform/promote:
# error_mode: ignore
# metric_statements:
# - context: datapoint
# statements:
# - set(attributes["service_instance_id"], resource.attributes["service.instance.id"]) where resource.attributes["service.instance.id"] != nil
# - set(attributes["site_id"], resource.attributes["site_id"]) where resource.attributes["site_id"] != nil
exporters:
prometheus:
endpoint: ":8889"
send_timestamps: true
# prometheusremotewrite:
# endpoint: http://mimir:9009/api/v1/push
debug:
verbosity: basic
service:
pipelines:
metrics:
receivers: [otlp]
processors: [memory_limiter, resourcedetection, batch] # add transform/promote if you really need it
exporters: [prometheus]
traces:
receivers: [otlp]
processors: [memory_limiter, resourcedetection, batch]
exporters: [debug]

View File

@@ -0,0 +1,16 @@
global:
scrape_interval: 15s
scrape_configs:
# IMPORTANT: Do not scrape Newt directly; scrape only the Collector!
- job_name: 'otel-collector'
static_configs:
- targets: ['otel-collector:8889']
# optional: limit metric cardinality
relabel_configs:
- action: labeldrop
regex: 'tunnel_id'
# - action: keep
# source_labels: [site_id]
# regex: '(site-a|site-b)'

21
examples/prometheus.yml Normal file
View File

@@ -0,0 +1,21 @@
global:
scrape_interval: 15s
scrape_configs:
- job_name: 'newt'
scrape_interval: 15s
static_configs:
- targets: ['newt:2112'] # /metrics
relabel_configs:
# optional: drop tunnel_id
- action: labeldrop
regex: 'tunnel_id'
# optional: allow only specific sites
- action: keep
source_labels: [site_id]
regex: '(site-a|site-b)'
# WARNING: Do not enable this together with the 'newt' job above or you will double-count.
# - job_name: 'otel-collector'
# static_configs:
# - targets: ['otel-collector:8889']

55
go.mod
View File

@@ -6,29 +6,45 @@ require (
github.com/docker/docker v28.5.0+incompatible
github.com/google/gopacket v1.1.19
github.com/gorilla/websocket v1.5.3
github.com/prometheus/client_golang v1.23.2
github.com/vishvananda/netlink v1.3.1
golang.org/x/crypto v0.42.0
golang.org/x/exp v0.0.0-20250718183923-645b1fa84792
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0
go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0
go.opentelemetry.io/otel v1.38.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0
go.opentelemetry.io/otel/exporters/prometheus v0.60.0
go.opentelemetry.io/otel/metric v1.38.0
go.opentelemetry.io/otel/sdk v1.38.0
go.opentelemetry.io/otel/sdk/metric v1.38.0
golang.org/x/crypto v0.43.0
golang.org/x/net v0.45.0
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10
google.golang.org/grpc v1.76.0
gopkg.in/yaml.v3 v3.0.1
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c
software.sslmate.com/src/go-pkcs12 v0.6.0
)
require (
github.com/Microsoft/go-winio v0.6.2 // indirect
github.com/containerd/errdefs v1.0.0 // indirect
github.com/Microsoft/go-winio v0.6.0 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/containerd/errdefs v0.3.0 // indirect
github.com/containerd/errdefs/pkg v0.3.0 // indirect
github.com/distribution/reference v0.6.0 // indirect
github.com/docker/go-connections v0.5.0 // indirect
github.com/docker/go-units v0.5.0 // indirect
github.com/docker/go-connections v0.6.0 // indirect
github.com/docker/go-units v0.4.0 // indirect
github.com/felixge/httpsnoop v1.0.4 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/google/btree v1.1.3 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
github.com/josharian/native v1.1.0 // indirect
github.com/mdlayher/genetlink v1.3.2 // indirect
github.com/mdlayher/netlink v1.7.2 // indirect
@@ -37,18 +53,29 @@ require (
github.com/moby/sys/atomicwriter v0.1.0 // indirect
github.com/moby/term v0.5.2 // indirect
github.com/morikuni/aec v1.0.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/opencontainers/go-digest v1.0.0 // indirect
github.com/opencontainers/image-spec v1.1.1 // indirect
github.com/opencontainers/image-spec v1.1.0 // indirect
github.com/pkg/errors v0.9.1 // indirect
github.com/prometheus/client_model v0.6.2 // indirect
github.com/prometheus/common v0.66.1 // indirect
github.com/prometheus/otlptranslator v0.0.2 // indirect
github.com/prometheus/procfs v0.17.0 // indirect
github.com/vishvananda/netns v0.0.5 // indirect
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect
go.opentelemetry.io/otel v1.37.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0 // indirect
go.opentelemetry.io/otel/metric v1.37.0 // indirect
go.opentelemetry.io/otel/trace v1.37.0 // indirect
golang.org/x/sync v0.16.0 // indirect
golang.org/x/sys v0.36.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect
go.opentelemetry.io/otel/trace v1.38.0 // indirect
go.opentelemetry.io/proto/otlp v1.7.1 // indirect
go.yaml.in/yaml/v2 v2.4.2 // indirect
golang.org/x/mod v0.28.0 // indirect
golang.org/x/sync v0.17.0 // indirect
golang.org/x/sys v0.37.0 // indirect
golang.org/x/text v0.30.0 // indirect
golang.org/x/time v0.12.0 // indirect
golang.org/x/tools v0.37.0 // indirect
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect
google.golang.org/protobuf v1.36.8 // indirect
)

146
go.sum
View File

@@ -1,12 +1,15 @@
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg=
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8=
github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2yDvg=
github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/containerd/errdefs v0.3.0 h1:FSZgGOeK4yuT/+DnF07/Olde/q4KBoMsaamhXxIMDp4=
github.com/containerd/errdefs v0.3.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
@@ -17,10 +20,10 @@ github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5Qvfr
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
github.com/docker/docker v28.5.0+incompatible h1:ZdSQoRUE9XxhFI/B8YLvhnEFMmYN9Pp8Egd2qcaFk1E=
github.com/docker/docker v28.5.0+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE=
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
@@ -28,6 +31,8 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
@@ -38,14 +43,20 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI=
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248=
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs=
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
github.com/klauspost/compress v1.18.0 h1:c/Cqfb0r+Yi+JtIEq73FWXVkRonBlf0CRNYc8Zttxdo=
github.com/klauspost/compress v1.18.0/go.mod h1:2Pp+KzxcywXVXMr50+X0Q/Lsb43OQHYWRCY2AiWywWQ=
github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE=
github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk=
github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY=
github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mdlayher/genetlink v1.3.2 h1:KdrNKe+CTu+IbZnm/GVUMXSqBBLqcGpRDa0xkQy56gw=
github.com/mdlayher/genetlink v1.3.2/go.mod h1:tcC3pkCrPUGIKKsCsp0B3AdaaKuHtaxoJRz3cc+528o=
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
@@ -64,71 +75,97 @@ github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
github.com/prometheus/otlptranslator v0.0.2 h1:+1CdeLVrRQ6Psmhnobldo0kTp96Rj80DRXRd5OSnMEQ=
github.com/prometheus/otlptranslator v0.0.2/go.mod h1:P8AwMgdD7XEr6QRUJ2QWLpiAZTgTE2UYgjlu3svompI=
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
github.com/rogpeppe/go-internal v1.13.1 h1:KvO1DLK/DRN07sQ1LQKScxyZJuNnedQ5/wKSR38lUII=
github.com/rogpeppe/go-internal v1.13.1/go.mod h1:uMEvuHeurkdAXX61udpOXGD/AzZDWNMNyH2VO9fmH0o=
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 h1:Hf9xI/XLML9ElpiHVDNwvqI0hIFlzV8dgIr35kV1kRU=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0/go.mod h1:NfchwuyNoMcZ5MLHwPrODwUF1HWCXWrL31s8gSAdIKY=
go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 h1:dNzwXjZKpMpE2JhmO+9HsPl42NIXFIFSUSSs0fiqra0=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0/go.mod h1:90PoxvaEB5n6AOdZvi+yWJQoE95U8Dhhw2bSyRqnTD0=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0 h1:nRVXXvf78e00EwY6Wp0YII8ww2JVWshZ20HfTlE11AM=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0/go.mod h1:r49hO7CgrxY9Voaj3Xe8pANWtr0Oq916d0XAmOoCZAQ=
go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI=
go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg=
go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc=
go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps=
go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
go.opentelemetry.io/proto/otlp v1.6.0 h1:jQjP+AQyTf+Fe7OKj/MfkDrmK4MNVtw2NpXsf9fefDI=
go.opentelemetry.io/proto/otlp v1.6.0/go.mod h1:cicgGehlFuNdgZkcALOCh3VE6K/u2tAjzlRhDwmVpZc=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18=
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg=
go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0 h1:PeBoRj6af6xMI7qCupwFvTbbnd49V7n5YpG6pg8iDYQ=
go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0/go.mod h1:ingqBCtMCe8I4vpz/UVzCW6sxoqgZB37nao91mLQ3Bw=
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 h1:vl9obrcoWVKp/lwl8tRE33853I8Xru9HFbw/skNeLs8=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0/go.mod h1:GAXRxmLJcVM3u22IjTg74zWBrRCKq8BnOqUVLodpcpw=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 h1:aTL7F04bJHUlztTsNGJ2l+6he8c+y/b//eR0jjjemT4=
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4=
go.opentelemetry.io/otel/exporters/prometheus v0.60.0 h1:cGtQxGvZbnrWdC2GyjZi0PDKVSLWP/Jocix3QWfXtbo=
go.opentelemetry.io/otel/exporters/prometheus v0.60.0/go.mod h1:hkd1EekxNo69PTV4OWFGZcKQiIqg0RfuWExcPKFvepk=
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
go.uber.org/goleak v1.3.0 h1:2K3zAYmnTNqV73imy9J1T3WC+gmCePx2hEGkimedGto=
go.uber.org/goleak v1.3.0/go.mod h1:CoHD4mav9JJNrW/WLlf7HGZPjdw8EucARQHekz1X6bE=
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
golang.org/x/crypto v0.42.0 h1:chiH31gIWm57EkTXpwnqf8qeuMUi0yekh6mT2AvFlqI=
golang.org/x/crypto v0.42.0/go.mod h1:4+rDnOTJhQCx2q7/j6rAN5XDw8kPjeaXEUR2eL94ix8=
golang.org/x/exp v0.0.0-20250718183923-645b1fa84792 h1:R9PFI6EUdfVKgwKjZef7QIwGcBKu86OEFpJ9nUEP2l4=
golang.org/x/exp v0.0.0-20250718183923-645b1fa84792/go.mod h1:A+z0yzpGtvnG90cToK5n2tu8UJVP2XUATh+r+sfOOOc=
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U=
golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI=
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
golang.org/x/net v0.45.0 h1:RLBg5JKixCy82FtLJpeNlVM0nrSqpCRYzVU1n8kj0tM=
golang.org/x/net v0.45.0/go.mod h1:ECOoLqd5U3Lhyeyo/QDCEVQ4sNgYsqvCZ722XogGieY=
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.36.0 h1:KVRy2GtZBrk1cBYA7MKu5bEZFxQk4NIDV6RLVcC8o0k=
golang.org/x/sys v0.36.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
golang.org/x/text v0.29.0 h1:1neNs90w9YzJ9BocxfsQNHKuAT4pkghyXc4nhZ6sJvk=
golang.org/x/text v0.29.0/go.mod h1:7MhJOA9CD2qZyOKYazxdYMF85OwPdEr9jTtBpO7ydH4=
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=
golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
@@ -136,15 +173,16 @@ golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+Z
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw=
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10 h1:3GDAcqdIg1ozBNLgPy4SLT84nfcBjr6rhGtXYtrkWLU=
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10/go.mod h1:T97yPqesLiNrOYxkwmhMI0ZIlJDm+p0PMR8eRVeR5tQ=
google.golang.org/genproto v0.0.0-20230920204549-e6e6cdab5c13 h1:vlzZttNJGVqTsRFU9AmdnrcO1Znh8Ew9kCD//yjigk0=
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 h1:Kog3KlB4xevJlAcbbbzPfRG0+X9fdoGM+UBRKVz6Wr0=
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237/go.mod h1:ezi0AVyMKDWy5xAncvjLWH7UcLBB5n7y2fQ8MzjJcto=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 h1:cJfm9zPbe1e873mHJzmQ1nwVEeRDU/T1wXDK2kUSU34=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA=
google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM=
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY=
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc=
google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A=
google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c=
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=

View File

@@ -0,0 +1,80 @@
package state
import (
"sync"
"sync/atomic"
"time"
"github.com/fosrl/newt/internal/telemetry"
)
// TelemetryView is a minimal, thread-safe implementation to feed observables.
// Since one Newt process represents one site, we expose a single logical site.
// site_id is a resource attribute, so we do not emit per-site labels here.
type TelemetryView struct {
online atomic.Bool
lastHBUnix atomic.Int64 // unix seconds
// per-tunnel sessions
sessMu sync.RWMutex
sessions map[string]*atomic.Int64
}
var (
globalView atomic.Pointer[TelemetryView]
)
// Global returns a singleton TelemetryView.
func Global() *TelemetryView {
if v := globalView.Load(); v != nil { return v }
v := &TelemetryView{ sessions: make(map[string]*atomic.Int64) }
globalView.Store(v)
telemetry.RegisterStateView(v)
return v
}
// Instrumentation helpers
func (v *TelemetryView) IncSessions(tunnelID string) {
v.sessMu.Lock(); defer v.sessMu.Unlock()
c := v.sessions[tunnelID]
if c == nil { c = &atomic.Int64{}; v.sessions[tunnelID] = c }
c.Add(1)
}
func (v *TelemetryView) DecSessions(tunnelID string) {
v.sessMu.Lock(); defer v.sessMu.Unlock()
if c := v.sessions[tunnelID]; c != nil {
c.Add(-1)
if c.Load() <= 0 { delete(v.sessions, tunnelID) }
}
}
func (v *TelemetryView) ClearTunnel(tunnelID string) {
v.sessMu.Lock(); defer v.sessMu.Unlock()
delete(v.sessions, tunnelID)
}
func (v *TelemetryView) SetOnline(b bool) { v.online.Store(b) }
func (v *TelemetryView) TouchHeartbeat() { v.lastHBUnix.Store(time.Now().Unix()) }
// --- telemetry.StateView interface ---
func (v *TelemetryView) ListSites() []string { return []string{"self"} }
func (v *TelemetryView) Online(_ string) (bool, bool) { return v.online.Load(), true }
func (v *TelemetryView) LastHeartbeat(_ string) (time.Time, bool) {
sec := v.lastHBUnix.Load()
if sec == 0 { return time.Time{}, false }
return time.Unix(sec, 0), true
}
func (v *TelemetryView) ActiveSessions(_ string) (int64, bool) {
// aggregated sessions (not used for per-tunnel gauge)
v.sessMu.RLock(); defer v.sessMu.RUnlock()
var sum int64
for _, c := range v.sessions { if c != nil { sum += c.Load() } }
return sum, true
}
// Extended accessor used by telemetry callback to publish per-tunnel samples.
func (v *TelemetryView) SessionsByTunnel() map[string]int64 {
v.sessMu.RLock(); defer v.sessMu.RUnlock()
out := make(map[string]int64, len(v.sessions))
for id, c := range v.sessions { if c != nil && c.Load() > 0 { out[id] = c.Load() } }
return out
}

View File

@@ -0,0 +1,19 @@
package telemetry
// Protocol labels (low-cardinality)
const (
ProtocolTCP = "tcp"
ProtocolUDP = "udp"
)
// Reconnect reason bins (fixed, low-cardinality)
const (
ReasonServerRequest = "server_request"
ReasonTimeout = "timeout"
ReasonPeerClose = "peer_close"
ReasonNetworkChange = "network_change"
ReasonAuthError = "auth_error"
ReasonHandshakeError = "handshake_error"
ReasonConfigChange = "config_change"
ReasonError = "error"
)

View File

@@ -0,0 +1,32 @@
package telemetry
import "testing"
func TestAllowedConstants(t *testing.T) {
allowedReasons := map[string]struct{}{
ReasonServerRequest: {},
ReasonTimeout: {},
ReasonPeerClose: {},
ReasonNetworkChange: {},
ReasonAuthError: {},
ReasonHandshakeError: {},
ReasonConfigChange: {},
ReasonError: {},
}
for k := range allowedReasons {
if k == "" {
t.Fatalf("empty reason constant")
}
}
allowedProtocols := map[string]struct{}{
ProtocolTCP: {},
ProtocolUDP: {},
}
for k := range allowedProtocols {
if k == "" {
t.Fatalf("empty protocol constant")
}
}
}

View File

@@ -0,0 +1,542 @@
package telemetry
import (
"context"
"sync"
"sync/atomic"
"time"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
// Instruments and helpers for Newt metrics following the naming, units, and
// low-cardinality label guidance from the issue description.
//
// Counters end with _total, durations are in seconds, sizes in bytes.
// Only low-cardinality stable labels are supported: tunnel_id,
// transport, direction, result, reason, error_type.
var (
initOnce sync.Once
meter metric.Meter
// Site / Registration
mSiteRegistrations metric.Int64Counter
mSiteOnline metric.Int64ObservableGauge
mSiteLastHeartbeat metric.Float64ObservableGauge
// Tunnel / Sessions
mTunnelSessions metric.Int64ObservableGauge
mTunnelBytes metric.Int64Counter
mTunnelLatency metric.Float64Histogram
mReconnects metric.Int64Counter
// Connection / NAT
mConnAttempts metric.Int64Counter
mConnErrors metric.Int64Counter
// Config/Restart
mConfigReloads metric.Int64Counter
mConfigApply metric.Float64Histogram
mCertRotationTotal metric.Int64Counter
mProcessStartTime metric.Float64ObservableGauge
// Build info
mBuildInfo metric.Int64ObservableGauge
// WebSocket
mWSConnectLatency metric.Float64Histogram
mWSMessages metric.Int64Counter
mWSDisconnects metric.Int64Counter
mWSKeepaliveFailure metric.Int64Counter
mWSSessionDuration metric.Float64Histogram
mWSConnected metric.Int64ObservableGauge
mWSReconnects metric.Int64Counter
// Proxy
mProxyActiveConns metric.Int64ObservableGauge
mProxyBufferBytes metric.Int64ObservableGauge
mProxyAsyncBacklogByte metric.Int64ObservableGauge
mProxyDropsTotal metric.Int64Counter
mProxyAcceptsTotal metric.Int64Counter
mProxyConnDuration metric.Float64Histogram
mProxyConnectionsTotal metric.Int64Counter
buildVersion string
buildCommit string
processStartUnix = float64(time.Now().UnixNano()) / 1e9
wsConnectedState atomic.Int64
)
// Proxy connection lifecycle events.
const (
ProxyConnectionOpened = "opened"
ProxyConnectionClosed = "closed"
)
// attrsWithSite appends site/region labels only when explicitly enabled to keep
// label cardinality low by default.
func attrsWithSite(extra ...attribute.KeyValue) []attribute.KeyValue {
attrs := make([]attribute.KeyValue, len(extra))
copy(attrs, extra)
if ShouldIncludeSiteLabels() {
attrs = append(attrs, siteAttrs()...)
}
return attrs
}
func registerInstruments() error {
var err error
initOnce.Do(func() {
meter = otel.Meter("newt")
if e := registerSiteInstruments(); e != nil {
err = e
return
}
if e := registerTunnelInstruments(); e != nil {
err = e
return
}
if e := registerConnInstruments(); e != nil {
err = e
return
}
if e := registerConfigInstruments(); e != nil {
err = e
return
}
if e := registerBuildWSProxyInstruments(); e != nil {
err = e
return
}
})
return err
}
func registerSiteInstruments() error {
var err error
mSiteRegistrations, err = meter.Int64Counter("newt_site_registrations_total",
metric.WithDescription("Total site registration attempts"))
if err != nil {
return err
}
mSiteOnline, err = meter.Int64ObservableGauge("newt_site_online",
metric.WithDescription("Site online (0/1)"))
if err != nil {
return err
}
mSiteLastHeartbeat, err = meter.Float64ObservableGauge("newt_site_last_heartbeat_timestamp_seconds",
metric.WithDescription("Unix timestamp of the last site heartbeat"),
metric.WithUnit("s"))
if err != nil {
return err
}
return nil
}
func registerTunnelInstruments() error {
var err error
mTunnelSessions, err = meter.Int64ObservableGauge("newt_tunnel_sessions",
metric.WithDescription("Active tunnel sessions"))
if err != nil {
return err
}
mTunnelBytes, err = meter.Int64Counter("newt_tunnel_bytes_total",
metric.WithDescription("Tunnel bytes ingress/egress"),
metric.WithUnit("By"))
if err != nil {
return err
}
mTunnelLatency, err = meter.Float64Histogram("newt_tunnel_latency_seconds",
metric.WithDescription("Per-tunnel latency in seconds"),
metric.WithUnit("s"))
if err != nil {
return err
}
mReconnects, err = meter.Int64Counter("newt_tunnel_reconnects_total",
metric.WithDescription("Tunnel reconnect events"))
if err != nil {
return err
}
return nil
}
func registerConnInstruments() error {
var err error
mConnAttempts, err = meter.Int64Counter("newt_connection_attempts_total",
metric.WithDescription("Connection attempts"))
if err != nil {
return err
}
mConnErrors, err = meter.Int64Counter("newt_connection_errors_total",
metric.WithDescription("Connection errors by type"))
if err != nil {
return err
}
return nil
}
func registerConfigInstruments() error {
mConfigReloads, _ = meter.Int64Counter("newt_config_reloads_total",
metric.WithDescription("Configuration reloads"))
mConfigApply, _ = meter.Float64Histogram("newt_config_apply_seconds",
metric.WithDescription("Configuration apply duration in seconds"),
metric.WithUnit("s"))
mCertRotationTotal, _ = meter.Int64Counter("newt_cert_rotation_total",
metric.WithDescription("Certificate rotation events (success/failure)"))
mProcessStartTime, _ = meter.Float64ObservableGauge("process_start_time_seconds",
metric.WithDescription("Unix timestamp of the process start time"),
metric.WithUnit("s"))
if mProcessStartTime != nil {
if _, err := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
o.ObserveFloat64(mProcessStartTime, processStartUnix)
return nil
}, mProcessStartTime); err != nil {
otel.Handle(err)
}
}
return nil
}
func registerBuildWSProxyInstruments() error {
// Build info gauge (value 1 with version/commit attributes)
mBuildInfo, _ = meter.Int64ObservableGauge("newt_build_info",
metric.WithDescription("Newt build information (value is always 1)"))
// WebSocket
mWSConnectLatency, _ = meter.Float64Histogram("newt_websocket_connect_latency_seconds",
metric.WithDescription("WebSocket connect latency in seconds"),
metric.WithUnit("s"))
mWSMessages, _ = meter.Int64Counter("newt_websocket_messages_total",
metric.WithDescription("WebSocket messages by direction and type"))
mWSDisconnects, _ = meter.Int64Counter("newt_websocket_disconnects_total",
metric.WithDescription("WebSocket disconnects by reason/result"))
mWSKeepaliveFailure, _ = meter.Int64Counter("newt_websocket_keepalive_failures_total",
metric.WithDescription("WebSocket keepalive (ping/pong) failures"))
mWSSessionDuration, _ = meter.Float64Histogram("newt_websocket_session_duration_seconds",
metric.WithDescription("Duration of established WebSocket sessions"),
metric.WithUnit("s"))
mWSConnected, _ = meter.Int64ObservableGauge("newt_websocket_connected",
metric.WithDescription("WebSocket connection state (1=connected, 0=disconnected)"))
mWSReconnects, _ = meter.Int64Counter("newt_websocket_reconnects_total",
metric.WithDescription("WebSocket reconnect attempts by reason"))
// Proxy
mProxyActiveConns, _ = meter.Int64ObservableGauge("newt_proxy_active_connections",
metric.WithDescription("Proxy active connections per tunnel and protocol"))
mProxyBufferBytes, _ = meter.Int64ObservableGauge("newt_proxy_buffer_bytes",
metric.WithDescription("Proxy buffer bytes (may approximate async backlog)"),
metric.WithUnit("By"))
mProxyAsyncBacklogByte, _ = meter.Int64ObservableGauge("newt_proxy_async_backlog_bytes",
metric.WithDescription("Unflushed async byte backlog per tunnel and protocol"),
metric.WithUnit("By"))
mProxyDropsTotal, _ = meter.Int64Counter("newt_proxy_drops_total",
metric.WithDescription("Proxy drops due to write errors"))
mProxyAcceptsTotal, _ = meter.Int64Counter("newt_proxy_accept_total",
metric.WithDescription("Proxy connection accepts by protocol and result"))
mProxyConnDuration, _ = meter.Float64Histogram("newt_proxy_connection_duration_seconds",
metric.WithDescription("Duration of completed proxy connections"),
metric.WithUnit("s"))
mProxyConnectionsTotal, _ = meter.Int64Counter("newt_proxy_connections_total",
metric.WithDescription("Proxy connection lifecycle events by protocol"))
// Register a default callback for build info if version/commit set
reg, e := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
if buildVersion == "" && buildCommit == "" {
return nil
}
attrs := []attribute.KeyValue{}
if buildVersion != "" {
attrs = append(attrs, attribute.String("version", buildVersion))
}
if buildCommit != "" {
attrs = append(attrs, attribute.String("commit", buildCommit))
}
if ShouldIncludeSiteLabels() {
attrs = append(attrs, siteAttrs()...)
}
o.ObserveInt64(mBuildInfo, 1, metric.WithAttributes(attrs...))
return nil
}, mBuildInfo)
if e != nil {
otel.Handle(e)
} else {
// Provide a functional stopper that unregisters the callback
obsStopper = func() { _ = reg.Unregister() }
}
if mWSConnected != nil {
if regConn, err := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
val := wsConnectedState.Load()
o.ObserveInt64(mWSConnected, val, metric.WithAttributes(attrsWithSite()...))
return nil
}, mWSConnected); err != nil {
otel.Handle(err)
} else {
wsConnStopper = func() { _ = regConn.Unregister() }
}
}
return nil
}
// Observable registration: Newt can register a callback to report gauges.
// Call SetObservableCallback once to start observing online status, last
// heartbeat seconds, and active sessions.
var (
obsOnce sync.Once
obsStopper func()
proxyObsOnce sync.Once
proxyStopper func()
wsConnStopper func()
)
// SetObservableCallback registers a single callback that will be invoked
// on collection. Use the provided observer to emit values for the observable
// gauges defined here.
//
// Example inside your code (where you have access to current state):
//
// telemetry.SetObservableCallback(func(ctx context.Context, o metric.Observer) error {
// o.ObserveInt64(mSiteOnline, 1)
// o.ObserveFloat64(mSiteLastHeartbeat, float64(lastHB.Unix()))
// o.ObserveInt64(mTunnelSessions, int64(len(activeSessions)))
// return nil
// })
func SetObservableCallback(cb func(context.Context, metric.Observer) error) {
obsOnce.Do(func() {
reg, e := meter.RegisterCallback(cb, mSiteOnline, mSiteLastHeartbeat, mTunnelSessions)
if e != nil {
otel.Handle(e)
obsStopper = func() {
// no-op: registration failed; keep stopper callable
}
return
}
// Provide a functional stopper mirroring proxy/build-info behavior
obsStopper = func() { _ = reg.Unregister() }
})
}
// SetProxyObservableCallback registers a callback to observe proxy gauges.
func SetProxyObservableCallback(cb func(context.Context, metric.Observer) error) {
proxyObsOnce.Do(func() {
reg, e := meter.RegisterCallback(cb, mProxyActiveConns, mProxyBufferBytes, mProxyAsyncBacklogByte)
if e != nil {
otel.Handle(e)
proxyStopper = func() {
// no-op: registration failed; keep stopper callable
}
return
}
// Provide a functional stopper to unregister later if needed
proxyStopper = func() { _ = reg.Unregister() }
})
}
// Build info registration
func RegisterBuildInfo(version, commit string) {
buildVersion = version
buildCommit = commit
}
// Config reloads
func IncConfigReload(ctx context.Context, result string) {
mConfigReloads.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("result", result),
)...))
}
// Helpers for counters/histograms
func IncSiteRegistration(ctx context.Context, result string) {
attrs := []attribute.KeyValue{
attribute.String("result", result),
}
mSiteRegistrations.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func AddTunnelBytes(ctx context.Context, tunnelID, direction string, n int64) {
attrs := []attribute.KeyValue{
attribute.String("direction", direction),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mTunnelBytes.Add(ctx, n, metric.WithAttributes(attrsWithSite(attrs...)...))
}
// AddTunnelBytesSet adds bytes using a pre-built attribute.Set to avoid per-call allocations.
func AddTunnelBytesSet(ctx context.Context, n int64, attrs attribute.Set) {
mTunnelBytes.Add(ctx, n, metric.WithAttributeSet(attrs))
}
// --- WebSocket helpers ---
func ObserveWSConnectLatency(ctx context.Context, seconds float64, result, errorType string) {
attrs := []attribute.KeyValue{
attribute.String("transport", "websocket"),
attribute.String("result", result),
}
if errorType != "" {
attrs = append(attrs, attribute.String("error_type", errorType))
}
mWSConnectLatency.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func IncWSMessage(ctx context.Context, direction, msgType string) {
mWSMessages.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("direction", direction),
attribute.String("msg_type", msgType),
)...))
}
func IncWSDisconnect(ctx context.Context, reason, result string) {
mWSDisconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("reason", reason),
attribute.String("result", result),
)...))
}
func IncWSKeepaliveFailure(ctx context.Context, reason string) {
mWSKeepaliveFailure.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("reason", reason),
)...))
}
// SetWSConnectionState updates the backing gauge for the WebSocket connected state.
func SetWSConnectionState(connected bool) {
if connected {
wsConnectedState.Store(1)
} else {
wsConnectedState.Store(0)
}
}
// IncWSReconnect increments the WebSocket reconnect counter with a bounded reason label.
func IncWSReconnect(ctx context.Context, reason string) {
if reason == "" {
reason = "unknown"
}
mWSReconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("reason", reason),
)...))
}
func ObserveWSSessionDuration(ctx context.Context, seconds float64, result string) {
mWSSessionDuration.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(
attribute.String("result", result),
)...))
}
// --- Proxy helpers ---
func ObserveProxyActiveConnsObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
o.ObserveInt64(mProxyActiveConns, value, metric.WithAttributes(attrs...))
}
func ObserveProxyBufferBytesObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
o.ObserveInt64(mProxyBufferBytes, value, metric.WithAttributes(attrs...))
}
func ObserveProxyAsyncBacklogObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
o.ObserveInt64(mProxyAsyncBacklogByte, value, metric.WithAttributes(attrs...))
}
func IncProxyDrops(ctx context.Context, tunnelID, protocol string) {
attrs := []attribute.KeyValue{
attribute.String("protocol", protocol),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mProxyDropsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func IncProxyAccept(ctx context.Context, tunnelID, protocol, result, reason string) {
attrs := []attribute.KeyValue{
attribute.String("protocol", protocol),
attribute.String("result", result),
}
if reason != "" {
attrs = append(attrs, attribute.String("reason", reason))
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mProxyAcceptsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func ObserveProxyConnectionDuration(ctx context.Context, tunnelID, protocol, result string, seconds float64) {
attrs := []attribute.KeyValue{
attribute.String("protocol", protocol),
attribute.String("result", result),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mProxyConnDuration.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
}
// IncProxyConnectionEvent records proxy connection lifecycle events (opened/closed).
func IncProxyConnectionEvent(ctx context.Context, tunnelID, protocol, event string) {
if event == "" {
event = "unknown"
}
attrs := []attribute.KeyValue{
attribute.String("protocol", protocol),
attribute.String("event", event),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mProxyConnectionsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
// --- Config/PKI helpers ---
func ObserveConfigApply(ctx context.Context, phase, result string, seconds float64) {
mConfigApply.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(
attribute.String("phase", phase),
attribute.String("result", result),
)...))
}
func IncCertRotation(ctx context.Context, result string) {
mCertRotationTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("result", result),
)...))
}
func ObserveTunnelLatency(ctx context.Context, tunnelID, transport string, seconds float64) {
attrs := []attribute.KeyValue{
attribute.String("transport", transport),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mTunnelLatency.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func IncReconnect(ctx context.Context, tunnelID, initiator, reason string) {
attrs := []attribute.KeyValue{
attribute.String("initiator", initiator),
attribute.String("reason", reason),
}
if ShouldIncludeTunnelID() && tunnelID != "" {
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
}
mReconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
}
func IncConnAttempt(ctx context.Context, transport, result string) {
mConnAttempts.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("transport", transport),
attribute.String("result", result),
)...))
}
func IncConnError(ctx context.Context, transport, typ string) {
mConnErrors.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
attribute.String("transport", transport),
attribute.String("error_type", typ),
)...))
}

View File

@@ -0,0 +1,59 @@
package telemetry
import (
"sync"
"time"
)
func resetMetricsForTest() {
initOnce = sync.Once{}
obsOnce = sync.Once{}
proxyObsOnce = sync.Once{}
obsStopper = nil
proxyStopper = nil
if wsConnStopper != nil {
wsConnStopper()
}
wsConnStopper = nil
meter = nil
mSiteRegistrations = nil
mSiteOnline = nil
mSiteLastHeartbeat = nil
mTunnelSessions = nil
mTunnelBytes = nil
mTunnelLatency = nil
mReconnects = nil
mConnAttempts = nil
mConnErrors = nil
mConfigReloads = nil
mConfigApply = nil
mCertRotationTotal = nil
mProcessStartTime = nil
mBuildInfo = nil
mWSConnectLatency = nil
mWSMessages = nil
mWSDisconnects = nil
mWSKeepaliveFailure = nil
mWSSessionDuration = nil
mWSConnected = nil
mWSReconnects = nil
mProxyActiveConns = nil
mProxyBufferBytes = nil
mProxyAsyncBacklogByte = nil
mProxyDropsTotal = nil
mProxyAcceptsTotal = nil
mProxyConnDuration = nil
mProxyConnectionsTotal = nil
processStartUnix = float64(time.Now().UnixNano()) / 1e9
wsConnectedState.Store(0)
includeTunnelIDVal.Store(false)
includeSiteLabelVal.Store(false)
}

View File

@@ -0,0 +1,106 @@
package telemetry
import (
"context"
"sync/atomic"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
)
// StateView provides a read-only view for observable gauges.
// Implementations must be concurrency-safe and avoid blocking operations.
// All methods should be fast and use RLocks where applicable.
type StateView interface {
// ListSites returns a stable, low-cardinality list of site IDs to expose.
ListSites() []string
// Online returns whether the site is online.
Online(siteID string) (online bool, ok bool)
// LastHeartbeat returns the last heartbeat time for a site.
LastHeartbeat(siteID string) (t time.Time, ok bool)
// ActiveSessions returns the current number of active sessions for a site (across tunnels),
// or scoped to site if your model is site-scoped.
ActiveSessions(siteID string) (n int64, ok bool)
}
var (
stateView atomic.Value // of type StateView
)
// RegisterStateView sets the global StateView used by the default observable callback.
func RegisterStateView(v StateView) {
stateView.Store(v)
// If instruments are registered, ensure a callback exists.
if v != nil {
SetObservableCallback(func(ctx context.Context, o metric.Observer) error {
if any := stateView.Load(); any != nil {
if sv, ok := any.(StateView); ok {
for _, siteID := range sv.ListSites() {
observeSiteOnlineFor(o, sv, siteID)
observeLastHeartbeatFor(o, sv, siteID)
observeSessionsFor(o, siteID, sv)
}
}
}
return nil
})
}
}
func observeSiteOnlineFor(o metric.Observer, sv StateView, siteID string) {
if online, ok := sv.Online(siteID); ok {
val := int64(0)
if online {
val = 1
}
o.ObserveInt64(mSiteOnline, val, metric.WithAttributes(
attribute.String("site_id", siteID),
))
}
}
func observeLastHeartbeatFor(o metric.Observer, sv StateView, siteID string) {
if t, ok := sv.LastHeartbeat(siteID); ok {
ts := float64(t.UnixNano()) / 1e9
o.ObserveFloat64(mSiteLastHeartbeat, ts, metric.WithAttributes(
attribute.String("site_id", siteID),
))
}
}
func observeSessionsFor(o metric.Observer, siteID string, any interface{}) {
if tm, ok := any.(interface{ SessionsByTunnel() map[string]int64 }); ok {
sessions := tm.SessionsByTunnel()
// If tunnel_id labels are enabled, preserve existing per-tunnel observations
if ShouldIncludeTunnelID() {
for tid, n := range sessions {
attrs := []attribute.KeyValue{
attribute.String("site_id", siteID),
}
if tid != "" {
attrs = append(attrs, attribute.String("tunnel_id", tid))
}
o.ObserveInt64(mTunnelSessions, n, metric.WithAttributes(attrs...))
}
return
}
// When tunnel_id is disabled, collapse per-tunnel counts into a single site-level value
var total int64
for _, n := range sessions {
total += n
}
// If there are no per-tunnel entries, fall back to ActiveSessions() if available
if total == 0 {
if svAny := stateView.Load(); svAny != nil {
if sv, ok := svAny.(StateView); ok {
if n, ok2 := sv.ActiveSessions(siteID); ok2 {
total = n
}
}
}
}
o.ObserveInt64(mTunnelSessions, total, metric.WithAttributes(attribute.String("site_id", siteID)))
return
}
}

View File

@@ -0,0 +1,384 @@
package telemetry
import (
"context"
"errors"
"net/http"
"os"
"strings"
"sync/atomic"
"time"
promclient "github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"go.opentelemetry.io/contrib/instrumentation/runtime"
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
"go.opentelemetry.io/otel/exporters/prometheus"
"go.opentelemetry.io/otel/sdk/metric"
"go.opentelemetry.io/otel/sdk/resource"
"go.opentelemetry.io/otel/sdk/trace"
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
"google.golang.org/grpc/credentials"
)
// Config controls telemetry initialization via env flags.
//
// Defaults align with the issue requirements:
// - Prometheus exporter enabled by default (/metrics)
// - OTLP exporter disabled by default
// - Durations in seconds, bytes in raw bytes
// - Admin HTTP server address configurable (for mounting /metrics)
type Config struct {
ServiceName string
ServiceVersion string
// Optional resource attributes
SiteID string
Region string
PromEnabled bool
OTLPEnabled bool
OTLPEndpoint string // host:port
OTLPInsecure bool
MetricExportInterval time.Duration
AdminAddr string // e.g.: ":2112"
// Optional build info for newt_build_info metric
BuildVersion string
BuildCommit string
}
// FromEnv reads configuration from environment variables.
//
// NEWT_METRICS_PROMETHEUS_ENABLED (default: true)
// NEWT_METRICS_OTLP_ENABLED (default: false)
// OTEL_EXPORTER_OTLP_ENDPOINT (default: "localhost:4317")
// OTEL_EXPORTER_OTLP_INSECURE (default: true)
// OTEL_METRIC_EXPORT_INTERVAL (default: 15s)
// OTEL_SERVICE_NAME (default: "newt")
// OTEL_SERVICE_VERSION (default: "")
// NEWT_ADMIN_ADDR (default: ":2112")
func FromEnv() Config {
// Prefer explicit NEWT_* env vars, then fall back to OTEL_RESOURCE_ATTRIBUTES
site := getenv("NEWT_SITE_ID", "")
if site == "" {
site = getenv("NEWT_ID", "")
}
region := os.Getenv("NEWT_REGION")
if site == "" || region == "" {
if ra := os.Getenv("OTEL_RESOURCE_ATTRIBUTES"); ra != "" {
m := parseResourceAttributes(ra)
if site == "" {
site = m["site_id"]
}
if region == "" {
region = m["region"]
}
}
}
return Config{
ServiceName: getenv("OTEL_SERVICE_NAME", "newt"),
ServiceVersion: os.Getenv("OTEL_SERVICE_VERSION"),
SiteID: site,
Region: region,
PromEnabled: getenv("NEWT_METRICS_PROMETHEUS_ENABLED", "true") == "true",
OTLPEnabled: getenv("NEWT_METRICS_OTLP_ENABLED", "false") == "true",
OTLPEndpoint: getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317"),
OTLPInsecure: getenv("OTEL_EXPORTER_OTLP_INSECURE", "true") == "true",
MetricExportInterval: getdur("OTEL_METRIC_EXPORT_INTERVAL", 15*time.Second),
AdminAddr: getenv("NEWT_ADMIN_ADDR", ":2112"),
}
}
// Setup holds initialized telemetry providers and (optionally) a /metrics handler.
// Call Shutdown when the process terminates to flush exporters.
type Setup struct {
MeterProvider *metric.MeterProvider
TracerProvider *trace.TracerProvider
PrometheusHandler http.Handler // nil if Prometheus exporter disabled
shutdowns []func(context.Context) error
}
// Init configures OpenTelemetry metrics and (optionally) tracing.
//
// It sets a global MeterProvider and TracerProvider, registers runtime instrumentation,
// installs recommended histogram views for *_latency_seconds, and returns a Setup with
// a Shutdown method to flush exporters.
func Init(ctx context.Context, cfg Config) (*Setup, error) {
// Configure tunnel_id label inclusion from env (default true)
if getenv("NEWT_METRICS_INCLUDE_TUNNEL_ID", "true") == "true" {
includeTunnelIDVal.Store(true)
} else {
includeTunnelIDVal.Store(false)
}
if getenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true") == "true" {
includeSiteLabelVal.Store(true)
} else {
includeSiteLabelVal.Store(false)
}
res := buildResource(ctx, cfg)
UpdateSiteInfo(cfg.SiteID, cfg.Region)
s := &Setup{}
readers, promHandler, shutdowns, err := setupMetricExport(ctx, cfg, res)
if err != nil {
return nil, err
}
s.PrometheusHandler = promHandler
// Build provider
mp := buildMeterProvider(res, readers)
otel.SetMeterProvider(mp)
s.MeterProvider = mp
s.shutdowns = append(s.shutdowns, mp.Shutdown)
// Optional tracing
if cfg.OTLPEnabled {
if tp, shutdown := setupTracing(ctx, cfg, res); tp != nil {
otel.SetTracerProvider(tp)
s.TracerProvider = tp
s.shutdowns = append(s.shutdowns, func(c context.Context) error {
return errors.Join(shutdown(c), tp.Shutdown(c))
})
}
}
// Add metric exporter shutdowns
s.shutdowns = append(s.shutdowns, shutdowns...)
// Runtime metrics
_ = runtime.Start(runtime.WithMeterProvider(mp))
// Instruments
if err := registerInstruments(); err != nil {
return nil, err
}
if cfg.BuildVersion != "" || cfg.BuildCommit != "" {
RegisterBuildInfo(cfg.BuildVersion, cfg.BuildCommit)
}
return s, nil
}
func buildResource(ctx context.Context, cfg Config) *resource.Resource {
attrs := []attribute.KeyValue{
semconv.ServiceName(cfg.ServiceName),
semconv.ServiceVersion(cfg.ServiceVersion),
}
if cfg.SiteID != "" {
attrs = append(attrs, attribute.String("site_id", cfg.SiteID))
}
if cfg.Region != "" {
attrs = append(attrs, attribute.String("region", cfg.Region))
}
res, _ := resource.New(ctx, resource.WithFromEnv(), resource.WithHost(), resource.WithAttributes(attrs...))
return res
}
func setupMetricExport(ctx context.Context, cfg Config, _ *resource.Resource) ([]metric.Reader, http.Handler, []func(context.Context) error, error) {
var readers []metric.Reader
var shutdowns []func(context.Context) error
var promHandler http.Handler
if cfg.PromEnabled {
reg := promclient.NewRegistry()
exp, err := prometheus.New(prometheus.WithRegisterer(reg))
if err != nil {
return nil, nil, nil, err
}
readers = append(readers, exp)
promHandler = promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
}
if cfg.OTLPEnabled {
mopts := []otlpmetricgrpc.Option{otlpmetricgrpc.WithEndpoint(cfg.OTLPEndpoint)}
if hdrs := parseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")); len(hdrs) > 0 {
mopts = append(mopts, otlpmetricgrpc.WithHeaders(hdrs))
}
if cfg.OTLPInsecure {
mopts = append(mopts, otlpmetricgrpc.WithInsecure())
} else if certFile := os.Getenv("OTEL_EXPORTER_OTLP_CERTIFICATE"); certFile != "" {
if creds, cerr := credentials.NewClientTLSFromFile(certFile, ""); cerr == nil {
mopts = append(mopts, otlpmetricgrpc.WithTLSCredentials(creds))
}
}
mexp, err := otlpmetricgrpc.New(ctx, mopts...)
if err != nil {
return nil, nil, nil, err
}
readers = append(readers, metric.NewPeriodicReader(mexp, metric.WithInterval(cfg.MetricExportInterval)))
shutdowns = append(shutdowns, mexp.Shutdown)
}
return readers, promHandler, shutdowns, nil
}
func buildMeterProvider(res *resource.Resource, readers []metric.Reader) *metric.MeterProvider {
var mpOpts []metric.Option
mpOpts = append(mpOpts, metric.WithResource(res))
for _, r := range readers {
mpOpts = append(mpOpts, metric.WithReader(r))
}
mpOpts = append(mpOpts, metric.WithView(metric.NewView(
metric.Instrument{Name: "newt_*_latency_seconds"},
metric.Stream{Aggregation: metric.AggregationExplicitBucketHistogram{Boundaries: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}}},
)))
mpOpts = append(mpOpts, metric.WithView(metric.NewView(
metric.Instrument{Name: "newt_*"},
metric.Stream{AttributeFilter: func(kv attribute.KeyValue) bool {
k := string(kv.Key)
switch k {
case "tunnel_id", "transport", "direction", "protocol", "result", "reason", "initiator", "error_type", "msg_type", "phase", "version", "commit", "site_id", "region":
return true
default:
return false
}
}},
)))
return metric.NewMeterProvider(mpOpts...)
}
func setupTracing(ctx context.Context, cfg Config, res *resource.Resource) (*trace.TracerProvider, func(context.Context) error) {
topts := []otlptracegrpc.Option{otlptracegrpc.WithEndpoint(cfg.OTLPEndpoint)}
if hdrs := parseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")); len(hdrs) > 0 {
topts = append(topts, otlptracegrpc.WithHeaders(hdrs))
}
if cfg.OTLPInsecure {
topts = append(topts, otlptracegrpc.WithInsecure())
} else if certFile := os.Getenv("OTEL_EXPORTER_OTLP_CERTIFICATE"); certFile != "" {
if creds, cerr := credentials.NewClientTLSFromFile(certFile, ""); cerr == nil {
topts = append(topts, otlptracegrpc.WithTLSCredentials(creds))
}
}
exp, err := otlptracegrpc.New(ctx, topts...)
if err != nil {
return nil, nil
}
tp := trace.NewTracerProvider(trace.WithBatcher(exp), trace.WithResource(res))
return tp, exp.Shutdown
}
// Shutdown flushes exporters and providers in reverse init order.
func (s *Setup) Shutdown(ctx context.Context) error {
var err error
for i := len(s.shutdowns) - 1; i >= 0; i-- {
err = errors.Join(err, s.shutdowns[i](ctx))
}
return err
}
func parseOTLPHeaders(h string) map[string]string {
m := map[string]string{}
if h == "" {
return m
}
pairs := strings.Split(h, ",")
for _, p := range pairs {
kv := strings.SplitN(strings.TrimSpace(p), "=", 2)
if len(kv) == 2 {
m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
}
}
return m
}
// parseResourceAttributes parses OTEL_RESOURCE_ATTRIBUTES formatted as k=v,k2=v2
func parseResourceAttributes(s string) map[string]string {
m := map[string]string{}
if s == "" {
return m
}
parts := strings.Split(s, ",")
for _, p := range parts {
kv := strings.SplitN(strings.TrimSpace(p), "=", 2)
if len(kv) == 2 {
m[strings.TrimSpace(kv[0])] = strings.TrimSpace(kv[1])
}
}
return m
}
// Global site/region used to enrich metric labels.
var siteIDVal atomic.Value
var regionVal atomic.Value
var (
includeTunnelIDVal atomic.Value // bool; default true
includeSiteLabelVal atomic.Value // bool; default false
)
// UpdateSiteInfo updates the global site_id and region used for metric labels.
// Thread-safe via atomic.Value: subsequent metric emissions will include
// the new labels, prior emissions remain unchanged.
func UpdateSiteInfo(siteID, region string) {
if siteID != "" {
siteIDVal.Store(siteID)
}
if region != "" {
regionVal.Store(region)
}
}
func getSiteID() string {
if v, ok := siteIDVal.Load().(string); ok {
return v
}
return ""
}
func getRegion() string {
if v, ok := regionVal.Load().(string); ok {
return v
}
return ""
}
// siteAttrs returns label KVs for site_id and region (if set).
func siteAttrs() []attribute.KeyValue {
var out []attribute.KeyValue
if s := getSiteID(); s != "" {
out = append(out, attribute.String("site_id", s))
}
if r := getRegion(); r != "" {
out = append(out, attribute.String("region", r))
}
return out
}
// SiteLabelKVs exposes site label KVs for other packages (e.g., proxy manager).
func SiteLabelKVs() []attribute.KeyValue {
if !ShouldIncludeSiteLabels() {
return nil
}
return siteAttrs()
}
// ShouldIncludeTunnelID returns whether tunnel_id labels should be emitted.
func ShouldIncludeTunnelID() bool {
if v, ok := includeTunnelIDVal.Load().(bool); ok {
return v
}
return true
}
// ShouldIncludeSiteLabels returns whether site_id/region should be emitted as
// metric labels in addition to resource attributes.
func ShouldIncludeSiteLabels() bool {
if v, ok := includeSiteLabelVal.Load().(bool); ok {
return v
}
return false
}
func getenv(k, d string) string {
if v := os.Getenv(k); v != "" {
return v
}
return d
}
func getdur(k string, d time.Duration) time.Duration {
if v := os.Getenv(k); v != "" {
if p, e := time.ParseDuration(v); e == nil {
return p
}
}
return d
}

View File

@@ -0,0 +1,53 @@
package telemetry
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
"go.opentelemetry.io/otel/attribute"
)
// Test that disallowed attributes are filtered from the exposition.
func TestAttributeFilterDropsUnknownKeys(t *testing.T) {
ctx := context.Background()
resetMetricsForTest()
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
cfg := Config{ServiceName: "newt", PromEnabled: true, AdminAddr: "127.0.0.1:0"}
tel, err := Init(ctx, cfg)
if err != nil {
t.Fatalf("init: %v", err)
}
defer func() { _ = tel.Shutdown(context.Background()) }()
if tel.PrometheusHandler == nil {
t.Fatalf("prom handler nil")
}
ts := httptest.NewServer(tel.PrometheusHandler)
defer ts.Close()
// Add samples with disallowed attribute keys
for _, k := range []string{"forbidden", "site_id", "host"} {
set := attribute.NewSet(attribute.String(k, "x"))
AddTunnelBytesSet(ctx, 123, set)
}
time.Sleep(50 * time.Millisecond)
resp, err := http.Get(ts.URL)
if err != nil {
t.Fatalf("GET: %v", err)
}
defer resp.Body.Close()
b, _ := io.ReadAll(resp.Body)
body := string(b)
if strings.Contains(body, "forbidden=") {
t.Fatalf("unexpected forbidden attribute leaked into metrics: %s", body)
}
if !strings.Contains(body, "site_id=\"x\"") {
t.Fatalf("expected allowed attribute site_id to be present in metrics, got: %s", body)
}
}

View File

@@ -0,0 +1,76 @@
package telemetry
import (
"bufio"
"context"
"io"
"net/http"
"net/http/httptest"
"os"
"path/filepath"
"strings"
"testing"
"time"
)
// Golden test that /metrics contains expected metric names.
func TestMetricsGoldenContains(t *testing.T) {
ctx := context.Background()
resetMetricsForTest()
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
cfg := Config{ServiceName: "newt", PromEnabled: true, AdminAddr: "127.0.0.1:0", BuildVersion: "test"}
tel, err := Init(ctx, cfg)
if err != nil {
t.Fatalf("telemetry init error: %v", err)
}
defer func() { _ = tel.Shutdown(context.Background()) }()
if tel.PrometheusHandler == nil {
t.Fatalf("prom handler nil")
}
ts := httptest.NewServer(tel.PrometheusHandler)
defer ts.Close()
// Trigger counters to ensure they appear in the scrape
IncConnAttempt(ctx, "websocket", "success")
IncWSReconnect(ctx, "io_error")
IncProxyConnectionEvent(ctx, "", "tcp", ProxyConnectionOpened)
if tel.MeterProvider != nil {
_ = tel.MeterProvider.ForceFlush(ctx)
}
time.Sleep(100 * time.Millisecond)
var body string
for i := 0; i < 5; i++ {
resp, err := http.Get(ts.URL)
if err != nil {
t.Fatalf("GET metrics failed: %v", err)
}
b, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
body = string(b)
if strings.Contains(body, "newt_connection_attempts_total") {
break
}
time.Sleep(100 * time.Millisecond)
}
f, err := os.Open(filepath.Join("testdata", "expected_contains.golden"))
if err != nil {
t.Fatalf("read golden: %v", err)
}
defer f.Close()
s := bufio.NewScanner(f)
for s.Scan() {
needle := strings.TrimSpace(s.Text())
if needle == "" {
continue
}
if !strings.Contains(body, needle) {
t.Fatalf("expected metrics body to contain %q. body=\n%s", needle, body)
}
}
if err := s.Err(); err != nil {
t.Fatalf("scan golden: %v", err)
}
}

View File

@@ -0,0 +1,65 @@
package telemetry
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"time"
)
// Smoke test that /metrics contains at least one newt_* metric when Prom exporter is enabled.
func TestMetricsSmoke(t *testing.T) {
ctx := context.Background()
resetMetricsForTest()
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
cfg := Config{
ServiceName: "newt",
PromEnabled: true,
OTLPEnabled: false,
AdminAddr: "127.0.0.1:0",
BuildVersion: "test",
BuildCommit: "deadbeef",
MetricExportInterval: 5 * time.Second,
}
tel, err := Init(ctx, cfg)
if err != nil {
t.Fatalf("telemetry init error: %v", err)
}
defer func() { _ = tel.Shutdown(context.Background()) }()
// Serve the Prom handler on a test server
if tel.PrometheusHandler == nil {
t.Fatalf("Prometheus handler nil; PromEnabled should enable it")
}
ts := httptest.NewServer(tel.PrometheusHandler)
defer ts.Close()
// Record a simple metric and then fetch /metrics
IncConnAttempt(ctx, "websocket", "success")
if tel.MeterProvider != nil {
_ = tel.MeterProvider.ForceFlush(ctx)
}
// Give the exporter a tick to collect
time.Sleep(100 * time.Millisecond)
var body string
for i := 0; i < 5; i++ {
resp, err := http.Get(ts.URL)
if err != nil {
t.Fatalf("GET /metrics failed: %v", err)
}
b, _ := io.ReadAll(resp.Body)
_ = resp.Body.Close()
body = string(b)
if strings.Contains(body, "newt_connection_attempts_total") {
break
}
time.Sleep(100 * time.Millisecond)
}
if !strings.Contains(body, "newt_connection_attempts_total") {
t.Fatalf("expected newt_connection_attempts_total in metrics, got:\n%s", body)
}
}

206
main.go
View File

@@ -1,7 +1,9 @@
package main
import (
"context"
"encoding/json"
"errors"
"flag"
"fmt"
"net"
@@ -22,6 +24,9 @@ import (
"github.com/fosrl/newt/updates"
"github.com/fosrl/newt/websocket"
"github.com/fosrl/newt/internal/state"
"github.com/fosrl/newt/internal/telemetry"
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
"golang.zx2c4.com/wireguard/conn"
"golang.zx2c4.com/wireguard/device"
"golang.zx2c4.com/wireguard/tun"
@@ -91,6 +96,14 @@ func (s *stringSlice) Set(value string) error {
return nil
}
const (
fmtErrMarshaling = "Error marshaling data: %v"
fmtReceivedMsg = "Received: %+v"
topicWGRegister = "newt/wg/register"
msgNoTunnelOrProxy = "No tunnel IP or proxy manager available"
fmtErrParsingTargetData = "Error parsing target data: %v"
)
var (
endpoint string
id string
@@ -120,8 +133,17 @@ var (
preferEndpoint string
healthMonitor *healthcheck.Monitor
enforceHealthcheckCert bool
blueprintFile string
noCloud bool
// Build/version (can be overridden via -ldflags "-X main.newtVersion=...")
newtVersion = "version_replaceme"
// Observability/metrics flags
metricsEnabled bool
otlpEnabled bool
adminAddr string
region string
metricsAsyncBytes bool
blueprintFile string
noCloud bool
// New mTLS configuration variables
tlsClientCert string
@@ -133,6 +155,10 @@ var (
)
func main() {
// Prepare context for graceful shutdown and signal handling
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
defer stop()
// if PANGOLIN_ENDPOINT, NEWT_ID, and NEWT_SECRET are set as environment variables, they will be used as default values
endpoint = os.Getenv("PANGOLIN_ENDPOINT")
id = os.Getenv("NEWT_ID")
@@ -143,6 +169,14 @@ func main() {
updownScript = os.Getenv("UPDOWN_SCRIPT")
interfaceName = os.Getenv("INTERFACE")
generateAndSaveKeyTo = os.Getenv("GENERATE_AND_SAVE_KEY_TO")
// Metrics/observability env mirrors
metricsEnabledEnv := os.Getenv("NEWT_METRICS_PROMETHEUS_ENABLED")
otlpEnabledEnv := os.Getenv("NEWT_METRICS_OTLP_ENABLED")
adminAddrEnv := os.Getenv("NEWT_ADMIN_ADDR")
regionEnv := os.Getenv("NEWT_REGION")
asyncBytesEnv := os.Getenv("NEWT_METRICS_ASYNC_BYTES")
keepInterfaceEnv := os.Getenv("KEEP_INTERFACE")
keepInterface = keepInterfaceEnv == "true"
acceptClientsEnv := os.Getenv("ACCEPT_CLIENTS")
@@ -286,6 +320,43 @@ func main() {
flag.BoolVar(&noCloud, "no-cloud", false, "Disable cloud failover")
}
// Metrics/observability flags (mirror ENV if unset)
if metricsEnabledEnv == "" {
flag.BoolVar(&metricsEnabled, "metrics", true, "Enable Prometheus /metrics exporter")
} else {
if v, err := strconv.ParseBool(metricsEnabledEnv); err == nil {
metricsEnabled = v
} else {
metricsEnabled = true
}
}
if otlpEnabledEnv == "" {
flag.BoolVar(&otlpEnabled, "otlp", false, "Enable OTLP exporters (metrics/traces) to OTEL_EXPORTER_OTLP_ENDPOINT")
} else {
if v, err := strconv.ParseBool(otlpEnabledEnv); err == nil {
otlpEnabled = v
}
}
if adminAddrEnv == "" {
flag.StringVar(&adminAddr, "metrics-admin-addr", "127.0.0.1:2112", "Admin/metrics bind address")
} else {
adminAddr = adminAddrEnv
}
// Async bytes toggle
if asyncBytesEnv == "" {
flag.BoolVar(&metricsAsyncBytes, "metrics-async-bytes", false, "Enable async bytes counting (background flush; lower hot path overhead)")
} else {
if v, err := strconv.ParseBool(asyncBytesEnv); err == nil {
metricsAsyncBytes = v
}
}
// Optional region flag (resource attribute)
if regionEnv == "" {
flag.StringVar(&region, "region", "", "Optional region resource attribute (also NEWT_REGION)")
} else {
region = regionEnv
}
// do a --version check
version := flag.Bool("version", false, "Print the version")
@@ -300,12 +371,57 @@ func main() {
loggerLevel := parseLogLevel(logLevel)
logger.GetLogger().SetLevel(parseLogLevel(logLevel))
newtVersion := "version_replaceme"
// Initialize telemetry after flags are parsed (so flags override env)
tcfg := telemetry.FromEnv()
tcfg.PromEnabled = metricsEnabled
tcfg.OTLPEnabled = otlpEnabled
if adminAddr != "" {
tcfg.AdminAddr = adminAddr
}
// Resource attributes (if available)
tcfg.SiteID = id
tcfg.Region = region
// Build info
tcfg.BuildVersion = newtVersion
tcfg.BuildCommit = os.Getenv("NEWT_COMMIT")
tel, telErr := telemetry.Init(ctx, tcfg)
if telErr != nil {
logger.Warn("Telemetry init failed: %v", telErr)
}
if tel != nil {
// Admin HTTP server (exposes /metrics when Prometheus exporter is enabled)
mux := http.NewServeMux()
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(200) })
if tel.PrometheusHandler != nil {
mux.Handle("/metrics", tel.PrometheusHandler)
}
admin := &http.Server{
Addr: tcfg.AdminAddr,
Handler: otelhttp.NewHandler(mux, "newt-admin"),
ReadTimeout: 5 * time.Second,
WriteTimeout: 10 * time.Second,
ReadHeaderTimeout: 5 * time.Second,
IdleTimeout: 30 * time.Second,
}
go func() {
if err := admin.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
logger.Warn("admin http error: %v", err)
}
}()
defer func() {
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
defer cancel()
_ = admin.Shutdown(ctx)
}()
defer func() { _ = tel.Shutdown(context.Background()) }()
}
if *version {
fmt.Println("Newt version " + newtVersion)
os.Exit(0)
} else {
logger.Info("Newt version " + newtVersion)
logger.Info("Newt version %s", newtVersion)
}
if err := updates.CheckForUpdate("fosrl", "newt", newtVersion); err != nil {
@@ -376,6 +492,8 @@ func main() {
}
endpoint = client.GetConfig().Endpoint // Update endpoint from config
id = client.GetConfig().ID // Update ID from config
// Update site labels for metrics with the resolved ID
telemetry.UpdateSiteInfo(id, region)
// output env var values if set
logger.Debug("Endpoint: %v", endpoint)
@@ -484,6 +602,10 @@ func main() {
// Register handlers for different message types
client.RegisterHandler("newt/wg/connect", func(msg websocket.WSMessage) {
logger.Debug("Received registration message")
regResult := "success"
defer func() {
telemetry.IncSiteRegistration(ctx, regResult)
}()
if stopFunc != nil {
stopFunc() // stop the ws from sending more requests
stopFunc = nil // reset stopFunc to nil to avoid double stopping
@@ -502,22 +624,25 @@ func main() {
jsonData, err := json.Marshal(msg.Data)
if err != nil {
logger.Info("Error marshaling data: %v", err)
logger.Info(fmtErrMarshaling, err)
regResult = "failure"
return
}
if err := json.Unmarshal(jsonData, &wgData); err != nil {
logger.Info("Error unmarshaling target data: %v", err)
regResult = "failure"
return
}
logger.Debug("Received: %+v", msg)
logger.Debug(fmtReceivedMsg, msg)
tun, tnet, err = netstack.CreateNetTUN(
[]netip.Addr{netip.MustParseAddr(wgData.TunnelIP)},
[]netip.Addr{netip.MustParseAddr(dns)},
mtuInt)
if err != nil {
logger.Error("Failed to create TUN device: %v", err)
regResult = "failure"
}
setDownstreamTNetstack(tnet)
@@ -531,6 +656,7 @@ func main() {
host, _, err := net.SplitHostPort(wgData.Endpoint)
if err != nil {
logger.Error("Failed to split endpoint: %v", err)
regResult = "failure"
return
}
@@ -539,6 +665,7 @@ func main() {
endpoint, err := resolveDomain(wgData.Endpoint)
if err != nil {
logger.Error("Failed to resolve endpoint: %v", err)
regResult = "failure"
return
}
@@ -554,12 +681,14 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
err = dev.IpcSet(config)
if err != nil {
logger.Error("Failed to configure WireGuard device: %v", err)
regResult = "failure"
}
// Bring up the device
err = dev.Up()
if err != nil {
logger.Error("Failed to bring up WireGuard device: %v", err)
regResult = "failure"
}
logger.Debug("WireGuard device created. Lets ping the server now...")
@@ -572,9 +701,13 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}
// Use reliable ping for initial connection test
logger.Debug("Testing initial connection with reliable ping...")
_, err = reliablePing(tnet, wgData.ServerIP, pingTimeout, 5)
lat, err := reliablePing(tnet, wgData.ServerIP, pingTimeout, 5)
if err == nil && wgData.PublicKey != "" {
telemetry.ObserveTunnelLatency(ctx, wgData.PublicKey, "wireguard", lat.Seconds())
}
if err != nil {
logger.Warn("Initial reliable ping failed, but continuing: %v", err)
regResult = "failure"
} else {
logger.Debug("Initial connection test successful")
}
@@ -585,11 +718,14 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
// as the pings will continue in the background
if !connected {
logger.Debug("Starting ping check")
pingStopChan = startPingCheck(tnet, wgData.ServerIP, client)
pingStopChan = startPingCheck(tnet, wgData.ServerIP, client, wgData.PublicKey)
}
// Create proxy manager
pm = proxy.NewProxyManager(tnet)
pm.SetAsyncBytes(metricsAsyncBytes)
// Set tunnel_id for metrics (WireGuard peer public key)
pm.SetTunnelID(wgData.PublicKey)
connected = true
@@ -626,10 +762,19 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
client.RegisterHandler("newt/wg/reconnect", func(msg websocket.WSMessage) {
logger.Info("Received reconnect message")
if wgData.PublicKey != "" {
telemetry.IncReconnect(ctx, wgData.PublicKey, "server", telemetry.ReasonServerRequest)
}
// Close the WireGuard device and TUN
closeWgTunnel()
// Clear metrics attrs and sessions for the tunnel
if pm != nil {
pm.ClearTunnelID()
state.Global().ClearTunnel(wgData.PublicKey)
}
// Mark as disconnected
connected = false
@@ -648,6 +793,9 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
client.RegisterHandler("newt/wg/terminate", func(msg websocket.WSMessage) {
logger.Info("Received termination message")
if wgData.PublicKey != "" {
telemetry.IncReconnect(ctx, wgData.PublicKey, "server", telemetry.ReasonServerRequest)
}
// Close the WireGuard device and TUN
closeWgTunnel()
@@ -675,7 +823,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
jsonData, err := json.Marshal(msg.Data)
if err != nil {
logger.Info("Error marshaling data: %v", err)
logger.Info(fmtErrMarshaling, err)
return
}
if err := json.Unmarshal(jsonData, &exitNodeData); err != nil {
@@ -716,7 +864,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
},
}
stopFunc = client.SendMessageInterval("newt/wg/register", map[string]interface{}{
stopFunc = client.SendMessageInterval(topicWGRegister, map[string]interface{}{
"publicKey": publicKey.String(),
"pingResults": pingResults,
"newtVersion": newtVersion,
@@ -819,7 +967,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}
// Send the ping results to the cloud for selection
stopFunc = client.SendMessageInterval("newt/wg/register", map[string]interface{}{
stopFunc = client.SendMessageInterval(topicWGRegister, map[string]interface{}{
"publicKey": publicKey.String(),
"pingResults": pingResults,
"newtVersion": newtVersion,
@@ -829,17 +977,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
})
client.RegisterHandler("newt/tcp/add", func(msg websocket.WSMessage) {
logger.Debug("Received: %+v", msg)
logger.Debug(fmtReceivedMsg, msg)
// if there is no wgData or pm, we can't add targets
if wgData.TunnelIP == "" || pm == nil {
logger.Info("No tunnel IP or proxy manager available")
logger.Info(msgNoTunnelOrProxy)
return
}
targetData, err := parseTargetData(msg.Data)
if err != nil {
logger.Info("Error parsing target data: %v", err)
logger.Info(fmtErrParsingTargetData, err)
return
}
@@ -854,17 +1002,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
})
client.RegisterHandler("newt/udp/add", func(msg websocket.WSMessage) {
logger.Info("Received: %+v", msg)
logger.Info(fmtReceivedMsg, msg)
// if there is no wgData or pm, we can't add targets
if wgData.TunnelIP == "" || pm == nil {
logger.Info("No tunnel IP or proxy manager available")
logger.Info(msgNoTunnelOrProxy)
return
}
targetData, err := parseTargetData(msg.Data)
if err != nil {
logger.Info("Error parsing target data: %v", err)
logger.Info(fmtErrParsingTargetData, err)
return
}
@@ -879,17 +1027,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
})
client.RegisterHandler("newt/udp/remove", func(msg websocket.WSMessage) {
logger.Info("Received: %+v", msg)
logger.Info(fmtReceivedMsg, msg)
// if there is no wgData or pm, we can't add targets
if wgData.TunnelIP == "" || pm == nil {
logger.Info("No tunnel IP or proxy manager available")
logger.Info(msgNoTunnelOrProxy)
return
}
targetData, err := parseTargetData(msg.Data)
if err != nil {
logger.Info("Error parsing target data: %v", err)
logger.Info(fmtErrParsingTargetData, err)
return
}
@@ -904,17 +1052,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
})
client.RegisterHandler("newt/tcp/remove", func(msg websocket.WSMessage) {
logger.Info("Received: %+v", msg)
logger.Info(fmtReceivedMsg, msg)
// if there is no wgData or pm, we can't add targets
if wgData.TunnelIP == "" || pm == nil {
logger.Info("No tunnel IP or proxy manager available")
logger.Info(msgNoTunnelOrProxy)
return
}
targetData, err := parseTargetData(msg.Data)
if err != nil {
logger.Info("Error parsing target data: %v", err)
logger.Info(fmtErrParsingTargetData, err)
return
}
@@ -998,7 +1146,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
jsonData, err := json.Marshal(msg.Data)
if err != nil {
logger.Info("Error marshaling data: %v", err)
logger.Info(fmtErrMarshaling, err)
return
}
if err := json.Unmarshal(jsonData, &sshPublicKeyData); err != nil {
@@ -1155,9 +1303,9 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}
if err := healthMonitor.EnableTarget(requestData.ID); err != nil {
logger.Error("Failed to enable health check target %s: %v", requestData.ID, err)
logger.Error("Failed to enable health check target %d: %v", requestData.ID, err)
} else {
logger.Info("Enabled health check target: %s", requestData.ID)
logger.Info("Enabled health check target: %d", requestData.ID)
}
})
@@ -1180,9 +1328,9 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}
if err := healthMonitor.DisableTarget(requestData.ID); err != nil {
logger.Error("Failed to disable health check target %s: %v", requestData.ID, err)
logger.Error("Failed to disable health check target %d: %v", requestData.ID, err)
} else {
logger.Info("Disabled health check target: %s", requestData.ID)
logger.Info("Disabled health check target: %d", requestData.ID)
}
})
@@ -1252,7 +1400,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
}
// Send registration message to the server for backward compatibility
err := client.SendMessage("newt/wg/register", map[string]interface{}{
err := client.SendMessage(topicWGRegister, map[string]interface{}{
"publicKey": publicKey.String(),
"newtVersion": newtVersion,
"backwardsCompatible": true,

View File

@@ -1,18 +1,28 @@
package proxy
import (
"context"
"errors"
"fmt"
"io"
"net"
"os"
"strings"
"sync"
"sync/atomic"
"time"
"github.com/fosrl/newt/internal/state"
"github.com/fosrl/newt/internal/telemetry"
"github.com/fosrl/newt/logger"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
"golang.zx2c4.com/wireguard/tun/netstack"
"gvisor.dev/gvisor/pkg/tcpip/adapters/gonet"
)
const errUnsupportedProtoFmt = "unsupported protocol: %s"
// Target represents a proxy target with its address and port
type Target struct {
Address string
@@ -28,6 +38,90 @@ type ProxyManager struct {
udpConns []*gonet.UDPConn
running bool
mutex sync.RWMutex
// telemetry (multi-tunnel)
currentTunnelID string
tunnels map[string]*tunnelEntry
asyncBytes bool
flushStop chan struct{}
}
// tunnelEntry holds per-tunnel attributes and (optional) async counters.
type tunnelEntry struct {
attrInTCP attribute.Set
attrOutTCP attribute.Set
attrInUDP attribute.Set
attrOutUDP attribute.Set
bytesInTCP atomic.Uint64
bytesOutTCP atomic.Uint64
bytesInUDP atomic.Uint64
bytesOutUDP atomic.Uint64
activeTCP atomic.Int64
activeUDP atomic.Int64
}
// countingWriter wraps an io.Writer and adds bytes to OTel counter using a pre-built attribute set.
type countingWriter struct {
ctx context.Context
w io.Writer
set attribute.Set
pm *ProxyManager
ent *tunnelEntry
out bool // false=in, true=out
proto string // "tcp" or "udp"
}
func (cw *countingWriter) Write(p []byte) (int, error) {
n, err := cw.w.Write(p)
if n > 0 {
if cw.pm != nil && cw.pm.asyncBytes && cw.ent != nil {
switch cw.proto {
case "tcp":
if cw.out {
cw.ent.bytesOutTCP.Add(uint64(n))
} else {
cw.ent.bytesInTCP.Add(uint64(n))
}
case "udp":
if cw.out {
cw.ent.bytesOutUDP.Add(uint64(n))
} else {
cw.ent.bytesInUDP.Add(uint64(n))
}
}
} else {
telemetry.AddTunnelBytesSet(cw.ctx, int64(n), cw.set)
}
}
return n, err
}
func classifyProxyError(err error) string {
if err == nil {
return ""
}
if errors.Is(err, net.ErrClosed) {
return "closed"
}
if ne, ok := err.(net.Error); ok {
if ne.Timeout() {
return "timeout"
}
if ne.Temporary() {
return "temporary"
}
}
msg := strings.ToLower(err.Error())
switch {
case strings.Contains(msg, "refused"):
return "refused"
case strings.Contains(msg, "reset"):
return "reset"
default:
return "io_error"
}
}
// NewProxyManager creates a new proxy manager instance
@@ -38,9 +132,77 @@ func NewProxyManager(tnet *netstack.Net) *ProxyManager {
udpTargets: make(map[string]map[int]string),
listeners: make([]*gonet.TCPListener, 0),
udpConns: make([]*gonet.UDPConn, 0),
tunnels: make(map[string]*tunnelEntry),
}
}
// SetTunnelID sets the WireGuard peer public key used as tunnel_id label.
func (pm *ProxyManager) SetTunnelID(id string) {
pm.mutex.Lock()
defer pm.mutex.Unlock()
pm.currentTunnelID = id
if _, ok := pm.tunnels[id]; !ok {
pm.tunnels[id] = &tunnelEntry{}
}
e := pm.tunnels[id]
// include site labels if available
site := telemetry.SiteLabelKVs()
build := func(base []attribute.KeyValue) attribute.Set {
if telemetry.ShouldIncludeTunnelID() {
base = append([]attribute.KeyValue{attribute.String("tunnel_id", id)}, base...)
}
base = append(site, base...)
return attribute.NewSet(base...)
}
e.attrInTCP = build([]attribute.KeyValue{
attribute.String("direction", "ingress"),
attribute.String("protocol", "tcp"),
})
e.attrOutTCP = build([]attribute.KeyValue{
attribute.String("direction", "egress"),
attribute.String("protocol", "tcp"),
})
e.attrInUDP = build([]attribute.KeyValue{
attribute.String("direction", "ingress"),
attribute.String("protocol", "udp"),
})
e.attrOutUDP = build([]attribute.KeyValue{
attribute.String("direction", "egress"),
attribute.String("protocol", "udp"),
})
}
// ClearTunnelID clears cached attribute sets for the current tunnel.
func (pm *ProxyManager) ClearTunnelID() {
pm.mutex.Lock()
defer pm.mutex.Unlock()
id := pm.currentTunnelID
if id == "" {
return
}
if e, ok := pm.tunnels[id]; ok {
// final flush for this tunnel
inTCP := e.bytesInTCP.Swap(0)
outTCP := e.bytesOutTCP.Swap(0)
inUDP := e.bytesInUDP.Swap(0)
outUDP := e.bytesOutUDP.Swap(0)
if inTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
}
if outTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
}
if inUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
}
if outUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
}
delete(pm.tunnels, id)
}
pm.currentTunnelID = ""
}
// init function without tnet
func NewProxyManagerWithoutTNet() *ProxyManager {
return &ProxyManager{
@@ -75,7 +237,7 @@ func (pm *ProxyManager) AddTarget(proto, listenIP string, port int, targetAddr s
}
pm.udpTargets[listenIP][port] = targetAddr
default:
return fmt.Errorf("unsupported protocol: %s", proto)
return fmt.Errorf(errUnsupportedProtoFmt, proto)
}
if pm.running {
@@ -124,13 +286,28 @@ func (pm *ProxyManager) RemoveTarget(proto, listenIP string, port int) error {
return fmt.Errorf("target not found: %s:%d", listenIP, port)
}
default:
return fmt.Errorf("unsupported protocol: %s", proto)
return fmt.Errorf(errUnsupportedProtoFmt, proto)
}
return nil
}
// Start begins listening for all configured proxy targets
func (pm *ProxyManager) Start() error {
// Register proxy observables once per process
telemetry.SetProxyObservableCallback(func(ctx context.Context, o metric.Observer) error {
pm.mutex.RLock()
defer pm.mutex.RUnlock()
for _, e := range pm.tunnels {
// active connections
telemetry.ObserveProxyActiveConnsObs(o, e.activeTCP.Load(), e.attrOutTCP.ToSlice())
telemetry.ObserveProxyActiveConnsObs(o, e.activeUDP.Load(), e.attrOutUDP.ToSlice())
// backlog bytes (sum of unflushed counters)
b := int64(e.bytesInTCP.Load() + e.bytesOutTCP.Load() + e.bytesInUDP.Load() + e.bytesOutUDP.Load())
telemetry.ObserveProxyAsyncBacklogObs(o, b, e.attrOutTCP.ToSlice())
telemetry.ObserveProxyBufferBytesObs(o, b, e.attrOutTCP.ToSlice())
}
return nil
})
pm.mutex.Lock()
defer pm.mutex.Unlock()
@@ -160,6 +337,75 @@ func (pm *ProxyManager) Start() error {
return nil
}
func (pm *ProxyManager) SetAsyncBytes(b bool) {
pm.mutex.Lock()
defer pm.mutex.Unlock()
pm.asyncBytes = b
if b && pm.flushStop == nil {
pm.flushStop = make(chan struct{})
go pm.flushLoop()
}
}
func (pm *ProxyManager) flushLoop() {
flushInterval := 2 * time.Second
if v := os.Getenv("OTEL_METRIC_EXPORT_INTERVAL"); v != "" {
if d, err := time.ParseDuration(v); err == nil && d > 0 {
if d/2 < flushInterval {
flushInterval = d / 2
}
}
}
ticker := time.NewTicker(flushInterval)
defer ticker.Stop()
for {
select {
case <-ticker.C:
pm.mutex.RLock()
for _, e := range pm.tunnels {
inTCP := e.bytesInTCP.Swap(0)
outTCP := e.bytesOutTCP.Swap(0)
inUDP := e.bytesInUDP.Swap(0)
outUDP := e.bytesOutUDP.Swap(0)
if inTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
}
if outTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
}
if inUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
}
if outUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
}
}
pm.mutex.RUnlock()
case <-pm.flushStop:
pm.mutex.RLock()
for _, e := range pm.tunnels {
inTCP := e.bytesInTCP.Swap(0)
outTCP := e.bytesOutTCP.Swap(0)
inUDP := e.bytesInUDP.Swap(0)
outUDP := e.bytesOutUDP.Swap(0)
if inTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
}
if outTCP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
}
if inUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
}
if outUDP > 0 {
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
}
}
pm.mutex.RUnlock()
return
}
}
}
func (pm *ProxyManager) Stop() error {
pm.mutex.Lock()
defer pm.mutex.Unlock()
@@ -227,7 +473,7 @@ func (pm *ProxyManager) startTarget(proto, listenIP string, port int, targetAddr
go pm.handleUDPProxy(conn, targetAddr)
default:
return fmt.Errorf("unsupported protocol: %s", proto)
return fmt.Errorf(errUnsupportedProtoFmt, proto)
}
logger.Info("Started %s proxy to %s", proto, targetAddr)
@@ -236,54 +482,84 @@ func (pm *ProxyManager) startTarget(proto, listenIP string, port int, targetAddr
return nil
}
// getEntry returns per-tunnel entry or nil.
func (pm *ProxyManager) getEntry(id string) *tunnelEntry {
pm.mutex.RLock()
e := pm.tunnels[id]
pm.mutex.RUnlock()
return e
}
func (pm *ProxyManager) handleTCPProxy(listener net.Listener, targetAddr string) {
for {
conn, err := listener.Accept()
if err != nil {
// Check if we're shutting down or the listener was closed
telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "tcp", "failure", classifyProxyError(err))
if !pm.running {
return
}
// Check for specific network errors that indicate the listener is closed
if ne, ok := err.(net.Error); ok && !ne.Temporary() {
logger.Info("TCP listener closed, stopping proxy handler for %v", listener.Addr())
return
}
logger.Error("Error accepting TCP connection: %v", err)
// Don't hammer the CPU if we hit a temporary error
time.Sleep(100 * time.Millisecond)
continue
}
go func() {
tunnelID := pm.currentTunnelID
telemetry.IncProxyAccept(context.Background(), tunnelID, "tcp", "success", "")
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionOpened)
if tunnelID != "" {
state.Global().IncSessions(tunnelID)
if e := pm.getEntry(tunnelID); e != nil {
e.activeTCP.Add(1)
}
}
go func(tunnelID string, accepted net.Conn) {
connStart := time.Now()
target, err := net.Dial("tcp", targetAddr)
if err != nil {
logger.Error("Error connecting to target: %v", err)
conn.Close()
accepted.Close()
telemetry.IncProxyAccept(context.Background(), tunnelID, "tcp", "failure", classifyProxyError(err))
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionClosed)
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "tcp", "failure", time.Since(connStart).Seconds())
return
}
// Create a WaitGroup to ensure both copy operations complete
entry := pm.getEntry(tunnelID)
if entry == nil {
entry = &tunnelEntry{}
}
var wg sync.WaitGroup
wg.Add(2)
go func() {
go func(ent *tunnelEntry) {
defer wg.Done()
io.Copy(target, conn)
target.Close()
}()
cw := &countingWriter{ctx: context.Background(), w: target, set: ent.attrInTCP, pm: pm, ent: ent, out: false, proto: "tcp"}
_, _ = io.Copy(cw, accepted)
_ = target.Close()
}(entry)
go func() {
go func(ent *tunnelEntry) {
defer wg.Done()
io.Copy(conn, target)
conn.Close()
}()
cw := &countingWriter{ctx: context.Background(), w: accepted, set: ent.attrOutTCP, pm: pm, ent: ent, out: true, proto: "tcp"}
_, _ = io.Copy(cw, target)
_ = accepted.Close()
}(entry)
// Wait for both copies to complete
wg.Wait()
}()
if tunnelID != "" {
state.Global().DecSessions(tunnelID)
if e := pm.getEntry(tunnelID); e != nil {
e.activeTCP.Add(-1)
}
}
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "tcp", "success", time.Since(connStart).Seconds())
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionClosed)
}(tunnelID, conn)
}
}
@@ -326,6 +602,18 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
}
clientKey := remoteAddr.String()
// bytes from client -> target (direction=in)
if pm.currentTunnelID != "" && n > 0 {
if pm.asyncBytes {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
e.bytesInUDP.Add(uint64(n))
}
} else {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
telemetry.AddTunnelBytesSet(context.Background(), int64(n), e.attrInUDP)
}
}
}
clientsMutex.RLock()
targetConn, exists := clientConns[clientKey]
clientsMutex.RUnlock()
@@ -334,28 +622,44 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
targetUDPAddr, err := net.ResolveUDPAddr("udp", targetAddr)
if err != nil {
logger.Error("Error resolving target address: %v", err)
telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "udp", "failure", "resolve")
continue
}
targetConn, err = net.DialUDP("udp", nil, targetUDPAddr)
if err != nil {
logger.Error("Error connecting to target: %v", err)
telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "udp", "failure", classifyProxyError(err))
continue
}
tunnelID := pm.currentTunnelID
telemetry.IncProxyAccept(context.Background(), tunnelID, "udp", "success", "")
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "udp", telemetry.ProxyConnectionOpened)
// Only increment activeUDP after a successful DialUDP
if e := pm.getEntry(tunnelID); e != nil {
e.activeUDP.Add(1)
}
clientsMutex.Lock()
clientConns[clientKey] = targetConn
clientsMutex.Unlock()
go func(clientKey string, targetConn *net.UDPConn, remoteAddr net.Addr) {
go func(clientKey string, targetConn *net.UDPConn, remoteAddr net.Addr, tunnelID string) {
start := time.Now()
result := "success"
defer func() {
// Always clean up when this goroutine exits
clientsMutex.Lock()
if storedConn, exists := clientConns[clientKey]; exists && storedConn == targetConn {
delete(clientConns, clientKey)
targetConn.Close()
if e := pm.getEntry(tunnelID); e != nil {
e.activeUDP.Add(-1)
}
}
clientsMutex.Unlock()
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "udp", result, time.Since(start).Seconds())
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "udp", telemetry.ProxyConnectionClosed)
}()
buffer := make([]byte, 65507)
@@ -363,25 +667,52 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
n, _, err := targetConn.ReadFromUDP(buffer)
if err != nil {
logger.Error("Error reading from target: %v", err)
result = "failure"
return // defer will handle cleanup
}
// bytes from target -> client (direction=out)
if pm.currentTunnelID != "" && n > 0 {
if pm.asyncBytes {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
e.bytesOutUDP.Add(uint64(n))
}
} else {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
telemetry.AddTunnelBytesSet(context.Background(), int64(n), e.attrOutUDP)
}
}
}
_, err = conn.WriteTo(buffer[:n], remoteAddr)
if err != nil {
logger.Error("Error writing to client: %v", err)
telemetry.IncProxyDrops(context.Background(), pm.currentTunnelID, "udp")
result = "failure"
return // defer will handle cleanup
}
}
}(clientKey, targetConn, remoteAddr)
}(clientKey, targetConn, remoteAddr, tunnelID)
}
_, err = targetConn.Write(buffer[:n])
written, err := targetConn.Write(buffer[:n])
if err != nil {
logger.Error("Error writing to target: %v", err)
telemetry.IncProxyDrops(context.Background(), pm.currentTunnelID, "udp")
targetConn.Close()
clientsMutex.Lock()
delete(clientConns, clientKey)
clientsMutex.Unlock()
} else if pm.currentTunnelID != "" && written > 0 {
if pm.asyncBytes {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
e.bytesInUDP.Add(uint64(written))
}
} else {
if e := pm.getEntry(pm.currentTunnelID); e != nil {
telemetry.AddTunnelBytesSet(context.Background(), int64(written), e.attrInUDP)
}
}
}
}
}

12
stub.go
View File

@@ -8,25 +8,27 @@ import (
)
func setupClientsNative(client *websocket.Client, host string) {
return // This function is not implemented for non-Linux systems.
_ = client
_ = host
// No-op for non-Linux systems
}
func closeWgServiceNative() {
// No-op for non-Linux systems
return
}
func clientsOnConnectNative() {
// No-op for non-Linux systems
return
}
func clientsHandleNewtConnectionNative(publicKey, endpoint string) {
_ = publicKey
_ = endpoint
// No-op for non-Linux systems
return
}
func clientsAddProxyTargetNative(pm *proxy.ProxyManager, tunnelIp string) {
_ = pm
_ = tunnelIp
// No-op for non-Linux systems
return
}

28
util.go
View File

@@ -2,6 +2,7 @@ package main
import (
"bytes"
"context"
"encoding/base64"
"encoding/hex"
"encoding/json"
@@ -14,6 +15,7 @@ import (
"math/rand"
"github.com/fosrl/newt/internal/telemetry"
"github.com/fosrl/newt/logger"
"github.com/fosrl/newt/proxy"
"github.com/fosrl/newt/websocket"
@@ -24,6 +26,8 @@ import (
"gopkg.in/yaml.v3"
)
const msgHealthFileWriteFailed = "Failed to write health file: %v"
func fixKey(key string) string {
// Remove any whitespace
key = strings.TrimSpace(key)
@@ -176,7 +180,7 @@ func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopC
if healthFile != "" {
err := os.WriteFile(healthFile, []byte("ok"), 0644)
if err != nil {
logger.Warn("Failed to write health file: %v", err)
logger.Warn(msgHealthFileWriteFailed, err)
}
}
return stopChan, nil
@@ -217,11 +221,13 @@ func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopC
if healthFile != "" {
err := os.WriteFile(healthFile, []byte("ok"), 0644)
if err != nil {
logger.Warn("Failed to write health file: %v", err)
logger.Warn(msgHealthFileWriteFailed, err)
}
}
return
}
case <-pingStopChan:
// Stop the goroutine when signaled
return
}
}
}()
@@ -230,7 +236,7 @@ func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopC
return stopChan, fmt.Errorf("initial ping attempts failed, continuing in background")
}
func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Client) chan struct{} {
func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Client, tunnelID string) chan struct{} {
maxInterval := 6 * time.Second
currentInterval := pingInterval
consecutiveFailures := 0
@@ -293,6 +299,9 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
if !connectionLost {
connectionLost = true
logger.Warn("Connection to server lost after %d failures. Continuous reconnection attempts will be made.", consecutiveFailures)
if tunnelID != "" {
telemetry.IncReconnect(context.Background(), tunnelID, "client", telemetry.ReasonTimeout)
}
stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{}, 3*time.Second)
// Send registration message to the server for backward compatibility
err := client.SendMessage("newt/wg/register", map[string]interface{}{
@@ -319,6 +328,10 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
} else {
// Track recent latencies
recentLatencies = append(recentLatencies, latency)
// Record tunnel latency (limit sampling to this periodic check)
if tunnelID != "" {
telemetry.ObserveTunnelLatency(context.Background(), tunnelID, "wireguard", latency.Seconds())
}
if len(recentLatencies) > 10 {
recentLatencies = recentLatencies[1:]
}
@@ -468,7 +481,8 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
continue
}
if action == "add" {
switch action {
case "add":
target := parts[1] + ":" + parts[2]
// Call updown script if provided
@@ -494,7 +508,7 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
// Add the new target
pm.AddTarget(proto, tunnelIP, port, processedTarget)
} else if action == "remove" {
case "remove":
logger.Info("Removing target with port %d", port)
target := parts[1] + ":" + parts[2]
@@ -512,6 +526,8 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
logger.Error("Failed to remove target: %v", err)
return err
}
default:
logger.Info("Unknown action: %s", action)
}
}

View File

@@ -7,6 +7,7 @@ import (
"encoding/json"
"fmt"
"io"
"net"
"net/http"
"net/url"
"os"
@@ -18,6 +19,11 @@ import (
"github.com/fosrl/newt/logger"
"github.com/gorilla/websocket"
"context"
"github.com/fosrl/newt/internal/telemetry"
"go.opentelemetry.io/otel"
)
type Client struct {
@@ -37,6 +43,8 @@ type Client struct {
writeMux sync.Mutex
clientType string // Type of client (e.g., "newt", "olm")
tlsConfig TLSConfig
metricsCtxMu sync.RWMutex
metricsCtx context.Context
configNeedsSave bool // Flag to track if config needs to be saved
}
@@ -81,6 +89,26 @@ func (c *Client) OnTokenUpdate(callback func(token string)) {
c.onTokenUpdate = callback
}
func (c *Client) metricsContext() context.Context {
c.metricsCtxMu.RLock()
defer c.metricsCtxMu.RUnlock()
if c.metricsCtx != nil {
return c.metricsCtx
}
return context.Background()
}
func (c *Client) setMetricsContext(ctx context.Context) {
c.metricsCtxMu.Lock()
c.metricsCtx = ctx
c.metricsCtxMu.Unlock()
}
// MetricsContext exposes the context used for telemetry emission when a connection is active.
func (c *Client) MetricsContext() context.Context {
return c.metricsContext()
}
// NewClient creates a new websocket client
func NewClient(clientType string, ID, secret string, endpoint string, pingInterval time.Duration, pingTimeout time.Duration, opts ...ClientOption) (*Client, error) {
config := &Config{
@@ -140,6 +168,7 @@ func (c *Client) Close() error {
// Set connection status to false
c.setConnected(false)
telemetry.SetWSConnectionState(false)
// Close the WebSocket connection gracefully
if c.conn != nil {
@@ -170,7 +199,11 @@ func (c *Client) SendMessage(messageType string, data interface{}) error {
c.writeMux.Lock()
defer c.writeMux.Unlock()
return c.conn.WriteJSON(msg)
if err := c.conn.WriteJSON(msg); err != nil {
return err
}
telemetry.IncWSMessage(c.metricsContext(), "out", "text")
return nil
}
func (c *Client) SendMessageInterval(messageType string, data interface{}, interval time.Duration) (stop func()) {
@@ -265,8 +298,12 @@ func (c *Client) getToken() (string, error) {
return "", fmt.Errorf("failed to marshal token request data: %w", err)
}
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
defer cancel()
// Create a new request
req, err := http.NewRequest(
req, err := http.NewRequestWithContext(
ctx,
"POST",
baseEndpoint+"/api/v1/auth/"+c.clientType+"/get-token",
bytes.NewBuffer(jsonData),
@@ -288,6 +325,8 @@ func (c *Client) getToken() (string, error) {
}
resp, err := client.Do(req)
if err != nil {
telemetry.IncConnAttempt(ctx, "auth", "failure")
telemetry.IncConnError(ctx, "auth", classifyConnError(err))
return "", fmt.Errorf("failed to request new token: %w", err)
}
defer resp.Body.Close()
@@ -295,6 +334,16 @@ func (c *Client) getToken() (string, error) {
if resp.StatusCode != http.StatusOK {
body, _ := io.ReadAll(resp.Body)
logger.Error("Failed to get token with status code: %d, body: %s", resp.StatusCode, string(body))
telemetry.IncConnAttempt(ctx, "auth", "failure")
etype := "io_error"
if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
etype = "auth_failed"
}
telemetry.IncConnError(ctx, "auth", etype)
// Reconnect reason mapping for auth failures
if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonAuthError)
}
return "", fmt.Errorf("failed to get token with status code: %d, body: %s", resp.StatusCode, string(body))
}
@@ -313,10 +362,55 @@ func (c *Client) getToken() (string, error) {
}
logger.Debug("Received token: %s", tokenResp.Data.Token)
telemetry.IncConnAttempt(ctx, "auth", "success")
return tokenResp.Data.Token, nil
}
// classifyConnError maps to fixed, low-cardinality error_type values.
// Allowed enum: dial_timeout, tls_handshake, auth_failed, io_error
func classifyConnError(err error) string {
if err == nil {
return ""
}
msg := strings.ToLower(err.Error())
switch {
case strings.Contains(msg, "tls") || strings.Contains(msg, "certificate"):
return "tls_handshake"
case strings.Contains(msg, "timeout") || strings.Contains(msg, "i/o timeout") || strings.Contains(msg, "deadline exceeded"):
return "dial_timeout"
case strings.Contains(msg, "unauthorized") || strings.Contains(msg, "forbidden"):
return "auth_failed"
default:
// Group remaining network/socket errors as io_error to avoid label explosion
return "io_error"
}
}
func classifyWSDisconnect(err error) (result, reason string) {
if err == nil {
return "success", "normal"
}
if websocket.IsCloseError(err, websocket.CloseNormalClosure) {
return "success", "normal"
}
if ne, ok := err.(net.Error); ok && ne.Timeout() {
return "error", "timeout"
}
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
return "error", "unexpected_close"
}
msg := strings.ToLower(err.Error())
switch {
case strings.Contains(msg, "eof"):
return "error", "eof"
case strings.Contains(msg, "reset"):
return "error", "connection_reset"
default:
return "error", "read_error"
}
}
func (c *Client) connectWithRetry() {
for {
select {
@@ -335,9 +429,13 @@ func (c *Client) connectWithRetry() {
}
func (c *Client) establishConnection() error {
ctx := context.Background()
// Get token for authentication
token, err := c.getToken()
if err != nil {
telemetry.IncConnAttempt(ctx, "websocket", "failure")
telemetry.IncConnError(ctx, "websocket", classifyConnError(err))
return fmt.Errorf("failed to get token: %w", err)
}
@@ -370,7 +468,12 @@ func (c *Client) establishConnection() error {
q.Set("clientType", c.clientType)
u.RawQuery = q.Encode()
// Connect to WebSocket
// Connect to WebSocket (optional span)
tr := otel.Tracer("newt")
ctx, span := tr.Start(ctx, "ws.connect")
defer span.End()
start := time.Now()
dialer := websocket.DefaultDialer
// Use new TLS configuration method
@@ -392,18 +495,42 @@ func (c *Client) establishConnection() error {
logger.Debug("WebSocket TLS certificate verification disabled via SKIP_TLS_VERIFY environment variable")
}
conn, _, err := dialer.Dial(u.String(), nil)
conn, _, err := dialer.DialContext(ctx, u.String(), nil)
lat := time.Since(start).Seconds()
if err != nil {
telemetry.IncConnAttempt(ctx, "websocket", "failure")
etype := classifyConnError(err)
telemetry.IncConnError(ctx, "websocket", etype)
telemetry.ObserveWSConnectLatency(ctx, lat, "failure", etype)
// Map handshake-related errors to reconnect reasons where appropriate
if etype == "tls_handshake" {
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonHandshakeError)
} else if etype == "dial_timeout" {
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonTimeout)
} else {
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonError)
}
telemetry.IncWSReconnect(ctx, etype)
return fmt.Errorf("failed to connect to WebSocket: %w", err)
}
telemetry.IncConnAttempt(ctx, "websocket", "success")
telemetry.ObserveWSConnectLatency(ctx, lat, "success", "")
c.conn = conn
c.setConnected(true)
telemetry.SetWSConnectionState(true)
c.setMetricsContext(ctx)
sessionStart := time.Now()
// Wire up pong handler for metrics
c.conn.SetPongHandler(func(appData string) error {
telemetry.IncWSMessage(c.metricsContext(), "in", "pong")
return nil
})
// Start the ping monitor
go c.pingMonitor()
// Start the read pump with disconnect detection
go c.readPumpWithDisconnectDetection()
go c.readPumpWithDisconnectDetection(sessionStart)
if c.onConnect != nil {
err := c.saveConfig()
@@ -496,6 +623,9 @@ func (c *Client) pingMonitor() {
}
c.writeMux.Lock()
err := c.conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(c.pingTimeout))
if err == nil {
telemetry.IncWSMessage(c.metricsContext(), "out", "ping")
}
c.writeMux.Unlock()
if err != nil {
// Check if we're shutting down before logging error and reconnecting
@@ -505,6 +635,8 @@ func (c *Client) pingMonitor() {
return
default:
logger.Error("Ping failed: %v", err)
telemetry.IncWSKeepaliveFailure(c.metricsContext(), "ping_write")
telemetry.IncWSReconnect(c.metricsContext(), "ping_write")
c.reconnect()
return
}
@@ -514,17 +646,26 @@ func (c *Client) pingMonitor() {
}
// readPumpWithDisconnectDetection reads messages and triggers reconnect on error
func (c *Client) readPumpWithDisconnectDetection() {
func (c *Client) readPumpWithDisconnectDetection(started time.Time) {
ctx := c.metricsContext()
disconnectReason := "shutdown"
disconnectResult := "success"
defer func() {
if c.conn != nil {
c.conn.Close()
}
if !started.IsZero() {
telemetry.ObserveWSSessionDuration(ctx, time.Since(started).Seconds(), disconnectResult)
}
telemetry.IncWSDisconnect(ctx, disconnectReason, disconnectResult)
// Only attempt reconnect if we're not shutting down
select {
case <-c.done:
// Shutting down, don't reconnect
return
default:
telemetry.IncWSReconnect(ctx, disconnectReason)
c.reconnect()
}
}()
@@ -532,23 +673,33 @@ func (c *Client) readPumpWithDisconnectDetection() {
for {
select {
case <-c.done:
disconnectReason = "shutdown"
disconnectResult = "success"
return
default:
var msg WSMessage
err := c.conn.ReadJSON(&msg)
if err == nil {
telemetry.IncWSMessage(c.metricsContext(), "in", "text")
}
if err != nil {
// Check if we're shutting down before logging error
select {
case <-c.done:
// Expected during shutdown, don't log as error
logger.Debug("WebSocket connection closed during shutdown")
disconnectReason = "shutdown"
disconnectResult = "success"
return
default:
// Unexpected error during normal operation
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure, websocket.CloseNormalClosure) {
logger.Error("WebSocket read error: %v", err)
} else {
logger.Debug("WebSocket connection closed: %v", err)
disconnectResult, disconnectReason = classifyWSDisconnect(err)
if disconnectResult == "error" {
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure, websocket.CloseNormalClosure) {
logger.Error("WebSocket read error: %v", err)
} else {
logger.Debug("WebSocket connection closed: %v", err)
}
}
return // triggers reconnect via defer
}
@@ -565,6 +716,7 @@ func (c *Client) readPumpWithDisconnectDetection() {
func (c *Client) reconnect() {
c.setConnected(false)
telemetry.SetWSConnectionState(false)
if c.conn != nil {
c.conn.Close()
c.conn = nil

View File

@@ -3,6 +3,7 @@
package wg
import (
"context"
"encoding/json"
"errors"
"fmt"
@@ -13,16 +14,19 @@ import (
"sync"
"time"
"math/rand"
"github.com/fosrl/newt/logger"
"github.com/fosrl/newt/network"
"github.com/fosrl/newt/websocket"
"github.com/vishvananda/netlink"
"golang.org/x/crypto/chacha20poly1305"
"golang.org/x/crypto/curve25519"
"golang.org/x/exp/rand"
"golang.zx2c4.com/wireguard/conn"
"golang.zx2c4.com/wireguard/wgctrl"
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
"github.com/fosrl/newt/internal/telemetry"
)
type WgConfig struct {
@@ -106,7 +110,7 @@ func FindAvailableUDPPort(minPort, maxPort uint16) (uint16, error) {
}
// Fisher-Yates shuffle to randomize the port order
rand.Seed(uint64(time.Now().UnixNano()))
rand.Seed(time.Now().UnixNano())
for i := len(portRange) - 1; i > 0; i-- {
j := rand.Intn(i + 1)
portRange[i], portRange[j] = portRange[j], portRange[i]
@@ -280,6 +284,15 @@ func (s *WireGuardService) LoadRemoteConfig() error {
}
func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
ctx := context.Background()
if s.client != nil {
ctx = s.client.MetricsContext()
}
result := "success"
defer func() {
telemetry.IncConfigReload(ctx, result)
}()
var config WgConfig
logger.Debug("Received message: %v", msg)
@@ -288,11 +301,13 @@ func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
jsonData, err := json.Marshal(msg.Data)
if err != nil {
logger.Info("Error marshaling data: %v", err)
result = "failure"
return
}
if err := json.Unmarshal(jsonData, &config); err != nil {
logger.Info("Error unmarshaling target data: %v", err)
result = "failure"
return
}
s.config = config
@@ -302,13 +317,29 @@ func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
s.stopGetConfig = nil
}
// Ensure the WireGuard interface and peers are configured
if err := s.ensureWireguardInterface(config); err != nil {
logger.Error("Failed to ensure WireGuard interface: %v", err)
// telemetry: config reload success
// Optional reconnect reason mapping: config change
if s.serverPubKey != "" {
telemetry.IncReconnect(ctx, s.serverPubKey, "client", telemetry.ReasonConfigChange)
}
// Ensure the WireGuard interface and peers are configured
start := time.Now()
if err := s.ensureWireguardInterface(config); err != nil {
logger.Error("Failed to ensure WireGuard interface: %v", err)
telemetry.ObserveConfigApply(ctx, "interface", "failure", time.Since(start).Seconds())
result = "failure"
} else {
telemetry.ObserveConfigApply(ctx, "interface", "success", time.Since(start).Seconds())
}
startPeers := time.Now()
if err := s.ensureWireguardPeers(config.Peers); err != nil {
logger.Error("Failed to ensure WireGuard peers: %v", err)
telemetry.ObserveConfigApply(ctx, "peer", "failure", time.Since(startPeers).Seconds())
result = "failure"
} else {
telemetry.ObserveConfigApply(ctx, "peer", "success", time.Since(startPeers).Seconds())
}
}

View File

@@ -1,6 +1,7 @@
package wgnetstack
import (
"context"
"crypto/rand"
"encoding/base64"
"encoding/hex"
@@ -26,6 +27,8 @@ import (
"golang.zx2c4.com/wireguard/tun"
"golang.zx2c4.com/wireguard/tun/netstack"
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
"github.com/fosrl/newt/internal/telemetry"
)
type WgConfig struct {
@@ -242,14 +245,20 @@ func NewWireGuardService(interfaceName string, mtu int, generateAndSaveKeyTo str
return service, nil
}
// ReportRTT allows reporting native RTTs to telemetry, rate-limited externally.
func (s *WireGuardService) ReportRTT(seconds float64) {
if s.serverPubKey == "" { return }
telemetry.ObserveTunnelLatency(context.Background(), s.serverPubKey, "wireguard", seconds)
}
func (s *WireGuardService) addTcpTarget(msg websocket.WSMessage) {
logger.Debug("Received: %+v", msg)
// if there is no wgData or pm, we can't add targets
if s.TunnelIP == "" || s.proxyManager == nil {
logger.Info("No tunnel IP or proxy manager available")
return
}
return
}
targetData, err := parseTargetData(msg.Data)
if err != nil {

View File

@@ -126,7 +126,7 @@ func (s *Server) Stop() {
s.conn.Close()
}
s.isRunning = false
logger.Info(s.outputPrefix + "Server stopped")
logger.Info("%sServer stopped", s.outputPrefix)
}
// RestartWithNetstack stops the current server and restarts it with netstack
@@ -161,7 +161,7 @@ func (s *Server) handleConnections() {
// Set read deadline to avoid blocking forever
err := s.conn.SetReadDeadline(time.Now().Add(1 * time.Second))
if err != nil {
logger.Error(s.outputPrefix+"Error setting read deadline: %v", err)
logger.Error("%sError setting read deadline: %v", s.outputPrefix, err)
continue
}
@@ -187,7 +187,7 @@ func (s *Server) handleConnections() {
case <-s.shutdownCh:
return // Don't log error if we're shutting down
default:
logger.Error(s.outputPrefix+"Error reading from UDP: %v", err)
logger.Error("%sError reading from UDP: %v", s.outputPrefix, err)
}
continue
}
@@ -219,7 +219,7 @@ func (s *Server) handleConnections() {
copy(responsePacket[5:13], buffer[5:13])
// Log response being sent for debugging
logger.Debug(s.outputPrefix+"Sending response to %s", addr.String())
logger.Debug("%sSending response to %s", s.outputPrefix, addr.String())
// Send the response packet - handle both regular UDP and netstack UDP
if s.useNetstack {
@@ -233,9 +233,9 @@ func (s *Server) handleConnections() {
}
if err != nil {
logger.Error(s.outputPrefix+"Error sending response: %v", err)
logger.Error("%sError sending response: %v", s.outputPrefix, err)
} else {
logger.Debug(s.outputPrefix + "Response sent successfully")
logger.Debug("%sResponse sent successfully", s.outputPrefix)
}
}
}