Compare commits

...

190 Commits

Author SHA1 Message Date
Owen
b642df3e1e Add CODEOWNERS 2026-04-07 11:33:51 -04:00
Marc Schäfer
6c91e99497 Merge pull request #63 from fosrl/proxy-perf-improvements
perf(proxy): Add HTTP client reuse and buffer pooling
2026-04-04 01:47:55 +02:00
Marc Schäfer
58415dee7e refactor: remove redundant HTTP client instantiation in getRoute method 2026-04-04 01:44:59 +02:00
Marc Schäfer
c3ed355127 Merge branch 'dev' into proxy-perf-improvements 2026-04-04 01:37:09 +02:00
Marc Schäfer
c6e1881e6a Merge pull request #62 from LaurenceJJones/split/upstream-dev-relay-session-index
perf(relay): index WireGuard sessions by receiver index
2026-04-04 01:24:45 +02:00
Marc Schäfer
eedd813e2f Update Go version in GitHub Actions workflow 2026-04-04 01:19:47 +02:00
Marc Schäfer
3cf2ccdc54 Merge branch 'dev' into split/upstream-dev-relay-session-index 2026-04-04 01:13:09 +02:00
Owen
726b6b171c Merge branch 'main' into dev 2026-04-03 15:53:13 -04:00
Owen Schwartz
037618acbc Merge pull request #55 from fosrl/dependabot/docker/minor-updates-f918e77f61
Bump golang from 1.25-alpine to 1.26-alpine in the minor-updates group across 1 directory
2026-04-03 15:35:45 -04:00
Owen Schwartz
1a6bc81ddd Merge pull request #67 from fosrl/metrics
Integrate OpenTelemetry and Prometheus metrics across services
2026-04-03 15:35:23 -04:00
Owen Schwartz
a3dbdef7cc Merge pull request #66 from fosrl/dependabot/go_modules/prod-minor-updates-16c924d84c
Bump the prod-minor-updates group with 2 updates
2026-04-03 14:25:21 -04:00
Marc Schäfer
f07c83fde4 Update Go version to 1.25.0 and add gRPC dependency 2026-04-03 18:41:40 +02:00
Marc Schäfer
652d9c5c68 Add metrics tracking for UDP packet handling and session management 2026-04-03 18:15:58 +02:00
Marc Schäfer
e47a57cb4f Enhance metrics tracking in SNIProxy connection handling 2026-04-03 18:15:41 +02:00
Marc Schäfer
4357ddf64b Integrate metrics instrumentation across core services 2026-04-03 15:57:53 +02:00
Marc Schäfer
f322b4c921 Add OpenTelemetry and Prometheus metrics infrastructure 2026-04-03 15:57:47 +02:00
dependabot[bot]
56f72d6643 Bump golang from 1.25-alpine to 1.26-alpine in the minor-updates group
Bumps the minor-updates group with 1 update: golang.


Updates `golang` from 1.25-alpine to 1.26-alpine

---
updated-dependencies:
- dependency-name: golang
  dependency-version: 1.26-alpine
  dependency-type: direct:production
  dependency-group: minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-02 21:06:41 +00:00
dependabot[bot]
367e5bfa08 Bump the prod-minor-updates group with 2 updates
Bumps the prod-minor-updates group with 2 updates: [golang.org/x/crypto](https://github.com/golang/crypto) and [golang.org/x/sync](https://github.com/golang/sync).


Updates `golang.org/x/crypto` from 0.47.0 to 0.49.0
- [Commits](https://github.com/golang/crypto/compare/v0.47.0...v0.49.0)

Updates `golang.org/x/sync` from 0.19.0 to 0.20.0
- [Commits](https://github.com/golang/sync/compare/v0.19.0...v0.20.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.49.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
- dependency-name: golang.org/x/sync
  dependency-version: 0.20.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-04-02 21:06:24 +00:00
Marc Schäfer
aeb8b7c56f Merge pull request #52 from fosrl/dependabot/go_modules/prod-minor-updates-4320415815
Bump the prod-minor-updates group with 2 updates
2026-04-02 20:47:29 +02:00
Marc Schäfer
f5c77d7df8 Merge pull request #56 from fosrl/dependabot/github_actions/actions/upload-artifact-7.0.0
Bump actions/upload-artifact from 6.0.0 to 7.0.0
2026-04-02 20:42:41 +02:00
Marc Schäfer
a37aadddb5 Merge pull request #53 from fosrl/dependabot/github_actions/actions/checkout-6.0.2
Bump actions/checkout from 6.0.1 to 6.0.2
2026-04-02 20:42:07 +02:00
Marc Schäfer
80747bf98b Merge pull request #57 from fosrl/dependabot/github_actions/actions/setup-go-6.3.0
Bump actions/setup-go from 6.2.0 to 6.3.0
2026-04-02 20:41:20 +02:00
Marc Schäfer
69418a439c Merge pull request #58 from fosrl/dependabot/github_actions/sigstore/cosign-installer-4.1.0
Bump sigstore/cosign-installer from 4.0.0 to 4.1.0
2026-04-02 20:40:30 +02:00
Marc Schäfer
d065897c4d Merge pull request #59 from fosrl/dependabot/github_actions/docker/login-action-4.0.0
Bump docker/login-action from 3.6.0 to 4.0.0
2026-04-02 20:39:56 +02:00
Owen
b57574cc4b IFB ingress limiting 2026-03-31 21:56:41 -07:00
Owen
a3862260c9 Add var for b limit 2026-03-31 20:35:05 -07:00
Owen
40da38708c Update logging 2026-03-20 16:11:10 -07:00
Owen
3af64d8bd3 Merge branch 'dev' of github.com:fosrl/gerbil into dev 2026-03-20 16:04:39 -07:00
Owen
fcead8cc15 Add rate limit to hole punch 2026-03-20 16:02:58 -07:00
Owen Schwartz
20dad7bb8e Merge pull request #60 from LaurenceJJones/split/upstream-dev-relay-worker-scaling
perf(relay): scale packet workers and queue depth for throughput
2026-03-18 15:56:46 -07:00
Owen Schwartz
a955aa6169 Merge pull request #61 from LaurenceJJones/split/upstream-dev-relay-addr-cache
perf(relay): cache resolved UDP destinations with TTL
2026-03-18 15:56:32 -07:00
Laurence
c7d9c72f29 Add HTTP client reuse and buffer pooling for performance
- Add reusable HTTP client with connection pooling for API requests
- Add sync.Pool for 32KB buffers used in connection piping
- Clear buffers before returning to pool to prevent data leakage
- Reduces GC pressure and improves throughput under load
2026-03-13 15:28:04 +00:00
Laurence
abc744c647 perf(relay): index WireGuard sessions by receiver index 2026-03-12 12:55:49 +00:00
Laurence
b118fef265 perf(relay): cache resolved UDP destinations with TTL 2026-03-12 12:54:59 +00:00
Laurence
7985f97eb6 perf(relay): scale packet workers and queue depth for throughput 2026-03-12 12:54:02 +00:00
dependabot[bot]
986a2c6bb6 Bump docker/login-action from 3.6.0 to 4.0.0
Bumps [docker/login-action](https://github.com/docker/login-action) from 3.6.0 to 4.0.0.
- [Release notes](https://github.com/docker/login-action/releases)
- [Commits](5e57cd1181...b45d80f862)

---
updated-dependencies:
- dependency-name: docker/login-action
  dependency-version: 4.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-09 21:59:21 +00:00
dependabot[bot]
58674ec025 Bump sigstore/cosign-installer from 4.0.0 to 4.1.0
Bumps [sigstore/cosign-installer](https://github.com/sigstore/cosign-installer) from 4.0.0 to 4.1.0.
- [Release notes](https://github.com/sigstore/cosign-installer/releases)
- [Commits](faadad0cce...ba7bc0a3fe)

---
updated-dependencies:
- dependency-name: sigstore/cosign-installer
  dependency-version: 4.1.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-09 21:59:17 +00:00
dependabot[bot]
5dbe3dbb84 Bump actions/setup-go from 6.2.0 to 6.3.0
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 6.2.0 to 6.3.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](7a3fe6cf4c...4b73464bb3)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: 6.3.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-02 22:48:25 +00:00
dependabot[bot]
32d7af44ca Bump actions/upload-artifact from 6.0.0 to 7.0.0
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 6.0.0 to 7.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](b7c566a772...bbbca2ddaa)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: 7.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-03-02 22:48:17 +00:00
Owen
b9261b8fea Add optional tc 2026-02-27 15:45:17 -08:00
dependabot[bot]
fdc398eb9c Bump the prod-minor-updates group with 2 updates
Bumps the prod-minor-updates group with 2 updates: [golang.org/x/crypto](https://github.com/golang/crypto) and [golang.org/x/sync](https://github.com/golang/sync).


Updates `golang.org/x/crypto` from 0.46.0 to 0.47.0
- [Commits](https://github.com/golang/crypto/compare/v0.46.0...v0.47.0)

Updates `golang.org/x/sync` from 0.1.0 to 0.19.0
- [Commits](https://github.com/golang/sync/compare/v0.1.0...v0.19.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.47.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
- dependency-name: golang.org/x/sync
  dependency-version: 0.19.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-27 21:06:47 +00:00
Owen
c3e73d0189 Merge branch 'main' of github.com:fosrl/gerbil 2026-01-26 15:22:20 -08:00
dependabot[bot]
df2fbdf160 Bump golang.org/x/crypto in the prod-minor-updates group
Bumps the prod-minor-updates group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.45.0 to 0.46.0
- [Commits](https://github.com/golang/crypto/compare/v0.45.0...v0.46.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.46.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-26 15:22:08 -08:00
dependabot[bot]
cb4ac8199d Bump actions/checkout from 6.0.0 to 6.0.1
Bumps [actions/checkout](https://github.com/actions/checkout) from 6.0.0 to 6.0.1.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](1af3b93b68...8e8c483db8)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: 6.0.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-26 15:22:08 -08:00
dependabot[bot]
dd4b86b3e5 Bump actions/upload-artifact from 5.0.0 to 6.0.0
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 5.0.0 to 6.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](330a01c490...b7c566a772)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: 6.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-26 15:22:08 -08:00
dependabot[bot]
bad290aa4e Bump docker/setup-buildx-action from 3.11.1 to 3.12.0
Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 3.11.1 to 3.12.0.
- [Release notes](https://github.com/docker/setup-buildx-action/releases)
- [Commits](e468171a9d...8d2750c68a)

---
updated-dependencies:
- dependency-name: docker/setup-buildx-action
  dependency-version: 3.12.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-26 15:22:08 -08:00
dependabot[bot]
8c27d5e3bf Bump actions/setup-go from 6.1.0 to 6.2.0
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 6.1.0 to 6.2.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](4dc6199c7b...7a3fe6cf4c)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: 6.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-26 15:22:08 -08:00
Owen
7e7a37d49c We don't really support the config file anymore
Ref #30
2026-01-26 15:22:08 -08:00
Owen
d44aa97f32 Try to fix mem leak 2026-01-26 15:22:08 -08:00
Owen
b57ad74589 Quiet normal log message
Fixes #2057
2026-01-26 15:22:08 -08:00
Owen
82256a3f6f Add healthcheck route 2026-01-26 15:22:08 -08:00
Owen
9e140a94db Add regex to runs on 2026-01-26 15:22:08 -08:00
Owen
d0c9ea5a57 Fix docker username issue 2026-01-26 15:22:08 -08:00
Owen
c88810ef24 Restrict inbound traffic 2026-01-26 15:21:28 -08:00
dependabot[bot]
fcd290272f Bump actions/checkout from 6.0.1 to 6.0.2
Bumps [actions/checkout](https://github.com/actions/checkout) from 6.0.1 to 6.0.2.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](8e8c483db8...de0fac2e45)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: 6.0.2
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-26 22:34:44 +00:00
dependabot[bot]
463a4eea79 Bump golang.org/x/crypto in the prod-minor-updates group
Bumps the prod-minor-updates group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.45.0 to 0.46.0
- [Commits](https://github.com/golang/crypto/compare/v0.45.0...v0.46.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.46.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-19 17:30:51 -08:00
dependabot[bot]
4576a2e8a7 Bump actions/checkout from 6.0.0 to 6.0.1
Bumps [actions/checkout](https://github.com/actions/checkout) from 6.0.0 to 6.0.1.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](1af3b93b68...8e8c483db8)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: 6.0.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-19 17:30:42 -08:00
dependabot[bot]
69c13adcdb Bump actions/upload-artifact from 5.0.0 to 6.0.0
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 5.0.0 to 6.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](330a01c490...b7c566a772)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: 6.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-19 17:30:34 -08:00
dependabot[bot]
3886c1a8c1 Bump docker/setup-buildx-action from 3.11.1 to 3.12.0
Bumps [docker/setup-buildx-action](https://github.com/docker/setup-buildx-action) from 3.11.1 to 3.12.0.
- [Release notes](https://github.com/docker/setup-buildx-action/releases)
- [Commits](e468171a9d...8d2750c68a)

---
updated-dependencies:
- dependency-name: docker/setup-buildx-action
  dependency-version: 3.12.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-19 17:30:26 -08:00
dependabot[bot]
06eb4d4310 Bump actions/setup-go from 6.1.0 to 6.2.0
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 6.1.0 to 6.2.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](4dc6199c7b...7a3fe6cf4c)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: 6.2.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2026-01-19 17:30:18 -08:00
Owen
247c47b27f We don't really support the config file anymore
Ref #30
2026-01-19 17:29:04 -08:00
Owen
060038c29b Try to fix mem leak 2025-12-12 22:14:13 -05:00
Owen
5414d21dcd Quiet normal log message
Fixes #2057
2025-12-12 18:40:33 -05:00
Owen
364fa020aa Add healthcheck route 2025-12-12 11:47:04 -05:00
Owen
b96ee16fbf Add regex to runs on 2025-12-11 16:19:08 -05:00
Owen
467d69aa7c Fix docker username issue 2025-12-08 14:51:13 -05:00
Owen
7c7762ebc5 Merge branch 'main' into dev 2025-12-06 12:16:15 -05:00
Owen
526f9c8b4e Remove diff 2025-12-06 12:16:03 -05:00
Owen
905983cf61 Merge branch 'main' into dev 2025-12-06 12:15:34 -05:00
Owen
a0879114e2 Merge branch 'LaurenceJJones-enhancement/errgroup-context-propagation' 2025-12-06 12:15:09 -05:00
Owen
0d54a07973 Merge branch 'enhancement/errgroup-context-propagation' of github.com:LaurenceJJones/gerbil into LaurenceJJones-enhancement/errgroup-context-propagation 2025-12-06 12:14:58 -05:00
Owen Schwartz
4cb2fde961 Merge pull request #36 from LaurenceJJones/fix-wg-session-race-condition
fix: relay race condition in WireGuard session management
2025-12-06 12:12:04 -05:00
Owen Schwartz
9602599565 Merge pull request #38 from LaurenceJJones/fix/relay-buffer-leak
fix: relay buffer leak on UDP read error
2025-12-06 12:07:31 -05:00
Owen Schwartz
11f858b341 Merge pull request #39 from LaurenceJJones/fix/calcpeerbandwidth-optimization
feat: optimize calculatePeerBandwidth to avoid nested loops
2025-12-06 11:59:00 -05:00
Owen Schwartz
29b2cb33a2 Merge pull request #42 from fosrl/dependabot/github_actions/actions/setup-go-6.1.0
Bump actions/setup-go from 6.0.0 to 6.1.0
2025-12-06 11:48:22 -05:00
Owen Schwartz
34290ffe09 Merge pull request #43 from fosrl/dependabot/github_actions/actions/checkout-6.0.0
Bump actions/checkout from 5.0.0 to 6.0.0
2025-12-06 11:48:14 -05:00
Owen Schwartz
1013d0591e Merge pull request #44 from fosrl/dependabot/docker/minor-updates-60be0b6e22
Bump alpine from 3.22 to 3.23 in the minor-updates group
2025-12-06 11:48:07 -05:00
dependabot[bot]
2f6d62ab45 Bump alpine from 3.22 to 3.23 in the minor-updates group
Bumps the minor-updates group with 1 update: alpine.


Updates `alpine` from 3.22 to 3.23

---
updated-dependencies:
- dependency-name: alpine
  dependency-version: '3.23'
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-03 21:23:35 +00:00
Owen
8d6ba79408 Send public key 2025-12-01 16:19:57 -05:00
Owen Schwartz
208b434cb7 Merge pull request #41 from fosrl/dependabot/go_modules/prod-minor-updates-dd7da38a6b
Bump golang.org/x/crypto from 0.44.0 to 0.45.0 in the prod-minor-updates group
2025-11-29 13:03:37 -05:00
dependabot[bot]
39ce0ac407 Bump actions/checkout from 5.0.0 to 6.0.0
Bumps [actions/checkout](https://github.com/actions/checkout) from 5.0.0 to 6.0.0.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](08c6903cd8...1af3b93b68)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: 6.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-24 22:19:10 +00:00
dependabot[bot]
72bee56412 Bump actions/setup-go from 6.0.0 to 6.1.0
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 6.0.0 to 6.1.0.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](4469467582...4dc6199c7b)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: 6.1.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-24 22:19:05 +00:00
dependabot[bot]
b32da3a714 Bump golang.org/x/crypto in the prod-minor-updates group
Bumps the prod-minor-updates group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.44.0 to 0.45.0
- [Commits](https://github.com/golang/crypto/compare/v0.44.0...v0.45.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.45.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-19 21:25:00 +00:00
Laurence
971452e5d3 revert: drop logger formatting changes from calcpeerbandwidth optimization branch 2025-11-16 08:42:57 +00:00
Laurence
bba4345b0f main: optimize calculatePeerBandwidth to avoid nested peer scans
Build a set of current peer public keys during the primary iteration and
prune lastReadings in a single pass, removing the O(n^2) nested loop.

No behavior change; improves efficiency when peer lists and lastReadings
grow large.
2025-11-16 08:40:26 +00:00
Laurence
b2392fb250 relay: fix buffer leak on UDP read error by returning buffer to pool
When ReadFromUDP fails in readPackets, the buffer was not returned to the
sync.Pool, causing a small but persistent leak under error conditions.
Return the buffer before continuing to ensure reuse and stable memory.

Scope: minimal hotfix (no broader refactors).
2025-11-16 06:07:48 +00:00
Laurence
697f4131e7 enhancement: base context + errgroup; propagate cancellation; graceful shutdown
- main: add base context via signal.NotifyContext; establish errgroup and use it to supervise background tasks; convert ticker to context-aware periodicBandwidthCheck; run HTTP server under errgroup and add graceful shutdown; treat context.Canceled as normal exit
- relay: thread parent context through UDPProxyServer; add cancel func; make packet reader, workers, and cleanup tickers exit on ctx.Done; Stop cancels, closes listener and downstream UDP connections, and closes packet channel to drain workers
- proxy: drop earlier parent context hook for SNI proxy per review; rely on existing Stop() for graceful shutdown

Benefits:
- unified lifecycle and deterministic shutdown across components
- prevents leaked goroutines/tickers and closes sockets cleanly
- consolidated error handling via g.Wait(), with context cancellation treated as non-error
- sets foundation for child errgroups and future structured concurrency
2025-11-16 06:00:32 +00:00
Laurence Jones
e282715251 Merge branch 'main' into fix-wg-session-race-condition 2025-11-16 05:29:46 +00:00
Owen Schwartz
709df6db3e Merge pull request #33 from fosrl/dependabot/github_actions/actions/upload-artifact-5.0.0
Bump actions/upload-artifact from 4.6.2 to 5.0.0
2025-11-13 15:53:58 -05:00
Owen Schwartz
cf2b436470 Merge pull request #34 from fosrl/dependabot/github_actions/docker/setup-qemu-action-3.7.0
Bump docker/setup-qemu-action from 3.6.0 to 3.7.0
2025-11-13 15:53:00 -05:00
Owen Schwartz
2a29021572 Merge pull request #35 from fosrl/dependabot/go_modules/prod-minor-updates-c94709d3c3
Bump golang.org/x/crypto from 0.43.0 to 0.44.0 in the prod-minor-updates group
2025-11-13 15:52:54 -05:00
Laurence
a3f9a89079 Refactor WireGuard session locking and remove unused methods
- Remove unused methods: UpdateLastSeen, GetSenderIndex, MatchesSenderIndex
  (replaced by simpler direct usage in Range callbacks)
- Simplify session access pattern: check GetSenderIndex in Range callback,
  then call GetDestAddr and UpdateLastSeen when match found
- Optimize UpdateLastSeen usage: only use for existing sessions already
  in sync.Map; use direct assignment in struct literals for new sessions
  (safe since no concurrent access during creation)

This simplifies the code while maintaining thread-safety for concurrent
access to existing sessions.
2025-11-13 06:43:31 +00:00
Laurence
ee27bf3153 Fix race condition in WireGuard session management
The race condition existed because while sync.Map is thread-safe for map
operations (Load, Store, Delete, Range), it does not provide thread-safety
for the data stored within it. When WireGuardSession structs were stored as
pointers in the sync.Map, multiple goroutines could:

1. Retrieve the same session pointer from the map concurrently
2. Access and modify the session's fields (particularly LastSeen) without
   synchronization
3. Cause data races when one goroutine reads LastSeen while another updates it

This fix adds a sync.RWMutex to each WireGuardSession struct to protect
concurrent access to its fields. All field access now goes through
thread-safe methods that properly acquire/release the mutex.

Changes:
- Added sync.RWMutex to WireGuardSession struct
- Added thread-safe accessor methods (GetLastSeen, GetDestAddr, etc.)
- Added atomic CheckAndUpdateIfMatch method for efficient check-and-update
- Updated all session field accesses to use thread-safe methods
- Removed redundant Store call after updating LastSeen (pointer update is
  atomic in Go, but field access within pointer was not)
2025-11-13 06:26:09 +00:00
dependabot[bot]
a90f681957 Bump golang.org/x/crypto in the prod-minor-updates group
Bumps the prod-minor-updates group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.43.0 to 0.44.0
- [Commits](https://github.com/golang/crypto/compare/v0.43.0...v0.44.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.44.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-11 21:19:30 +00:00
dependabot[bot]
3afc82ef9a Bump docker/setup-qemu-action from 3.6.0 to 3.7.0
Bumps [docker/setup-qemu-action](https://github.com/docker/setup-qemu-action) from 3.6.0 to 3.7.0.
- [Release notes](https://github.com/docker/setup-qemu-action/releases)
- [Commits](29109295f8...c7c5346462)

---
updated-dependencies:
- dependency-name: docker/setup-qemu-action
  dependency-version: 3.7.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-11-10 21:51:03 +00:00
dependabot[bot]
d3a16f4c59 Bump actions/upload-artifact from 4.6.2 to 5.0.0
Bumps [actions/upload-artifact](https://github.com/actions/upload-artifact) from 4.6.2 to 5.0.0.
- [Release notes](https://github.com/actions/upload-artifact/releases)
- [Commits](ea165f8d65...330a01c490)

---
updated-dependencies:
- dependency-name: actions/upload-artifact
  dependency-version: 5.0.0
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-27 22:24:20 +00:00
Owen
2a1911a66f Update runner to amd64-runner 2025-10-20 16:43:18 -07:00
Owen Schwartz
08341b2385 Merge pull request #32 from marcschaeferger/gh-action
Adding GHCR to CI/CD Release Workflow & further improvements
2025-10-20 16:39:54 -07:00
Marc Schäfer
6cde07d479 ci(actions): add GHCR mirroring and cosign signing for Docker images
- mirror images from Docker Hub to GHCR using skopeo (preserves multi-arch manifests)
- login to GHCR via docker/login-action for signing/pushing
- install cosign and perform dual signing: keyless (OIDC) + key-based; verify signatures
- add required permissions for id-token/packages and reference necessary secrets
2025-10-21 01:30:51 +02:00
Marc Schäfer
06b1e84f99 feat(ci): add step to update version in main.go during CI/CD pipeline 2025-10-21 01:20:08 +02:00
Marc Schäfer
2b7e93ec92 ci(actions): add permissions section to CI/CD and test workflows 2025-10-21 01:19:36 +02:00
Marc Schäfer
ca23ae7a30 ci(actions): pin action versions to commit SHAs for security
- Pin actions/checkout to SHA for v5.0.0
- Pin docker/setup-qemu-action to SHA for v3.6.0
- Pin docker/setup-buildx-action to SHA for v3.11.1
- Pin docker/login-action to SHA for v3.6.0
- Pin actions/setup-go to SHA for v6.0.0
- Pin actions/upload-artifact to SHA for v4.6.2
2025-10-21 01:18:33 +02:00
Owen
661fd86305 Update to use gerbil and not newt 2025-10-20 12:59:17 -07:00
Owen Schwartz
594a499b95 Merge pull request #31 from marcschaeferger/ghcr
feat(actions): Sync Images from Docker to GHCR
2025-10-20 12:57:53 -07:00
Marc Schäfer
44aed84827 feat(actions): Sync Images from Docker to GHCR 2025-10-20 21:56:04 +02:00
Owen
bf038eb4a2 Update domain 2025-10-19 15:12:56 -07:00
Owen
6da3129b4e Update port 2025-10-18 15:07:50 -07:00
Owen
ac0f9b6a82 Update cicd 2025-10-16 14:09:30 -07:00
Owen
16aef10cca Merge branch 'main' of github.com:fosrl/gerbil 2025-10-16 13:41:42 -07:00
Owen
19031ebdfd Move to gen the port in the right place 2025-10-16 13:40:01 -07:00
Owen
0eebbc51d5 Deprecate --listen 2025-10-16 10:38:47 -07:00
Owen
d321a8ba7e Dont require proxy protocol from known hosts 2025-10-14 21:05:30 -07:00
Owen Schwartz
3ea86222ca Merge pull request #29 from fosrl/dependabot/go_modules/prod-minor-updates-ce64870c5e
Bump golang.org/x/crypto from 0.42.0 to 0.43.0 in the prod-minor-updates group
2025-10-11 09:41:08 -07:00
dependabot[bot]
c3ebe930d9 Bump golang.org/x/crypto in the prod-minor-updates group
Bumps the prod-minor-updates group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.42.0 to 0.43.0
- [Commits](https://github.com/golang/crypto/compare/v0.42.0...v0.43.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.43.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-10-08 21:24:05 +00:00
Owen Schwartz
f2b96f2a38 Merge pull request #28 from SigmaSquadron/push-unypywyqkkrl
Change default port to 3004 to avoid a conflict with Pangolin's integration API.
2025-10-05 17:41:21 -07:00
Owen
9038239bbe Accept proxy protocol from other nodes 2025-09-29 21:56:15 -07:00
miloschwartz
3e64eb9c4f add templates 2025-09-29 16:41:29 -07:00
Owen
92992b8c14 Merge branch 'main' into dev 2025-09-28 16:28:07 -07:00
Owen
4ee9d77532 Rebuild sessions 2025-09-28 15:31:34 -07:00
Owen Schwartz
bd7a5bd4b0 Merge pull request #26 from fosrl/dependabot/github_actions/actions/setup-go-6
Bump actions/setup-go from 5 to 6
2025-09-15 14:43:53 -07:00
Owen Schwartz
1cd49f8ee3 Merge pull request #27 from fosrl/dependabot/go_modules/prod-minor-updates-237ba4726d
Bump golang.org/x/crypto from 0.41.0 to 0.42.0 in the prod-minor-updates group
2025-09-15 14:43:41 -07:00
Fernando Rodrigues
7a919d867b Change default port to 3004 to avoid a conflict with Pangolin's integration API.
Signed-off-by: Fernando Rodrigues <alpha@sigmasquadron.net>
2025-09-14 23:19:21 +10:00
dependabot[bot]
ce50c627a7 Bump golang.org/x/crypto in the prod-minor-updates group
Bumps the prod-minor-updates group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.41.0 to 0.42.0
- [Commits](https://github.com/golang/crypto/compare/v0.41.0...v0.42.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.42.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-08 21:53:42 +00:00
dependabot[bot]
691d5f0271 Bump actions/setup-go from 5 to 6
Bumps [actions/setup-go](https://github.com/actions/setup-go) from 5 to 6.
- [Release notes](https://github.com/actions/setup-go/releases)
- [Commits](https://github.com/actions/setup-go/compare/v5...v6)

---
updated-dependencies:
- dependency-name: actions/setup-go
  dependency-version: '6'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-09-08 21:52:09 +00:00
Owen Schwartz
56151089e3 Merge pull request #24 from Lokowitz/dockerfile-update
changed docker image from ubuntu to alpine
2025-08-31 09:56:28 -07:00
Lokowitz
af7c1caf98 changed docker image from ubuntu to alpine 2025-08-31 11:19:16 +00:00
Owen Schwartz
dd208ab67c Merge pull request #22 from fosrl/dependabot/go_modules/prod-minor-updates-d1569a22cb
Bump golang.org/x/crypto from 0.36.0 to 0.41.0 in the prod-minor-updates group
2025-08-30 15:14:02 -07:00
Owen Schwartz
8189d41a45 Merge pull request #21 from fosrl/dependabot/github_actions/actions/checkout-5
Bump actions/checkout from 4 to 5
2025-08-30 15:13:54 -07:00
dependabot[bot]
ea3477c8ce Bump golang.org/x/crypto in the prod-minor-updates group
Bumps the prod-minor-updates group with 1 update: [golang.org/x/crypto](https://github.com/golang/crypto).


Updates `golang.org/x/crypto` from 0.36.0 to 0.41.0
- [Commits](https://github.com/golang/crypto/compare/v0.36.0...v0.41.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.41.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: prod-minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-27 21:35:30 +00:00
Owen Schwartz
a8a0f92c9b Merge pull request #23 from fosrl/dev
Add proxy protocol
2025-08-27 14:22:08 -07:00
Owen
7040a9436e Add proxy protocol 2025-08-26 22:26:01 -07:00
Owen
04361242fe Update readme 2025-08-23 12:29:26 -07:00
Owen
554b1d55dc Merge branch 'main' into dev 2025-08-23 12:24:21 -07:00
dependabot[bot]
b03f8911a5 Bump actions/checkout from 4 to 5
Bumps [actions/checkout](https://github.com/actions/checkout) from 4 to 5.
- [Release notes](https://github.com/actions/checkout/releases)
- [Changelog](https://github.com/actions/checkout/blob/main/CHANGELOG.md)
- [Commits](https://github.com/actions/checkout/compare/v4...v5)

---
updated-dependencies:
- dependency-name: actions/checkout
  dependency-version: '5'
  dependency-type: direct:production
  update-type: version-update:semver-major
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-23 04:37:59 +00:00
Owen Schwartz
47589570c9 Merge pull request #20 from Lokowitz/sync-go-versions
update versions and sync go version in all files
2025-08-22 21:37:20 -07:00
Owen
9f5b8dea26 Merge branch 'hybrid' into dev 2025-08-22 11:56:58 -07:00
Owen
f6a1e1e27c Merge branch 'main' into dev 2025-08-22 11:56:54 -07:00
Owen
f983a8f141 Local proxy port 443 2025-08-22 11:56:29 -07:00
Owen
efce3cb0b2 Sni has no errors now 2025-08-17 10:43:37 -07:00
Marvin
6eeebd81b2 sync go versions 2025-08-17 11:48:39 +00:00
Owen
c970fd5a18 Update to work with multipe endpoints 2025-08-16 22:59:45 -07:00
Owen
09bd02456d Move to post 2025-08-16 22:53:49 -07:00
Owen
c24537af36 Fix url 2025-08-16 22:36:03 -07:00
Owen
9de3f14799 Update default config 2025-08-16 22:35:51 -07:00
Owen Schwartz
0908f75f5f Merge pull request #19 from fosrl/dependabot/docker/minor-updates-80a311fbba
Bump golang from 1.24.3-alpine to 1.25.0-alpine in the minor-updates group
2025-08-15 09:40:54 -07:00
Owen
10958f8c55 Use propper logger 2025-08-14 22:25:38 -07:00
dependabot[bot]
b1840fd5c3 Bump golang in the minor-updates group
Bumps the minor-updates group with 1 update: golang.


Updates `golang` from 1.24.3-alpine to 1.25.0-alpine

---
updated-dependencies:
- dependency-name: golang
  dependency-version: 1.25.0-alpine
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-08-14 21:55:42 +00:00
Owen
1df5eb19ff Integrate sni proxy 2025-08-13 15:41:58 -07:00
Owen
f71f183886 Add basic proxy 2025-08-12 18:02:34 -07:00
Owen
8922ca9736 Fix some clients stuff for multi pop 2025-08-12 17:26:14 -07:00
Owen
38483f4a26 Allow for updating destinations 2025-07-28 22:41:11 -07:00
Owen
78c768e497 Add mutex 2025-07-28 21:35:57 -07:00
Owen
fc7df8a530 Update readme 2025-07-28 12:43:19 -07:00
Owen
50b42059ac Add new logic to handle changes in newt connection 2025-07-24 20:46:51 -07:00
Owen
825f7fcf60 Add notify 2025-06-21 12:06:58 -04:00
Owen
8c8ec72b40 Merge branch 'dev' into hp-multi-client 2025-06-10 09:39:39 -04:00
Owen
c61b7fc4fb Merge branch 'main' into dev 2025-06-10 09:39:29 -04:00
Owen Schwartz
96e3376147 Merge pull request #12 from Lokowitz/fix-dependabot
fix - dependabot
2025-06-10 09:37:52 -04:00
Owen Schwartz
e47a7c80d1 Merge pull request #11 from Lokowitz/add-test-action
Add test action
2025-06-10 09:37:34 -04:00
Marvin
f1e373f2d8 Update test.yml 2025-06-10 14:01:17 +02:00
Marvin
ef4d0db475 Update dependabot.yml 2025-06-10 13:40:41 +02:00
Marvin
b6b97f5ed3 Create test.yml 2025-06-10 13:36:07 +02:00
Marvin
dff267a42e Update Makefile 2025-06-10 13:34:49 +02:00
Owen Schwartz
bb98db7f5e Merge pull request #10 from Lokowitz/main
Update deps and add dependabot.yml
2025-06-02 09:04:41 -04:00
dependabot[bot]
f1016200b3 Bump golang.org/x/net from 0.21.0 to 0.38.0 in the go_modules group (#5)
Bumps the go_modules group with 1 update: [golang.org/x/net](https://github.com/golang/net).


Updates `golang.org/x/net` from 0.21.0 to 0.38.0
- [Commits](https://github.com/golang/net/compare/v0.21.0...v0.38.0)

---
updated-dependencies:
- dependency-name: golang.org/x/net
  dependency-version: 0.38.0
  dependency-type: indirect
  dependency-group: go_modules
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-06-01 16:46:30 +02:00
dependabot[bot]
f1ab8094cf Bump the go_modules group with 2 updates (#4)
Bumps the go_modules group with 2 updates: [golang.org/x/crypto](https://github.com/golang/crypto) and [golang.org/x/net](https://github.com/golang/net).


Updates `golang.org/x/crypto` from 0.8.0 to 0.35.0
- [Commits](https://github.com/golang/crypto/compare/v0.8.0...v0.35.0)

Updates `golang.org/x/net` from 0.9.0 to 0.21.0
- [Commits](https://github.com/golang/net/compare/v0.9.0...v0.21.0)

---
updated-dependencies:
- dependency-name: golang.org/x/crypto
  dependency-version: 0.35.0
  dependency-type: indirect
  dependency-group: go_modules
- dependency-name: golang.org/x/net
  dependency-version: 0.21.0
  dependency-type: indirect
  dependency-group: go_modules
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-06-01 16:39:52 +02:00
dependabot[bot]
ad2bc0d397 Bump ubuntu from 22.04 to 24.04 in the major-updates group (#2)
Bumps the major-updates group with 1 update: ubuntu.


Updates `ubuntu` from 22.04 to 24.04

---
updated-dependencies:
- dependency-name: ubuntu
  dependency-version: '24.04'
  dependency-type: direct:production
  dependency-group: major-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-06-01 16:15:25 +02:00
dependabot[bot]
a78d141ca3 Bump github.com/vishvananda/netlink in the prod-patch-updates group (#3)
Bumps the prod-patch-updates group with 1 update: [github.com/vishvananda/netlink](https://github.com/vishvananda/netlink).


Updates `github.com/vishvananda/netlink` from 1.3.0 to 1.3.1
- [Release notes](https://github.com/vishvananda/netlink/releases)
- [Commits](https://github.com/vishvananda/netlink/compare/v1.3.0...v1.3.1)

---
updated-dependencies:
- dependency-name: github.com/vishvananda/netlink
  dependency-version: 1.3.1
  dependency-type: direct:production
  update-type: version-update:semver-patch
  dependency-group: prod-patch-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-06-01 16:11:22 +02:00
Marvin
10b1ad2a5a Merge pull request #1 from Lokowitz/dependabot/docker/minor-updates-c9322ea29a
Bump golang from 1.23.1-alpine to 1.24.3-alpine in the minor-updates group
2025-06-01 16:10:54 +02:00
dependabot[bot]
8a9f29043a Bump golang in the minor-updates group
Bumps the minor-updates group with 1 update: golang.


Updates `golang` from 1.23.1-alpine to 1.24.3-alpine

---
updated-dependencies:
- dependency-name: golang
  dependency-version: 1.24.3-alpine
  dependency-type: direct:production
  update-type: version-update:semver-minor
  dependency-group: minor-updates
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-06-01 14:08:27 +00:00
Marvin
05c9d851f4 Create dependabot.yml 2025-06-01 16:07:01 +02:00
Owen
c9a6b85e1d Attempt to add sender and receiver ids to relaying 2025-04-07 21:45:57 -04:00
Owen
a16021cd86 Put http server into routine 2025-03-25 20:49:28 -04:00
Owen
9506b545f4 Handle encrypted messages 2025-03-15 21:46:40 -04:00
Owen
17b87e6707 Merge branch 'dev' into holepunch 2025-03-04 00:02:04 -05:00
Owen
cba4dc646d Try to setup qemu 2025-03-03 23:58:55 -05:00
Owen
88be6d133d Update upload action to v4 2025-03-03 22:38:49 -05:00
Owen Schwartz
34a80c6411 Merge pull request #8 from fosrl/dev
Add CICD
2025-03-03 22:37:29 -05:00
Owen
6565fdbe62 Fix merge issue 2025-03-03 22:36:58 -05:00
Owen
993f5f86c5 Small adjustments 2025-02-23 20:17:16 -05:00
Owen
093a4c21f2 Big speed increase 2025-02-23 18:43:37 -05:00
Owen
f7c0bb9135 Basic relay working! 2025-02-23 16:49:49 -05:00
Owen
a145b77f79 Remove logging 2025-02-22 13:09:04 -05:00
Owen
7b3f7d2b12 Add holepunch udp server 2025-02-21 22:28:16 -05:00
Milo Schwartz
9c5ddcdfb8 Merge branch 'dev' of https://github.com/fosrl/gerbil into dev 2025-01-29 22:26:02 -05:00
Milo Schwartz
32176c74a0 add cicd 2025-01-29 22:25:33 -05:00
Owen Schwartz
aa4f4ebfab Merge pull request #5 from fosrl/dev
MSS Clamping & Retry Remote Config
2025-01-19 17:27:52 -05:00
Owen Schwartz
bab8630756 Add retry to config request 2025-01-19 17:26:15 -05:00
Owen Schwartz
24e993ee41 Add mss clamping 2025-01-19 17:19:09 -05:00
Owen Schwartz
5d4faaff65 Standarize release build makefile 2025-01-16 07:41:27 -05:00
39 changed files with 6732 additions and 126 deletions

View File

@@ -6,4 +6,5 @@ README.md
Makefile
public/
LICENSE
CONTRIBUTING.md
CONTRIBUTING.md
.git

1
.github/CODEOWNERS vendored Normal file
View File

@@ -0,0 +1 @@
* @oschwartz10612 @miloschwartz

View File

@@ -0,0 +1,47 @@
body:
- type: textarea
attributes:
label: Summary
description: A clear and concise summary of the requested feature.
validations:
required: true
- type: textarea
attributes:
label: Motivation
description: |
Why is this feature important?
Explain the problem this feature would solve or what use case it would enable.
validations:
required: true
- type: textarea
attributes:
label: Proposed Solution
description: |
How would you like to see this feature implemented?
Provide as much detail as possible about the desired behavior, configuration, or changes.
validations:
required: true
- type: textarea
attributes:
label: Alternatives Considered
description: Describe any alternative solutions or workarounds you've thought about.
validations:
required: false
- type: textarea
attributes:
label: Additional Context
description: Add any other context, mockups, or screenshots about the feature request here.
validations:
required: false
- type: markdown
attributes:
value: |
Before submitting, please:
- Check if there is an existing issue for this feature.
- Clearly explain the benefit and use case.
- Be as specific as possible to help contributors evaluate and implement.

51
.github/ISSUE_TEMPLATE/1.bug_report.yml vendored Normal file
View File

@@ -0,0 +1,51 @@
name: Bug Report
description: Create a bug report
labels: []
body:
- type: textarea
attributes:
label: Describe the Bug
description: A clear and concise description of what the bug is.
validations:
required: true
- type: textarea
attributes:
label: Environment
description: Please fill out the relevant details below for your environment.
value: |
- OS Type & Version: (e.g., Ubuntu 22.04)
- Pangolin Version:
- Gerbil Version:
- Traefik Version:
- Newt Version:
- Olm Version: (if applicable)
validations:
required: true
- type: textarea
attributes:
label: To Reproduce
description: |
Steps to reproduce the behavior, please provide a clear description of how to reproduce the issue, based on the linked minimal reproduction. Screenshots can be provided in the issue body below.
If using code blocks, make sure syntax highlighting is correct and double-check that the rendered preview is not broken.
validations:
required: true
- type: textarea
attributes:
label: Expected Behavior
description: A clear and concise description of what you expected to happen.
validations:
required: true
- type: markdown
attributes:
value: |
Before posting the issue go through the steps you've written down to make sure the steps provided are detailed and clear.
- type: markdown
attributes:
value: |
Contributors should be able to follow the steps provided in order to reproduce the bug.

8
.github/ISSUE_TEMPLATE/config.yml vendored Normal file
View File

@@ -0,0 +1,8 @@
blank_issues_enabled: false
contact_links:
- name: Need help or have questions?
url: https://github.com/orgs/fosrl/discussions
about: Ask questions, get help, and discuss with other community members
- name: Request a Feature
url: https://github.com/orgs/fosrl/discussions/new?category=feature-requests
about: Feature requests should be opened as discussions so others can upvote and comment

40
.github/dependabot.yml vendored Normal file
View File

@@ -0,0 +1,40 @@
version: 2
updates:
- package-ecosystem: "gomod"
directory: "/"
schedule:
interval: "daily"
groups:
dev-patch-updates:
dependency-type: "development"
update-types:
- "patch"
dev-minor-updates:
dependency-type: "development"
update-types:
- "minor"
prod-patch-updates:
dependency-type: "production"
update-types:
- "patch"
prod-minor-updates:
dependency-type: "production"
update-types:
- "minor"
- package-ecosystem: "docker"
directory: "/"
schedule:
interval: "daily"
groups:
patch-updates:
update-types:
- "patch"
minor-updates:
update-types:
- "minor"
- package-ecosystem: "github-actions"
directory: "/"
schedule:
interval: "weekly"

161
.github/workflows/cicd.yml vendored Normal file
View File

@@ -0,0 +1,161 @@
name: CI/CD Pipeline
# CI/CD workflow for building, publishing, mirroring, signing container images and building release binaries.
# Actions are pinned to specific SHAs to reduce supply-chain risk. This workflow triggers on tag push events.
permissions:
contents: read
packages: write # for GHCR push
id-token: write # for Cosign Keyless (OIDC) Signing
# Required secrets:
# - DOCKER_HUB_USERNAME / DOCKER_HUB_ACCESS_TOKEN: push to Docker Hub
# - GITHUB_TOKEN: used for GHCR login and OIDC keyless signing
# - COSIGN_PRIVATE_KEY / COSIGN_PASSWORD / COSIGN_PUBLIC_KEY: for key-based signing
on:
push:
tags:
- "[0-9]+.[0-9]+.[0-9]+"
- "[0-9]+.[0-9]+.[0-9]+.rc.[0-9]+"
concurrency:
group: ${{ github.ref }}
cancel-in-progress: true
jobs:
release:
name: Build and Release
runs-on: amd64-runner
# Job-level timeout to avoid runaway or stuck runs
timeout-minutes: 120
env:
# Target images
DOCKERHUB_IMAGE: docker.io/fosrl/${{ github.event.repository.name }}
GHCR_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
steps:
- name: Checkout code
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up QEMU
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
- name: Log in to Docker Hub
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: docker.io
username: ${{ secrets.DOCKER_HUB_USERNAME }}
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
- name: Extract tag name
id: get-tag
run: echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
shell: bash
- name: Install Go
uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
with:
go-version: 1.25
- name: Update version in main.go
run: |
TAG=${{ env.TAG }}
if [ -f main.go ]; then
sed -i 's/version_replaceme/'"$TAG"'/' main.go
echo "Updated main.go with version $TAG"
else
echo "main.go not found"
fi
shell: bash
- name: Build and push Docker images (Docker Hub)
run: |
TAG=${{ env.TAG }}
make docker-build-release tag=$TAG
echo "Built & pushed to: ${{ env.DOCKERHUB_IMAGE }}:${TAG}"
shell: bash
- name: Login in to GHCR
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
with:
registry: ghcr.io
username: ${{ github.actor }}
password: ${{ secrets.GITHUB_TOKEN }}
- name: Install skopeo + jq
# skopeo: copy/inspect images between registries
# jq: JSON parsing tool used to extract digest values
run: |
sudo apt-get update -y
sudo apt-get install -y skopeo jq
skopeo --version
shell: bash
- name: Copy tag from Docker Hub to GHCR
# Mirror the already-built image (all architectures) to GHCR so we can sign it
run: |
set -euo pipefail
TAG=${{ env.TAG }}
echo "Copying ${{ env.DOCKERHUB_IMAGE }}:${TAG} -> ${{ env.GHCR_IMAGE }}:${TAG}"
skopeo copy --all --retry-times 3 \
docker://$DOCKERHUB_IMAGE:$TAG \
docker://$GHCR_IMAGE:$TAG
shell: bash
- name: Install cosign
# cosign is used to sign and verify container images (key and keyless)
uses: sigstore/cosign-installer@ba7bc0a3fef59531c69a25acd34668d6d3fe6f22 # v4.1.0
- name: Dual-sign and verify (GHCR & Docker Hub)
# Sign each image by digest using keyless (OIDC) and key-based signing,
# then verify both the public key signature and the keyless OIDC signature.
env:
TAG: ${{ env.TAG }}
COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
COSIGN_YES: "true"
run: |
set -euo pipefail
issuer="https://token.actions.githubusercontent.com"
id_regex="^https://github.com/${{ github.repository }}/.+" # accept this repo (all workflows/refs)
for IMAGE in "${GHCR_IMAGE}" "${DOCKERHUB_IMAGE}"; do
echo "Processing ${IMAGE}:${TAG}"
DIGEST="$(skopeo inspect --retry-times 3 docker://${IMAGE}:${TAG} | jq -r '.Digest')"
REF="${IMAGE}@${DIGEST}"
echo "Resolved digest: ${REF}"
echo "==> cosign sign (keyless) --recursive ${REF}"
cosign sign --recursive "${REF}"
echo "==> cosign sign (key) --recursive ${REF}"
cosign sign --key env://COSIGN_PRIVATE_KEY --recursive "${REF}"
echo "==> cosign verify (public key) ${REF}"
cosign verify --key env://COSIGN_PUBLIC_KEY "${REF}" -o text
echo "==> cosign verify (keyless policy) ${REF}"
cosign verify \
--certificate-oidc-issuer "${issuer}" \
--certificate-identity-regexp "${id_regex}" \
"${REF}" -o text
done
shell: bash
- name: Build binaries
run: |
make go-build-release
shell: bash
- name: Upload artifacts from /bin
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
with:
name: binaries
path: bin/

132
.github/workflows/mirror.yaml vendored Normal file
View File

@@ -0,0 +1,132 @@
name: Mirror & Sign (Docker Hub to GHCR)
on:
workflow_dispatch: {}
permissions:
contents: read
packages: write
id-token: write # for keyless OIDC
env:
SOURCE_IMAGE: docker.io/fosrl/gerbil
DEST_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
jobs:
mirror-and-dual-sign:
runs-on: amd64-runner
steps:
- name: Install skopeo + jq
run: |
sudo apt-get update -y
sudo apt-get install -y skopeo jq
skopeo --version
- name: Install cosign
uses: sigstore/cosign-installer@ba7bc0a3fef59531c69a25acd34668d6d3fe6f22 # v4.1.0
- name: Input check
run: |
test -n "${SOURCE_IMAGE}" || (echo "SOURCE_IMAGE is empty" && exit 1)
echo "Source : ${SOURCE_IMAGE}"
echo "Target : ${DEST_IMAGE}"
# Auth for skopeo (containers-auth)
- name: Skopeo login to GHCR
run: |
skopeo login ghcr.io -u "${{ github.actor }}" -p "${{ secrets.GITHUB_TOKEN }}"
# Auth for cosign (docker-config)
- name: Docker login to GHCR (for cosign)
run: |
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
- name: List source tags
run: |
set -euo pipefail
skopeo list-tags --retry-times 3 docker://"${SOURCE_IMAGE}" \
| jq -r '.Tags[]' | sort -u > src-tags.txt
echo "Found source tags: $(wc -l < src-tags.txt)"
head -n 20 src-tags.txt || true
- name: List destination tags (skip existing)
run: |
set -euo pipefail
if skopeo list-tags --retry-times 3 docker://"${DEST_IMAGE}" >/tmp/dst.json 2>/dev/null; then
jq -r '.Tags[]' /tmp/dst.json | sort -u > dst-tags.txt
else
: > dst-tags.txt
fi
echo "Existing destination tags: $(wc -l < dst-tags.txt)"
- name: Mirror, dual-sign, and verify
env:
# keyless
COSIGN_YES: "true"
# key-based
COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
# verify
COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
run: |
set -euo pipefail
copied=0; skipped=0; v_ok=0; errs=0
issuer="https://token.actions.githubusercontent.com"
id_regex="^https://github.com/${{ github.repository }}/.+"
while read -r tag; do
[ -z "$tag" ] && continue
if grep -Fxq "$tag" dst-tags.txt; then
echo "::notice ::Skip (exists) ${DEST_IMAGE}:${tag}"
skipped=$((skipped+1))
continue
fi
echo "==> Copy ${SOURCE_IMAGE}:${tag} → ${DEST_IMAGE}:${tag}"
if ! skopeo copy --all --retry-times 3 \
docker://"${SOURCE_IMAGE}:${tag}" docker://"${DEST_IMAGE}:${tag}"; then
echo "::warning title=Copy failed::${SOURCE_IMAGE}:${tag}"
errs=$((errs+1)); continue
fi
copied=$((copied+1))
digest="$(skopeo inspect --retry-times 3 docker://"${DEST_IMAGE}:${tag}" | jq -r '.Digest')"
ref="${DEST_IMAGE}@${digest}"
echo "==> cosign sign (keyless) --recursive ${ref}"
if ! cosign sign --recursive "${ref}"; then
echo "::warning title=Keyless sign failed::${ref}"
errs=$((errs+1))
fi
echo "==> cosign sign (key) --recursive ${ref}"
if ! cosign sign --key env://COSIGN_PRIVATE_KEY --recursive "${ref}"; then
echo "::warning title=Key sign failed::${ref}"
errs=$((errs+1))
fi
echo "==> cosign verify (public key) ${ref}"
if ! cosign verify --key env://COSIGN_PUBLIC_KEY "${ref}" -o text; then
echo "::warning title=Verify(pubkey) failed::${ref}"
errs=$((errs+1))
fi
echo "==> cosign verify (keyless policy) ${ref}"
if ! cosign verify \
--certificate-oidc-issuer "${issuer}" \
--certificate-identity-regexp "${id_regex}" \
"${ref}" -o text; then
echo "::warning title=Verify(keyless) failed::${ref}"
errs=$((errs+1))
else
v_ok=$((v_ok+1))
fi
done < src-tags.txt
echo "---- Summary ----"
echo "Copied : $copied"
echo "Skipped : $skipped"
echo "Verified OK : $v_ok"
echo "Errors : $errs"

31
.github/workflows/test.yml vendored Normal file
View File

@@ -0,0 +1,31 @@
name: Run Tests
permissions:
contents: read
on:
pull_request:
branches:
- main
- dev
jobs:
test:
runs-on: amd64-runner
steps:
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
- name: Set up Go
uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
with:
go-version: 1.26
- name: Build go
run: go build
- name: Build Docker image
run: make build
- name: Build binaries
run: make go-build-release

4
.gitignore vendored
View File

@@ -1 +1,3 @@
gerbil
gerbil
.DS_Store
bin/

1
.go-version Normal file
View File

@@ -0,0 +1 @@
1.25

View File

@@ -4,11 +4,7 @@ Contributions are welcome!
Please see the contribution and local development guide on the docs page before getting started:
https://docs.fossorial.io/development
For ideas about what features to work on and our future plans, please see the roadmap:
https://docs.fossorial.io/roadmap
https://docs.pangolin.net/development/contributing
### Licensing Considerations

View File

@@ -1,4 +1,4 @@
FROM golang:1.23.1-alpine AS builder
FROM golang:1.26-alpine AS builder
# Set the working directory inside the container
WORKDIR /app
@@ -16,16 +16,13 @@ COPY . .
RUN CGO_ENABLED=0 GOOS=linux go build -o /gerbil
# Start a new stage from scratch
FROM ubuntu:22.04 AS runner
FROM alpine:3.23 AS runner
RUN apk add --no-cache iptables iproute2
# Copy the pre-built binary file from the previous stage and the entrypoint script
COPY --from=builder /gerbil /usr/local/bin/
COPY entrypoint.sh /
RUN chmod +x /entrypoint.sh
# Copy the entrypoint script
ENTRYPOINT ["/entrypoint.sh"]
# Command to run the executable
CMD ["gerbil"]

View File

@@ -1,6 +1,14 @@
all: build push
docker-build-release:
@if [ -z "$(tag)" ]; then \
echo "Error: tag is required. Usage: make docker-build-release tag=<tag>"; \
exit 1; \
fi
docker buildx build --platform linux/arm64,linux/amd64 -t fosrl/gerbil:latest -f Dockerfile --push .
docker buildx build --platform linux/arm64,linux/amd64 -t fosrl/gerbil:$(tag) -f Dockerfile --push .
build:
docker build -t fosrl/gerbil:latest .
@@ -13,5 +21,9 @@ test:
local:
CGO_ENABLED=0 GOOS=linux go build -o gerbil
go-build-release:
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o bin/gerbil_linux_arm64
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o bin/gerbil_linux_amd64
clean:
rm gerbil
rm gerbil

View File

@@ -4,16 +4,9 @@ Gerbil is a simple [WireGuard](https://www.wireguard.com/) interface management
### Installation and Documentation
Gerbil can be used standalone with your own API, a static JSON file, or with Pangolin and Newt as part of the larger system. See documentation below:
Gerbil works with Pangolin, Newt, and Olm as part of the larger system. See documentation below:
- [Installation Instructions](https://docs.fossorial.io)
- [Full Documentation](https://docs.fossorial.io)
## Preview
<img src="public/screenshots/preview.png" alt="Preview"/>
_Sample output of a Gerbil container connected to Pangolin and terminating various peers._
- [Full Documentation](https://docs.pangolin.net)
## Key Functions
@@ -27,30 +20,89 @@ Gerbil will create the peers defined in the config on the WireGuard interface. T
### Report Bandwidth
Bytes transmitted in and out of each peer are collected every 10 seconds, and incremental usage is reported via the "reportBandwidthTo" endpoint. This can be used to track data usage of each peer on the remote server.
Bytes transmitted in and out of each peer are collected every 10 seconds, and incremental usage is reported via the api endpoint. This can be used to track data usage of each peer on the remote server.
### Handle client relaying
Gerbil listens on port 21820 for incoming UDP hole punch packets to orchestrate NAT hole punching between olm and newt clients. Additionally, it handles relaying data through the gerbil server down to the newt. This is accomplished by scanning each packet for headers and handling them appropriately.
### SNI Proxy
Gerbil includes an SNI (Server Name Indication) proxy that enables intelligent routing of HTTPS traffic between Pangolin nodes. When a TLS connection comes in, the proxy extracts the hostname from the SNI extension and queries Pangolin to determine the correct routing destination. This allows seamless routing of web traffic through the WireGuard mesh network:
- If the hostname is configured for local handling (via local overrides or local SNIs), traffic is routed to the local proxy
- Otherwise, the proxy queries Pangolin's routing API to determine which node should handle the traffic
- Supports caching of routing decisions to improve performance
- Handles connection pooling and graceful shutdown
- Optional PROXY protocol v1 support to preserve original client IP addresses when forwarding to downstream proxies (HAProxy, Nginx, etc.)
The PROXY protocol allows downstream proxies to know the real client IP address instead of seeing the SNI proxy's IP. When enabled with `--proxy-protocol`, the SNI proxy will prepend a PROXY protocol header to each connection containing the original client's IP and port information.
In single node (self hosted) Pangolin deployments this can be bypassed by using port 443:443 to route to Traefik instead of the SNI proxy at 8443.
### Observability with OpenTelemetry
Gerbil includes comprehensive OpenTelemetry metrics instrumentation for monitoring and observability. Metrics can be exported via:
- **Prometheus**: Pull-based metrics at the `/metrics` endpoint (enabled by default)
- **OTLP**: Push-based metrics to any OpenTelemetry-compatible collector
Key metrics include:
- WireGuard interface and peer status
- Bandwidth usage per peer
- Active relay sessions and proxy connections
- Handshake success/failure rates
- Route lookup cache hit/miss ratios
- Go runtime metrics (GC, goroutines, memory)
See [docs/observability.md](docs/observability.md) for complete documentation, metrics reference, and examples.
## CLI Args
Important:
- `reachableAt`: How should the remote server reach Gerbil's API?
- `generateAndSaveKeyTo`: Where to save the generated WireGuard private key to persist across restarts.
- `remoteConfig` (optional): Remote config location to HTTP get the JSON based config from. See `example_config.json`
- `config` (optional): Local JSON file path to load config. Used if remote config is not supplied. See `example_config.json`
- `remoteConfig`: Remote config location to HTTP get the JSON based config from.
Note: You must use either `config` or `remoteConfig` to configure WireGuard.
- `reportBandwidthTo` (optional): Remote HTTP endpoint to send peer bandwidth data
Others:
- `reportBandwidthTo` (optional): **DEPRECATED** - Use `remoteConfig` instead. Remote HTTP endpoint to send peer bandwidth data
- `interface` (optional): Name of the WireGuard interface created by Gerbil. Default: `wg0`
- `listen` (optional): Port to listen on for HTTP server. Default: `3003`
- `log-level` (optional): The log level to use. Default: INFO
- `listen` (optional): Port to listen on for HTTP server. Default: `:3004`
- `log-level` (optional): The log level to use (DEBUG, INFO, WARN, ERROR, FATAL). Default: `INFO`
- `mtu` (optional): MTU of the WireGuard interface. Default: `1280`
- `notify` (optional): URL to notify on peer changes
- `sni-port` (optional): Port for the SNI proxy to listen on. Default: `8443`
- `local-proxy` (optional): Address for local proxy when routing local traffic. Default: `localhost`
- `local-proxy-port` (optional): Port for local proxy when routing local traffic. Default: `443`
- `local-overrides` (optional): Comma-separated list of domain names that should always be routed to the local proxy
- `proxy-protocol` (optional): Enable PROXY protocol v1 for preserving client IP addresses when forwarding to downstream proxies. Default: `false`
## Environment Variables
All CLI arguments can also be provided via environment variables:
- `INTERFACE`: Name of the WireGuard interface
- `REMOTE_CONFIG`: URL of the remote config server
- `LISTEN`: Address to listen on for HTTP server
- `GENERATE_AND_SAVE_KEY_TO`: Path to save generated private key
- `REACHABLE_AT`: Endpoint of the HTTP server to tell remote config about
- `LOG_LEVEL`: Log level (DEBUG, INFO, WARN, ERROR, FATAL)
- `MTU`: MTU of the WireGuard interface
- `NOTIFY_URL`: URL to notify on peer changes
- `SNI_PORT`: Port for the SNI proxy to listen on
- `LOCAL_PROXY`: Address for local proxy when routing local traffic
- `LOCAL_PROXY_PORT`: Port for local proxy when routing local traffic
- `LOCAL_OVERRIDES`: Comma-separated list of domain names that should always be routed to the local proxy
- `PROXY_PROTOCOL`: Enable PROXY protocol v1 for preserving client IP addresses (true/false)
Example:
```bash
./gerbil \
--reachableAt=http://gerbil:3003 \
--reachableAt=http://gerbil:3004 \
--generateAndSaveKeyTo=/var/config/key \
--remoteConfig=http://pangolin:3001/api/v1/gerbil/get-config \
--reportBandwidthTo=http://pangolin:3001/api/v1/gerbil/receive-bandwidth
--remoteConfig=http://pangolin:3001/api/v1/
```
```yaml
@@ -60,10 +112,9 @@ services:
container_name: gerbil
restart: unless-stopped
command:
- --reachableAt=http://gerbil:3003
- --reachableAt=http://gerbil:3004
- --generateAndSaveKeyTo=/var/config/key
- --remoteConfig=http://pangolin:3001/api/v1/gerbil/get-config
- --reportBandwidthTo=http://pangolin:3001/api/v1/gerbil/receive-bandwidth
- --remoteConfig=http://pangolin:3001/api/v1/
volumes:
- ./config/:/var/config
cap_add:
@@ -71,6 +122,8 @@ services:
- SYS_MODULE
ports:
- 51820:51820/udp
- 21820:21820/udp
- 443:8443/tcp # SNI proxy port
```
## Build

View File

@@ -3,7 +3,7 @@
If you discover a security vulnerability, please follow the steps below to responsibly disclose it to us:
1. **Do not create a public GitHub issue or discussion post.** This could put the security of other users at risk.
2. Send a detailed report to [security@pangolin.net](mailto:security@pangolin.net) or send a **private** message to a maintainer on [Discord](https://discord.gg/HCJR8Xhme4). Include:
- Description and location of the vulnerability.
- Potential impact of the vulnerability.

View File

@@ -1,23 +0,0 @@
{
"privateKey": "kBGTgk7c+zncEEoSnMl+jsLjVh5ZVoL/HwBSQem+d1M=",
"listenPort": 51820,
"ipAddress": "10.0.0.1/24",
"peers": [
{
"publicKey": "5UzzoeveFVSzuqK3nTMS5bA1jIMs1fQffVQzJ8MXUQM=",
"allowedIps": ["10.0.0.0/28"]
},
{
"publicKey": "kYrZpuO2NsrFoBh1GMNgkhd1i9Rgtu1rAjbJ7qsfngU=",
"allowedIps": ["10.0.0.16/28"]
},
{
"publicKey": "1YfPUVr9ZF4zehkbI2BQhCxaRLz+Vtwa4vJwH+mpK0A=",
"allowedIps": ["10.0.0.32/28"]
},
{
"publicKey": "2/U4oyZ+sai336Dal/yExCphL8AxyqvIxMk4qsUy4iI=",
"allowedIps": ["10.0.0.48/28"]
}
]
}

269
docs/observability.md Normal file
View File

@@ -0,0 +1,269 @@
<!-- markdownlint-disable MD036 MD060 -->
# Gerbil Observability Architecture
This document describes the metrics subsystem for Gerbil, explains the design
decisions, and shows how to configure each backend.
---
## Architecture Overview
Gerbil's metrics subsystem uses a **pluggable backend** design:
```text
main.go ─── internal/metrics ─── internal/observability ─── backend
(facade) (interface) Prometheus
OR OTel/OTLP
OR Noop (disabled)
```
Application code (main, relay, proxy) calls only the `metrics.Record*`
functions in `internal/metrics`. That package delegates to whichever backend
was selected at startup via `internal/observability.Backend`.
### Why Prometheus-native and OTel are mutually exclusive
**Exactly one** metrics backend may be active at runtime:
| Mode | What happens |
|------|-------------|
| `prometheus` | Native Prometheus client registers metrics on a dedicated registry and exposes `/metrics`. No OTel SDK is initialised. |
| `otel` | OTel SDK pushes metrics via OTLP/gRPC or OTLP/HTTP to an external collector. No `/metrics` endpoint is exposed. |
| `none` | A safe noop backend is used. All `Record*` calls are discarded. |
Running both simultaneously would mean every metric is recorded twice through
two different code paths, with differing semantics (pull vs. push, different
naming rules, different cardinality handling). The design enforces a single
source of truth.
### Future OTel tracing and logging
The `internal/observability/otel/` package is designed so that tracing and
logging support can be added **beside** the existing metrics code without
touching the Prometheus-native path:
```bash
internal/observability/otel/
backend.go ← metrics
exporter.go ← OTLP exporter creation
resource.go ← OTel resource
trace.go ← future: TracerProvider setup
log.go ← future: LoggerProvider setup
```
---
## Configuration
### Config precedence
1. CLI flags (highest priority)
2. Environment variables
3. Defaults
### Config struct
```go
type MetricsConfig struct {
Enabled bool
Backend string // "prometheus" | "otel" | "none"
Prometheus PrometheusConfig
OTel OTelConfig
ServiceName string
ServiceVersion string
DeploymentEnvironment string
}
type PrometheusConfig struct {
Path string // default: "/metrics"
}
type OTelConfig struct {
Protocol string // "grpc" (default) or "http"
Endpoint string // default: "localhost:4317"
Insecure bool // default: true
ExportInterval time.Duration // default: 60s
}
```
### Environment variables
| Variable | Default | Description |
|----------|---------|-------------|
| `METRICS_ENABLED` | `true` | Enable/disable metrics |
| `METRICS_BACKEND` | `prometheus` | Backend: `prometheus`, `otel`, or `none` |
| `METRICS_PATH` | `/metrics` | HTTP path for Prometheus endpoint |
| `OTEL_METRICS_PROTOCOL` | `grpc` | OTLP transport: `grpc` or `http` |
| `OTEL_METRICS_ENDPOINT` | `localhost:4317` | OTLP collector address |
| `OTEL_METRICS_INSECURE` | `true` | Disable TLS for OTLP |
| `OTEL_METRICS_EXPORT_INTERVAL` | `60s` | Push interval (e.g. `10s`, `1m`) |
| `DEPLOYMENT_ENVIRONMENT` | _(unset)_ | OTel deployment.environment attribute |
### CLI flags
```bash
--metrics-enabled bool (default: true)
--metrics-backend string (default: prometheus)
--metrics-path string (default: /metrics)
--otel-metrics-protocol string (default: grpc)
--otel-metrics-endpoint string (default: localhost:4317)
--otel-metrics-insecure bool (default: true)
--otel-metrics-export-interval duration (default: 1m0s)
```
---
## When to choose each backend
| Criterion | Prometheus | OTel/OTLP |
|-----------|-----------|-----------|
| Existing Prometheus/Grafana stack | ✅ | |
| Pull-based scraping | ✅ | |
| No external collector required | ✅ | |
| Vendor-neutral telemetry | | ✅ |
| Push-based export | | ✅ |
| Grafana Cloud / managed OTLP | | ✅ |
| Future traces + logs via same pipeline | | ✅ |
---
## Enabling Prometheus-native mode
### Environment variables
```bash
METRICS_ENABLED=true
METRICS_BACKEND=prometheus
METRICS_PATH=/metrics
```
### CLI
```bash
./gerbil --metrics-enabled --metrics-backend=prometheus --metrics-path=/metrics \
--config=/etc/gerbil/config.json
```
The metrics config is supplied separately via env/flags; it is not embedded
in the WireGuard config file.
The Prometheus `/metrics` endpoint is registered only when
`--metrics-backend=prometheus`. All gerbil_* metrics plus Go runtime metrics
are available.
---
## Enabling OTel mode
### Environment variables
```bash
export METRICS_ENABLED=true
export METRICS_BACKEND=otel
export OTEL_METRICS_PROTOCOL=grpc
export OTEL_METRICS_ENDPOINT=otel-collector:4317
export OTEL_METRICS_INSECURE=true
export OTEL_METRICS_EXPORT_INTERVAL=10s
export DEPLOYMENT_ENVIRONMENT=production
```
### CLI
```bash
./gerbil --metrics-enabled \
--metrics-backend=otel \
--otel-metrics-protocol=grpc \
--otel-metrics-endpoint=otel-collector:4317 \
--otel-metrics-insecure \
--otel-metrics-export-interval=10s \
--config=/etc/gerbil/config.json
```
### HTTP mode (OTLP/HTTP)
```bash
export OTEL_METRICS_PROTOCOL=http
export OTEL_METRICS_ENDPOINT=otel-collector:4318
```
---
## Disabling metrics
```bash
export METRICS_ENABLED=false
# or
./gerbil --metrics-enabled=false
# or
./gerbil --metrics-backend=none
```
When disabled, all `Record*` calls are directed to a safe noop backend that
discards observations without allocating or locking.
---
## Metric catalog
All metrics use the prefix `gerbil_<component>_<name>`.
### WireGuard metrics
| Metric | Type | Labels | Description |
|--------|------|--------|-------------|
| `gerbil_wg_interface_up` | Gauge | `ifname`, `instance` | 1=up, 0=down |
| `gerbil_wg_peers_total` | UpDownCounter | `ifname` | Configured peers |
| `gerbil_wg_peer_connected` | Gauge | `ifname`, `peer` | 1=connected, 0=disconnected |
| `gerbil_wg_bytes_received_total` | Counter | `ifname`, `peer` | Bytes received |
| `gerbil_wg_bytes_transmitted_total` | Counter | `ifname`, `peer` | Bytes transmitted |
| `gerbil_wg_handshakes_total` | Counter | `ifname`, `peer`, `result` | Handshake attempts |
| `gerbil_wg_handshake_latency_seconds` | Histogram | `ifname`, `peer` | Handshake duration |
| `gerbil_wg_peer_rtt_seconds` | Histogram | `ifname`, `peer` | Peer round-trip time |
### Relay metrics
| Metric | Type | Labels |
|--------|------|--------|
| `gerbil_proxy_mapping_active` | UpDownCounter | `ifname` |
| `gerbil_session_active` | UpDownCounter | `ifname` |
| `gerbil_active_sessions` | UpDownCounter | `ifname` |
| `gerbil_udp_packets_total` | Counter | `ifname`, `type`, `direction` |
| `gerbil_hole_punch_events_total` | Counter | `ifname`, `result` |
### SNI proxy metrics
| Metric | Type | Labels |
|--------|------|--------|
| `gerbil_sni_connections_total` | Counter | `result` |
| `gerbil_sni_active_connections` | UpDownCounter | _(none)_ |
| `gerbil_sni_route_cache_hits_total` | Counter | `result` |
| `gerbil_sni_route_api_requests_total` | Counter | `result` |
| `gerbil_proxy_route_lookups_total` | Counter | `result` |
### HTTP metrics
| Metric | Type | Labels |
|--------|------|--------|
| `gerbil_http_requests_total` | Counter | `endpoint`, `method`, `status_code` |
| `gerbil_http_request_duration_seconds` | Histogram | `endpoint`, `method` |
---
## Using Docker Compose
The `docker-compose.metrics.yml` provides a complete observability stack.
**Prometheus mode:**
```bash
METRICS_BACKEND=prometheus docker-compose -f docker-compose.metrics.yml up -d
# Scrape at http://localhost:3003/metrics
# Grafana at http://localhost:3000 (admin/admin)
```
**OTel mode:**
```bash
METRICS_BACKEND=otel OTEL_METRICS_ENDPOINT=otel-collector:4317 \
docker-compose -f docker-compose.metrics.yml up -d
```

View File

@@ -0,0 +1,46 @@
receivers:
otlp:
protocols:
grpc:
endpoint: 0.0.0.0:4317
http:
endpoint: 0.0.0.0:4318
processors:
batch:
timeout: 10s
send_batch_size: 1024
# Add resource attributes
resource:
attributes:
- key: service.environment
value: "development"
action: insert
exporters:
# Prometheus exporter for scraping
prometheus:
endpoint: "0.0.0.0:8889"
namespace: "gerbil"
send_timestamps: true
metric_expiration: 5m
resource_to_telemetry_conversion:
enabled: true
# Prometheus remote write (optional)
prometheusremotewrite:
endpoint: "http://prometheus:9090/api/v1/write"
tls:
insecure: true
# Debug exporter for debugging
debug:
verbosity: normal
service:
pipelines:
metrics:
receivers: [otlp]
processors: [batch, resource]
exporters: [prometheus, prometheusremotewrite, debug]

24
examples/prometheus.yml Normal file
View File

@@ -0,0 +1,24 @@
global:
scrape_interval: 15s
evaluation_interval: 15s
external_labels:
cluster: 'gerbil-dev'
scrape_configs:
# Scrape Gerbil's /metrics endpoint directly
- job_name: 'gerbil'
static_configs:
- targets: ['gerbil:3003']
labels:
service: 'gerbil'
environment: 'development'
# Scrape OpenTelemetry Collector metrics
- job_name: 'otel-collector'
static_configs:
- targets: ['otel-collector:8888']
labels:
service: 'otel-collector'
- targets: ['otel-collector:8889']
labels:
service: 'otel-collector-prometheus-exporter'

45
go.mod
View File

@@ -1,23 +1,50 @@
module github.com/fosrl/gerbil
go 1.26.0
require (
github.com/vishvananda/netlink v1.3.0
github.com/patrickmn/go-cache v2.1.0+incompatible
github.com/prometheus/client_golang v1.20.5
github.com/vishvananda/netlink v1.3.1
go.opentelemetry.io/otel v1.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0
go.opentelemetry.io/otel/metric v1.42.0
go.opentelemetry.io/otel/sdk v1.42.0
go.opentelemetry.io/otel/sdk/metric v1.42.0
golang.org/x/crypto v0.49.0
golang.org/x/sync v0.20.0
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6
)
require (
github.com/google/go-cmp v0.5.9 // indirect
github.com/beorn7/perks v1.0.1 // indirect
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
github.com/cespare/xxhash/v2 v2.3.0 // indirect
github.com/go-logr/logr v1.4.3 // indirect
github.com/go-logr/stdr v1.2.2 // indirect
github.com/google/go-cmp v0.7.0 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
github.com/josharian/native v1.1.0 // indirect
github.com/klauspost/compress v1.17.9 // indirect
github.com/mdlayher/genetlink v1.3.2 // indirect
github.com/mdlayher/netlink v1.7.2 // indirect
github.com/mdlayher/socket v0.4.1 // indirect
github.com/vishvananda/netns v0.0.4 // indirect
golang.org/x/crypto v0.8.0 // indirect
golang.org/x/net v0.9.0 // indirect
golang.org/x/sync v0.1.0 // indirect
golang.org/x/sys v0.10.0 // indirect
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
github.com/prometheus/client_model v0.6.1 // indirect
github.com/prometheus/common v0.61.0 // indirect
github.com/prometheus/procfs v0.15.1 // indirect
github.com/vishvananda/netns v0.0.5 // indirect
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
go.opentelemetry.io/otel/trace v1.42.0 // indirect
go.opentelemetry.io/proto/otlp v1.9.0 // indirect
golang.org/x/net v0.51.0 // indirect
golang.org/x/sys v0.42.0 // indirect
golang.org/x/text v0.35.0 // indirect
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b // indirect
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect
google.golang.org/grpc v1.79.3 // indirect
google.golang.org/protobuf v1.36.11 // indirect
)

102
go.sum
View File

@@ -1,29 +1,101 @@
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs=
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c=
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
github.com/mdlayher/genetlink v1.3.2 h1:KdrNKe+CTu+IbZnm/GVUMXSqBBLqcGpRDa0xkQy56gw=
github.com/mdlayher/genetlink v1.3.2/go.mod h1:tcC3pkCrPUGIKKsCsp0B3AdaaKuHtaxoJRz3cc+528o=
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw=
github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U=
github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
golang.org/x/crypto v0.8.0 h1:pd9TJtTueMTVQXzk8E2XESSMQDj/U7OUu0PqJqPXQjQ=
golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE=
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721 h1:RlZweED6sbSArvlE924+mUcZuXKLBHA35U7LN621Bws=
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721/go.mod h1:Ickgr2WtCLZ2MDGd4Gr0geeCH5HybhRJbonOgQpvSxc=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ=
github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s=
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho=
go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0 h1:MdKucPl/HbzckWWEisiNqMPhRrAOQX8r4jTuGr636gk=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0/go.mod h1:RolT8tWtfHcjajEH5wFIZ4Dgh5jpPdFXYV9pTAk/qjc=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0 h1:H7O6RlGOMTizyl3R08Kn5pdM06bnH8oscSj7o11tmLA=
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0/go.mod h1:mBFWu/WOVDkWWsR7Tx7h6EpQB8wsv7P0Yrh0Pb7othc=
go.opentelemetry.io/otel/metric v1.42.0 h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4=
go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI=
go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo=
go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts=
go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA=
go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc=
go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY=
go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc=
go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A=
go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4=
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b h1:J1CaxgLerRR5lgx3wnr6L04cJFbWoceSK9JWBdglINo=
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b/go.mod h1:tqur9LnfstdR9ep2LaJT4lFUl0EjlHtge+gAjmsHUG4=
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6 h1:CawjfCvYQH2OU3/TnxLx97WDSUDRABfT18pCOYwc2GE=
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6/go.mod h1:3rxYc4HtVcSG9gVaTs2GEBdehh+sYPOwKtyUWEOTb80=
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0=
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 h1:mWPCjDEyshlQYzBpMNHaEof6UX1PmHcaUODUywQ0uac=
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE=
google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

506
internal/metrics/metrics.go Normal file
View File

@@ -0,0 +1,506 @@
// Package metrics provides the application-level metrics facade for Gerbil.
//
// Application code (main, relay, proxy) uses only the Record* functions in this
// package. The actual recording is delegated to the backend selected in
// internal/observability. Neither Prometheus nor OTel packages are imported here.
package metrics
import (
"context"
"net/http"
"github.com/fosrl/gerbil/internal/observability"
)
// Config is the metrics configuration type. It is an alias for
// observability.MetricsConfig so callers do not need to import observability.
type Config = observability.MetricsConfig

// PrometheusConfig is re-exported for convenience, mirroring
// observability.PrometheusConfig.
type PrometheusConfig = observability.PrometheusConfig

// OTelConfig is re-exported for convenience, mirroring
// observability.OTelConfig.
type OTelConfig = observability.OTelConfig
var (
	// backend is the observability backend selected by Initialize. It, and
	// every instrument below, is nil until Initialize has been called; the
	// Record* functions assume initialization happened first.
	backend observability.Backend

	// Interface and peer metrics
	wgInterfaceUp      observability.Int64Gauge
	wgPeersTotal       observability.UpDownCounter
	wgPeerConnected    observability.Int64Gauge
	wgHandshakesTotal  observability.Counter
	wgHandshakeLatency observability.Histogram
	wgPeerRTT          observability.Histogram
	wgBytesReceived    observability.Counter
	wgBytesTransmitted observability.Counter
	allowedIPsCount    observability.UpDownCounter
	keyRotationTotal   observability.Counter

	// System and proxy metrics
	netlinkEventsTotal     observability.Counter
	netlinkErrorsTotal     observability.Counter
	syncDuration           observability.Histogram
	workqueueDepth         observability.UpDownCounter
	kernelModuleLoads      observability.Counter
	firewallRulesApplied   observability.Counter
	activeSessions         observability.UpDownCounter
	activeProxyConnections observability.UpDownCounter
	proxyRouteLookups      observability.Counter
	proxyTLSHandshake      observability.Histogram
	proxyBytesTransmitted  observability.Counter

	// UDP Relay / Proxy Metrics
	udpPacketsTotal            observability.Counter
	udpPacketSizeBytes         observability.Histogram
	holePunchEventsTotal       observability.Counter
	proxyMappingActive         observability.UpDownCounter
	sessionActive              observability.UpDownCounter
	sessionRebuiltTotal        observability.Counter
	commPatternActive          observability.UpDownCounter
	proxyCleanupRemovedTotal   observability.Counter
	proxyConnectionErrorsTotal observability.Counter
	proxyInitialMappingsTotal  observability.Int64Gauge
	proxyMappingUpdatesTotal   observability.Counter
	proxyIdleCleanupDuration   observability.Histogram

	// SNI Proxy Metrics
	sniConnectionsTotal              observability.Counter
	sniConnectionDuration            observability.Histogram
	sniActiveConnections             observability.UpDownCounter
	sniRouteCacheHitsTotal           observability.Counter
	sniRouteAPIRequestsTotal         observability.Counter
	sniRouteAPILatency               observability.Histogram
	sniLocalOverrideTotal            observability.Counter
	sniTrustedProxyEventsTotal       observability.Counter
	sniProxyProtocolParseErrorsTotal observability.Counter
	sniDataBytesTotal                observability.Counter
	sniTunnelTerminationsTotal       observability.Counter

	// HTTP API & Peer Management Metrics
	httpRequestsTotal               observability.Counter
	httpRequestDuration             observability.Histogram
	peerOperationsTotal             observability.Counter
	proxyMappingUpdateRequestsTotal observability.Counter
	destinationsUpdateRequestsTotal observability.Counter

	// Remote Configuration, Reporting & Housekeeping
	remoteConfigFetchesTotal observability.Counter
	bandwidthReportsTotal    observability.Counter
	peerBandwidthBytesTotal  observability.Counter
	memorySpikeTotal         observability.Counter
	heapProfilesWrittenTotal observability.Counter

	// Operational metrics
	configReloadsTotal    observability.Counter
	restartTotal          observability.Counter
	authFailuresTotal     observability.Counter
	aclDeniedTotal        observability.Counter
	certificateExpiryDays observability.Float64Gauge
)
// DefaultConfig returns the baseline metrics configuration, delegating to
// observability.DefaultMetricsConfig so both packages agree on defaults.
func DefaultConfig() Config {
	cfg := observability.DefaultMetricsConfig()
	return cfg
}
// Initialize selects and starts the configured metrics backend, then creates
// every instrument on it. The returned http.Handler is the Prometheus
// /metrics endpoint; it is non-nil only when the Prometheus backend is active.
func Initialize(cfg Config) (http.Handler, error) {
	selected, err := observability.New(cfg)
	if err != nil {
		return nil, err
	}
	backend = selected

	if instErr := createInstruments(); instErr != nil {
		return nil, instErr
	}

	return backend.HTTPHandler(), nil
}
// Shutdown stops the active metrics backend, flushing any pending exports.
// It is a no-op when Initialize was never called (backend is nil).
func Shutdown(ctx context.Context) error {
	if backend == nil {
		return nil
	}
	return backend.Shutdown(ctx)
}
// createInstruments builds every package-level instrument on the currently
// selected backend. It must run after Initialize has set `backend`, and it
// always returns nil (the error return is kept for future backends whose
// instrument creation can fail).
func createInstruments() error {
	// Shared histogram bucket boundaries: latencies in seconds, packet sizes
	// in bytes, and coarser buckets for long-lived SNI connections.
	durationBuckets := []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}
	sizeBuckets := []float64{512, 1024, 4096, 16384, 65536, 262144, 1048576}
	sniDurationBuckets := []float64{0.1, 0.5, 1, 2.5, 5, 10, 30, 60, 120}
	b := backend

	// WireGuard interface and peer metrics.
	wgInterfaceUp = b.NewInt64Gauge("gerbil_wg_interface_up",
		"Operational state of a WireGuard interface (1=up, 0=down)", "ifname", "instance")
	wgPeersTotal = b.NewUpDownCounter("gerbil_wg_peers_total",
		"Total number of configured peers per interface", "ifname")
	wgPeerConnected = b.NewInt64Gauge("gerbil_wg_peer_connected",
		"Whether a specific peer is connected (1=connected, 0=disconnected)", "ifname", "peer")
	allowedIPsCount = b.NewUpDownCounter("gerbil_allowed_ips_count",
		"Number of allowed IPs configured per peer", "ifname", "peer")
	keyRotationTotal = b.NewCounter("gerbil_key_rotation_total",
		"Key rotation events", "ifname", "reason")
	wgHandshakesTotal = b.NewCounter("gerbil_wg_handshakes_total",
		"Count of handshake attempts with their result status", "ifname", "peer", "result")
	wgHandshakeLatency = b.NewHistogram("gerbil_wg_handshake_latency_seconds",
		"Distribution of handshake latencies in seconds", durationBuckets, "ifname", "peer")
	wgPeerRTT = b.NewHistogram("gerbil_wg_peer_rtt_seconds",
		"Observed round-trip time to a peer in seconds", durationBuckets, "ifname", "peer")
	wgBytesReceived = b.NewCounter("gerbil_wg_bytes_received_total",
		"Number of bytes received from a peer", "ifname", "peer")
	wgBytesTransmitted = b.NewCounter("gerbil_wg_bytes_transmitted_total",
		"Number of bytes transmitted to a peer", "ifname", "peer")

	// System, netlink, and generic proxy metrics.
	netlinkEventsTotal = b.NewCounter("gerbil_netlink_events_total",
		"Number of netlink events processed", "event_type")
	netlinkErrorsTotal = b.NewCounter("gerbil_netlink_errors_total",
		"Count of netlink or kernel errors", "component", "error_type")
	syncDuration = b.NewHistogram("gerbil_sync_duration_seconds",
		"Duration of reconciliation/sync loops in seconds", durationBuckets, "component")
	workqueueDepth = b.NewUpDownCounter("gerbil_workqueue_depth",
		"Current length of internal work queues", "queue")
	kernelModuleLoads = b.NewCounter("gerbil_kernel_module_loads_total",
		"Count of kernel module load attempts", "result")
	firewallRulesApplied = b.NewCounter("gerbil_firewall_rules_applied_total",
		"IPTables/NFT rules applied", "result", "chain")
	activeSessions = b.NewUpDownCounter("gerbil_active_sessions",
		"Number of active UDP relay sessions", "ifname")
	activeProxyConnections = b.NewUpDownCounter("gerbil_active_proxy_connections",
		"Active SNI proxy connections")
	proxyRouteLookups = b.NewCounter("gerbil_proxy_route_lookups_total",
		"Number of route lookups", "result")
	proxyTLSHandshake = b.NewHistogram("gerbil_proxy_tls_handshake_seconds",
		"TLS handshake duration for SNI proxy in seconds", durationBuckets)
	proxyBytesTransmitted = b.NewCounter("gerbil_proxy_bytes_transmitted_total",
		"Bytes sent/received by the SNI proxy", "direction")

	// Operational metrics.
	configReloadsTotal = b.NewCounter("gerbil_config_reloads_total",
		"Number of configuration reloads", "result")
	restartTotal = b.NewCounter("gerbil_restart_total",
		"Process restart count")
	authFailuresTotal = b.NewCounter("gerbil_auth_failures_total",
		"Count of authentication or peer validation failures", "peer", "reason")
	aclDeniedTotal = b.NewCounter("gerbil_acl_denied_total",
		"Access control denied events", "ifname", "peer", "policy")
	certificateExpiryDays = b.NewFloat64Gauge("gerbil_certificate_expiry_days",
		"Days until certificate expiry", "cert_name", "ifname")

	// UDP relay / proxy-mapping metrics.
	udpPacketsTotal = b.NewCounter("gerbil_udp_packets_total",
		"Count of UDP packets processed by relay workers", "ifname", "type", "direction")
	udpPacketSizeBytes = b.NewHistogram("gerbil_udp_packet_size_bytes",
		"Size distribution of packets forwarded through relay", sizeBuckets, "ifname", "type")
	holePunchEventsTotal = b.NewCounter("gerbil_hole_punch_events_total",
		"Count of hole punch messages processed", "ifname", "result")
	proxyMappingActive = b.NewUpDownCounter("gerbil_proxy_mapping_active",
		"Number of active proxy mappings", "ifname")
	sessionActive = b.NewUpDownCounter("gerbil_session_active",
		"Number of active WireGuard sessions", "ifname")
	sessionRebuiltTotal = b.NewCounter("gerbil_session_rebuilt_total",
		"Count of sessions rebuilt from communication patterns", "ifname")
	commPatternActive = b.NewUpDownCounter("gerbil_comm_pattern_active",
		"Number of active communication patterns", "ifname")
	proxyCleanupRemovedTotal = b.NewCounter("gerbil_proxy_cleanup_removed_total",
		"Count of items removed during cleanup routines", "ifname", "component")
	proxyConnectionErrorsTotal = b.NewCounter("gerbil_proxy_connection_errors_total",
		"Count of connection errors in proxy operations", "ifname", "error_type")
	proxyInitialMappingsTotal = b.NewInt64Gauge("gerbil_proxy_initial_mappings",
		"Number of initial proxy mappings loaded", "ifname")
	proxyMappingUpdatesTotal = b.NewCounter("gerbil_proxy_mapping_updates_total",
		"Count of proxy mapping updates", "ifname")
	proxyIdleCleanupDuration = b.NewHistogram("gerbil_proxy_idle_cleanup_duration_seconds",
		"Duration of cleanup cycles", durationBuckets, "ifname", "component")

	// SNI proxy metrics.
	sniConnectionsTotal = b.NewCounter("gerbil_sni_connections_total",
		"Count of connections processed by SNI proxy", "result")
	sniConnectionDuration = b.NewHistogram("gerbil_sni_connection_duration_seconds",
		"Lifetime distribution of proxied TLS connections", sniDurationBuckets)
	sniActiveConnections = b.NewUpDownCounter("gerbil_sni_active_connections",
		"Number of active SNI tunnels")
	sniRouteCacheHitsTotal = b.NewCounter("gerbil_sni_route_cache_hits_total",
		"Count of route cache hits and misses", "result")
	sniRouteAPIRequestsTotal = b.NewCounter("gerbil_sni_route_api_requests_total",
		"Count of route API requests", "result")
	sniRouteAPILatency = b.NewHistogram("gerbil_sni_route_api_latency_seconds",
		"Distribution of route API call latencies", durationBuckets)
	sniLocalOverrideTotal = b.NewCounter("gerbil_sni_local_override_total",
		"Count of routes using local overrides", "hit")
	sniTrustedProxyEventsTotal = b.NewCounter("gerbil_sni_trusted_proxy_events_total",
		"Count of PROXY protocol events", "event")
	sniProxyProtocolParseErrorsTotal = b.NewCounter("gerbil_sni_proxy_protocol_parse_errors_total",
		"Count of PROXY protocol parse failures")
	sniDataBytesTotal = b.NewCounter("gerbil_sni_data_bytes_total",
		"Count of bytes proxied through SNI tunnels", "direction")
	sniTunnelTerminationsTotal = b.NewCounter("gerbil_sni_tunnel_terminations_total",
		"Count of tunnel terminations by reason", "reason")

	// HTTP API and peer-management metrics.
	httpRequestsTotal = b.NewCounter("gerbil_http_requests_total",
		"Count of HTTP requests to management API", "endpoint", "method", "status_code")
	httpRequestDuration = b.NewHistogram("gerbil_http_request_duration_seconds",
		"Distribution of HTTP request handling time", durationBuckets, "endpoint", "method")
	peerOperationsTotal = b.NewCounter("gerbil_peer_operations_total",
		"Count of peer lifecycle operations", "operation", "result")
	proxyMappingUpdateRequestsTotal = b.NewCounter("gerbil_proxy_mapping_update_requests_total",
		"Count of proxy mapping update API calls", "result")
	destinationsUpdateRequestsTotal = b.NewCounter("gerbil_destinations_update_requests_total",
		"Count of destinations update API calls", "result")

	// Remote configuration, reporting, and housekeeping metrics.
	remoteConfigFetchesTotal = b.NewCounter("gerbil_remote_config_fetches_total",
		"Count of remote configuration fetch attempts", "result")
	bandwidthReportsTotal = b.NewCounter("gerbil_bandwidth_reports_total",
		"Count of bandwidth report transmissions", "result")
	peerBandwidthBytesTotal = b.NewCounter("gerbil_peer_bandwidth_bytes_total",
		"Bytes per peer tracked by bandwidth calculation", "peer", "direction")
	memorySpikeTotal = b.NewCounter("gerbil_memory_spike_total",
		"Count of memory spikes detected", "severity")
	heapProfilesWrittenTotal = b.NewCounter("gerbil_heap_profiles_written_total",
		"Count of heap profile files generated")
	return nil
}
func RecordInterfaceUp(ifname, instance string, up bool) {
value := int64(0)
if up {
value = 1
}
wgInterfaceUp.Record(context.Background(), value, observability.Labels{"ifname": ifname, "instance": instance})
}
func RecordPeersTotal(ifname string, delta int64) {
wgPeersTotal.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}
func RecordPeerConnected(ifname, peer string, connected bool) {
value := int64(0)
if connected {
value = 1
}
wgPeerConnected.Record(context.Background(), value, observability.Labels{"ifname": ifname, "peer": peer})
}
func RecordHandshake(ifname, peer, result string) {
wgHandshakesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "result": result})
}
func RecordHandshakeLatency(ifname, peer string, seconds float64) {
wgHandshakeLatency.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
}
func RecordPeerRTT(ifname, peer string, seconds float64) {
wgPeerRTT.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
}
func RecordBytesReceived(ifname, peer string, bytes int64) {
wgBytesReceived.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
}
func RecordBytesTransmitted(ifname, peer string, bytes int64) {
wgBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
}
func RecordAllowedIPsCount(ifname, peer string, delta int64) {
allowedIPsCount.Add(context.Background(), delta, observability.Labels{"ifname": ifname, "peer": peer})
}
func RecordKeyRotation(ifname, reason string) {
keyRotationTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "reason": reason})
}
func RecordNetlinkEvent(eventType string) {
netlinkEventsTotal.Add(context.Background(), 1, observability.Labels{"event_type": eventType})
}
func RecordNetlinkError(component, errorType string) {
netlinkErrorsTotal.Add(context.Background(), 1, observability.Labels{"component": component, "error_type": errorType})
}
func RecordSyncDuration(component string, seconds float64) {
syncDuration.Record(context.Background(), seconds, observability.Labels{"component": component})
}
func RecordWorkqueueDepth(queue string, delta int64) {
workqueueDepth.Add(context.Background(), delta, observability.Labels{"queue": queue})
}
func RecordKernelModuleLoad(result string) {
kernelModuleLoads.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordFirewallRuleApplied(result, chain string) {
firewallRulesApplied.Add(context.Background(), 1, observability.Labels{"result": result, "chain": chain})
}
func RecordActiveSession(ifname string, delta int64) {
activeSessions.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}
func RecordActiveProxyConnection(hostname string, delta int64) {
_ = hostname
activeProxyConnections.Add(context.Background(), delta, nil)
}
func RecordProxyRouteLookup(result, hostname string) {
_ = hostname
proxyRouteLookups.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordProxyTLSHandshake(hostname string, seconds float64) {
_ = hostname
proxyTLSHandshake.Record(context.Background(), seconds, nil)
}
func RecordProxyBytesTransmitted(hostname, direction string, bytes int64) {
_ = hostname
proxyBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"direction": direction})
}
func RecordConfigReload(result string) {
configReloadsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordRestart() {
restartTotal.Add(context.Background(), 1, nil)
}
func RecordAuthFailure(peer, reason string) {
authFailuresTotal.Add(context.Background(), 1, observability.Labels{"peer": peer, "reason": reason})
}
func RecordACLDenied(ifname, peer, policy string) {
aclDeniedTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "policy": policy})
}
func RecordCertificateExpiry(certName, ifname string, days float64) {
certificateExpiryDays.Record(context.Background(), days, observability.Labels{"cert_name": certName, "ifname": ifname})
}
func RecordUDPPacket(ifname, packetType, direction string) {
udpPacketsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "type": packetType, "direction": direction})
}
func RecordUDPPacketSize(ifname, packetType string, bytes float64) {
udpPacketSizeBytes.Record(context.Background(), bytes, observability.Labels{"ifname": ifname, "type": packetType})
}
func RecordHolePunchEvent(ifname, result string) {
holePunchEventsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "result": result})
}
func RecordProxyMapping(ifname string, delta int64) {
proxyMappingActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}
func RecordSession(ifname string, delta int64) {
sessionActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}
func RecordSessionRebuilt(ifname string) {
sessionRebuiltTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
}
func RecordCommPattern(ifname string, delta int64) {
commPatternActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}
func RecordProxyCleanupRemoved(ifname, component string, count int64) {
proxyCleanupRemovedTotal.Add(context.Background(), count, observability.Labels{"ifname": ifname, "component": component})
}
func RecordProxyConnectionError(ifname, errorType string) {
proxyConnectionErrorsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "error_type": errorType})
}
func RecordProxyInitialMappings(ifname string, count int64) {
proxyInitialMappingsTotal.Record(context.Background(), count, observability.Labels{"ifname": ifname})
}
func RecordProxyMappingUpdate(ifname string) {
proxyMappingUpdatesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
}
func RecordProxyIdleCleanupDuration(ifname, component string, seconds float64) {
proxyIdleCleanupDuration.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "component": component})
}
func RecordSNIConnection(result string) {
sniConnectionsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordSNIConnectionDuration(seconds float64) {
sniConnectionDuration.Record(context.Background(), seconds, nil)
}
func RecordSNIActiveConnection(delta int64) {
sniActiveConnections.Add(context.Background(), delta, nil)
}
func RecordSNIRouteCacheHit(result string) {
sniRouteCacheHitsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordSNIRouteAPIRequest(result string) {
sniRouteAPIRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordSNIRouteAPILatency(seconds float64) {
sniRouteAPILatency.Record(context.Background(), seconds, nil)
}
func RecordSNILocalOverride(hit string) {
sniLocalOverrideTotal.Add(context.Background(), 1, observability.Labels{"hit": hit})
}
func RecordSNITrustedProxyEvent(event string) {
sniTrustedProxyEventsTotal.Add(context.Background(), 1, observability.Labels{"event": event})
}
func RecordSNIProxyProtocolParseError() {
sniProxyProtocolParseErrorsTotal.Add(context.Background(), 1, nil)
}
func RecordSNIDataBytes(direction string, bytes int64) {
sniDataBytesTotal.Add(context.Background(), bytes, observability.Labels{"direction": direction})
}
func RecordSNITunnelTermination(reason string) {
sniTunnelTerminationsTotal.Add(context.Background(), 1, observability.Labels{"reason": reason})
}
func RecordHTTPRequest(endpoint, method, statusCode string) {
httpRequestsTotal.Add(context.Background(), 1, observability.Labels{"endpoint": endpoint, "method": method, "status_code": statusCode})
}
func RecordHTTPRequestDuration(endpoint, method string, seconds float64) {
httpRequestDuration.Record(context.Background(), seconds, observability.Labels{"endpoint": endpoint, "method": method})
}
func RecordPeerOperation(operation, result string) {
peerOperationsTotal.Add(context.Background(), 1, observability.Labels{"operation": operation, "result": result})
}
func RecordProxyMappingUpdateRequest(result string) {
proxyMappingUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordDestinationsUpdateRequest(result string) {
destinationsUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordRemoteConfigFetch(result string) {
remoteConfigFetchesTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordBandwidthReport(result string) {
bandwidthReportsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}
func RecordPeerBandwidthBytes(peer, direction string, bytes int64) {
peerBandwidthBytesTotal.Add(context.Background(), bytes, observability.Labels{"peer": peer, "direction": direction})
}
func RecordMemorySpike(severity string) {
memorySpikeTotal.Add(context.Background(), 1, observability.Labels{"severity": severity})
}
func RecordHeapProfileWritten() {
heapProfilesWrittenTotal.Add(context.Background(), 1, nil)
}

View File

@@ -0,0 +1,258 @@
package metrics_test
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
"github.com/fosrl/gerbil/internal/metrics"
"github.com/fosrl/gerbil/internal/observability"
)
// exampleHostname is a stable hostname reused by hostname-labelled tests.
const exampleHostname = "example.com"

// initPrometheus initializes the metrics package with the Prometheus backend,
// registers a cleanup that shuts it down, and returns the scrape handler.
func initPrometheus(t *testing.T) http.Handler {
	t.Helper()
	cfg := metrics.DefaultConfig()
	cfg.Backend = "prometheus"
	cfg.Enabled = true
	cfg.Prometheus.Path = "/metrics"
	handler, err := metrics.Initialize(cfg)
	if err != nil {
		t.Fatalf("Initialize failed: %v", err)
	}
	t.Cleanup(func() {
		metrics.Shutdown(context.Background()) //nolint:errcheck
	})
	return handler
}
// initNoop initializes the metrics package in disabled (noop) mode and
// registers a shutdown cleanup.
func initNoop(t *testing.T) {
	t.Helper()
	cfg := metrics.DefaultConfig()
	cfg.Enabled = false
	if _, err := metrics.Initialize(cfg); err != nil {
		t.Fatalf("Initialize noop failed: %v", err)
	}
	t.Cleanup(func() {
		metrics.Shutdown(context.Background()) //nolint:errcheck
	})
}
// scrape performs a GET against the handler's /metrics endpoint and returns
// the response body as a string. It fails the test on a non-200 status or an
// unreadable body.
func scrape(t *testing.T, h http.Handler) string {
	t.Helper()
	req := httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody)
	rr := httptest.NewRecorder()
	h.ServeHTTP(rr, req)
	if rr.Code != http.StatusOK {
		t.Fatalf("scrape returned %d", rr.Code)
	}
	// Previously the read error was silently discarded; a truncated body
	// would have produced confusing downstream assertion failures.
	b, err := io.ReadAll(rr.Body)
	if err != nil {
		t.Fatalf("reading scrape body: %v", err)
	}
	return string(b)
}

// assertContains fails the test when substr is absent from body.
func assertContains(t *testing.T, body, substr string) {
	t.Helper()
	if !strings.Contains(body, substr) {
		t.Errorf("expected %q in output\nbody:\n%s", substr, body)
	}
}
// --- Tests ---

// TestInitializePrometheus verifies the prometheus backend yields a handler.
func TestInitializePrometheus(t *testing.T) {
	if initPrometheus(t) == nil {
		t.Error("expected non-nil HTTP handler for prometheus backend")
	}
}

// TestInitializeNoop verifies Record* helpers are safe under the noop backend.
func TestInitializeNoop(t *testing.T) {
	initNoop(t)
	// All Record* functions must not panic when noop backend is active.
	metrics.RecordRestart()
	metrics.RecordHTTPRequest("/test", "GET", "200")
	metrics.RecordSNIConnection("accepted")
	metrics.RecordPeersTotal("wg0", 1)
}

// TestDefaultConfig checks the out-of-the-box backend selection.
func TestDefaultConfig(t *testing.T) {
	if got := metrics.DefaultConfig().Backend; got != "prometheus" {
		t.Errorf("expected prometheus default backend, got %q", got)
	}
}

// TestShutdownNoInit ensures Shutdown is a safe no-op before Initialize.
func TestShutdownNoInit(t *testing.T) {
	// Shutdown without Initialize should not panic or error.
	if err := metrics.Shutdown(context.Background()); err != nil {
		t.Errorf("unexpected error: %v", err)
	}
}
// TestRecordHTTPRequest verifies the request counter reaches the scrape output.
func TestRecordHTTPRequest(t *testing.T) {
	handler := initPrometheus(t)
	metrics.RecordHTTPRequest("/peers", "POST", "201")
	assertContains(t, scrape(t, handler), "gerbil_http_requests_total")
}

// TestRecordHTTPRequestDuration verifies the duration histogram is exported.
func TestRecordHTTPRequestDuration(t *testing.T) {
	handler := initPrometheus(t)
	metrics.RecordHTTPRequestDuration("/peers", "POST", 0.05)
	assertContains(t, scrape(t, handler), "gerbil_http_request_duration_seconds")
}

// TestRecordInterfaceUp verifies both gauge transitions are exported.
func TestRecordInterfaceUp(t *testing.T) {
	handler := initPrometheus(t)
	metrics.RecordInterfaceUp("wg0", "host1", true)
	metrics.RecordInterfaceUp("wg0", "host1", false)
	assertContains(t, scrape(t, handler), "gerbil_wg_interface_up")
}

// TestRecordPeersTotal verifies the peer count gauge is exported.
func TestRecordPeersTotal(t *testing.T) {
	handler := initPrometheus(t)
	metrics.RecordPeersTotal("wg0", 3)
	assertContains(t, scrape(t, handler), "gerbil_wg_peers_total")
}

// TestRecordBytesReceivedTransmitted verifies both traffic counters export.
func TestRecordBytesReceivedTransmitted(t *testing.T) {
	handler := initPrometheus(t)
	metrics.RecordBytesReceived("wg0", "peer1", 1024)
	metrics.RecordBytesTransmitted("wg0", "peer1", 512)
	out := scrape(t, handler)
	assertContains(t, out, "gerbil_wg_bytes_received_total")
	assertContains(t, out, "gerbil_wg_bytes_transmitted_total")
}
// TestRecordSNI exercises every SNI metric helper and verifies the recorded
// series actually appear in the scrape output. The original test recorded
// eleven metrics but only asserted two of them.
func TestRecordSNI(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordSNIConnection("accepted")
	metrics.RecordSNIActiveConnection(1)
	metrics.RecordSNIConnectionDuration(1.5)
	metrics.RecordSNIRouteCacheHit("hit")
	metrics.RecordSNIRouteAPIRequest("success")
	metrics.RecordSNIRouteAPILatency(0.01)
	metrics.RecordSNILocalOverride("yes")
	metrics.RecordSNITrustedProxyEvent("proxy_protocol_parsed")
	metrics.RecordSNIProxyProtocolParseError()
	metrics.RecordSNIDataBytes("client_to_target", 2048)
	metrics.RecordSNITunnelTermination("eof")
	body := scrape(t, h)
	assertContains(t, body, "gerbil_sni_connections_total")
	assertContains(t, body, "gerbil_sni_active_connections")
	assertContains(t, body, "gerbil_sni_local_override_total")
	assertContains(t, body, "gerbil_sni_trusted_proxy_events_total")
	assertContains(t, body, "gerbil_sni_proxy_protocol_parse_errors_total")
	assertContains(t, body, "gerbil_sni_data_bytes_total")
	assertContains(t, body, "gerbil_sni_tunnel_terminations_total")
}

// TestRecordRelay exercises the relay/UDP metric helpers end to end.
func TestRecordRelay(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordUDPPacket("relay", "data", "in")
	metrics.RecordUDPPacketSize("relay", "data", 256)
	metrics.RecordHolePunchEvent("relay", "success")
	metrics.RecordProxyMapping("relay", 1)
	metrics.RecordSession("relay", 1)
	metrics.RecordSessionRebuilt("relay")
	metrics.RecordCommPattern("relay", 1)
	metrics.RecordProxyCleanupRemoved("relay", "session", 2)
	metrics.RecordProxyConnectionError("relay", "dial_udp")
	metrics.RecordProxyInitialMappings("relay", 5)
	metrics.RecordProxyMappingUpdate("relay")
	metrics.RecordProxyIdleCleanupDuration("relay", "conn", 0.1)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_udp_packets_total")
	assertContains(t, body, "gerbil_proxy_mapping_active")
}
// TestRecordWireGuard exercises the WireGuard peer metric helpers.
func TestRecordWireGuard(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordHandshake("wg0", "peer1", "success")
	metrics.RecordHandshakeLatency("wg0", "peer1", 0.02)
	metrics.RecordPeerRTT("wg0", "peer1", 0.005)
	metrics.RecordPeerConnected("wg0", "peer1", true)
	metrics.RecordAllowedIPsCount("wg0", "peer1", 2)
	metrics.RecordKeyRotation("wg0", "scheduled")
	body := scrape(t, h)
	assertContains(t, body, "gerbil_wg_handshakes_total")
	assertContains(t, body, "gerbil_wg_peer_connected")
}

// TestRecordHousekeeping exercises the housekeeping metric helpers and checks
// every recorded series is exported. The original test asserted only two of
// the five metrics it recorded.
func TestRecordHousekeeping(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordRemoteConfigFetch("success")
	metrics.RecordBandwidthReport("success")
	metrics.RecordPeerBandwidthBytes("peer1", "rx", 512)
	metrics.RecordMemorySpike("warning")
	metrics.RecordHeapProfileWritten()
	body := scrape(t, h)
	assertContains(t, body, "gerbil_remote_config_fetches_total")
	assertContains(t, body, "gerbil_bandwidth_reports_total")
	assertContains(t, body, "gerbil_peer_bandwidth_bytes_total")
	assertContains(t, body, "gerbil_memory_spike_total")
	assertContains(t, body, "gerbil_heap_profiles_written_total")
}

// TestRecordOperational exercises the operational metric helpers.
func TestRecordOperational(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordConfigReload("success")
	metrics.RecordRestart()
	metrics.RecordAuthFailure("peer1", "bad_key")
	metrics.RecordACLDenied("wg0", "peer1", "default-deny")
	metrics.RecordCertificateExpiry(exampleHostname, "wg0", 90.0)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_config_reloads_total")
	assertContains(t, body, "gerbil_restart_total")
}
// TestRecordNetlink exercises the netlink/sync/proxy metric helpers.
func TestRecordNetlink(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordNetlinkEvent("link_up")
	metrics.RecordNetlinkError("wg", "timeout")
	metrics.RecordSyncDuration("config", 0.1)
	metrics.RecordWorkqueueDepth("main", 3)
	metrics.RecordKernelModuleLoad("success")
	metrics.RecordFirewallRuleApplied("success", "INPUT")
	metrics.RecordActiveSession("wg0", 1)
	metrics.RecordActiveProxyConnection(exampleHostname, 1)
	metrics.RecordProxyRouteLookup("hit", exampleHostname)
	metrics.RecordProxyTLSHandshake(exampleHostname, 0.05)
	metrics.RecordProxyBytesTransmitted(exampleHostname, "tx", 1024)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_netlink_events_total")
	assertContains(t, body, "gerbil_active_sessions")
}

// TestRecordPeerOperation exercises the management API counters and checks
// every recorded series is exported. The original test recorded three
// counters but asserted only one.
func TestRecordPeerOperation(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordPeerOperation("add", "success")
	metrics.RecordProxyMappingUpdateRequest("success")
	metrics.RecordDestinationsUpdateRequest("success")
	body := scrape(t, h)
	assertContains(t, body, "gerbil_peer_operations_total")
	assertContains(t, body, "gerbil_proxy_mapping_update_requests_total")
	assertContains(t, body, "gerbil_destinations_update_requests_total")
}
// TestInitializeInvalidBackend ensures an unknown backend name is rejected.
func TestInitializeInvalidBackend(t *testing.T) {
	bad := observability.MetricsConfig{Enabled: true, Backend: "invalid"}
	if _, err := metrics.Initialize(bad); err == nil {
		t.Error("expected error for invalid backend")
	}
}

// TestInitializeBackendNone ensures backend "none" yields no handler and
// leaves the Record* helpers as safe no-ops.
func TestInitializeBackendNone(t *testing.T) {
	cfg := metrics.DefaultConfig()
	cfg.Backend = "none"
	handler, err := metrics.Initialize(cfg)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if handler != nil {
		t.Error("none backend should return nil handler")
	}
	// All Record* calls should be noop
	metrics.RecordRestart()
	metrics.Shutdown(context.Background()) //nolint:errcheck
}

View File

@@ -0,0 +1,119 @@
// Package observability provides a backend-neutral metrics abstraction for Gerbil.
//
// Exactly one metrics backend may be enabled at runtime:
// - "prometheus" native Prometheus client; exposes /metrics (no OTel SDK required)
// - "otel" OpenTelemetry metrics pushed via OTLP (gRPC or HTTP)
// - "none" metrics disabled; a safe noop implementation is used
//
// Future OTel tracing and logging can be added to this package alongside the
// existing otel sub-package without touching the Prometheus-native path.
package observability
import (
"fmt"
"time"
)
// MetricsConfig is the top-level metrics configuration.
type MetricsConfig struct {
	// Enabled gates all metric collection. When false the noop backend is
	// used no matter what Backend says.
	Enabled bool
	// Backend picks the implementation: "prometheus", "otel", or "none".
	Backend string
	// Prometheus configures the Prometheus-native backend only.
	Prometheus PrometheusConfig
	// OTel configures the OpenTelemetry backend only.
	OTel OTelConfig
	// ServiceName is propagated to OTel resource attributes.
	ServiceName string
	// ServiceVersion is propagated to OTel resource attributes.
	ServiceVersion string
	// DeploymentEnvironment is an optional OTel resource attribute.
	DeploymentEnvironment string
}

// PrometheusConfig holds Prometheus-native backend settings.
type PrometheusConfig struct {
	// Path is the HTTP path on which the metrics endpoint is exposed.
	// Defaults to "/metrics".
	Path string
}

// OTelConfig holds OpenTelemetry backend settings.
type OTelConfig struct {
	// Protocol is the OTLP transport: "grpc" (default) or "http".
	Protocol string
	// Endpoint is the OTLP collector address (e.g. "localhost:4317").
	Endpoint string
	// Insecure disables TLS for the OTLP connection.
	Insecure bool
	// ExportInterval is how often metrics are pushed to the collector.
	// Defaults to 60 s.
	ExportInterval time.Duration
}

// DefaultMetricsConfig returns a MetricsConfig with sensible defaults:
// Prometheus backend enabled, scraped at /metrics.
func DefaultMetricsConfig() MetricsConfig {
	cfg := MetricsConfig{
		Enabled:        true,
		Backend:        "prometheus",
		ServiceName:    "gerbil",
		ServiceVersion: "1.0.0",
	}
	cfg.Prometheus = PrometheusConfig{Path: "/metrics"}
	cfg.OTel = OTelConfig{
		Protocol:       "grpc",
		Endpoint:       "localhost:4317",
		Insecure:       true,
		ExportInterval: 60 * time.Second,
	}
	return cfg
}

// Validate checks the configuration for logical errors. A disabled
// configuration is always valid.
func (c *MetricsConfig) Validate() error {
	if !c.Enabled {
		return nil
	}
	switch c.Backend {
	case "otel":
		return c.validateOTel()
	case "prometheus", "none", "":
		return nil
	default:
		return fmt.Errorf("metrics: unknown backend %q (must be \"prometheus\", \"otel\", or \"none\")", c.Backend)
	}
}

// validateOTel checks the OTel-specific settings; called only when
// Backend == "otel".
func (c *MetricsConfig) validateOTel() error {
	if c.OTel.Endpoint == "" {
		return fmt.Errorf("metrics: backend=otel requires a non-empty OTel endpoint")
	}
	if c.OTel.Protocol != "grpc" && c.OTel.Protocol != "http" {
		return fmt.Errorf("metrics: otel protocol must be \"grpc\" or \"http\", got %q", c.OTel.Protocol)
	}
	if c.OTel.ExportInterval <= 0 {
		return fmt.Errorf("metrics: otel export interval must be positive")
	}
	return nil
}

// effectiveBackend resolves the backend string, mapping a disabled config
// and an empty Backend to "none".
func (c *MetricsConfig) effectiveBackend() string {
	switch {
	case !c.Enabled, c.Backend == "":
		return "none"
	default:
		return c.Backend
	}
}

View File

@@ -0,0 +1,152 @@
package observability
import (
"context"
"fmt"
"net/http"
obsotel "github.com/fosrl/gerbil/internal/observability/otel"
obsprom "github.com/fosrl/gerbil/internal/observability/prometheus"
)
// Labels is a set of key-value pairs attached to a metric observation.
// Use only stable, bounded-cardinality label values. Callers in this
// codebase pass nil for instruments that declare no labels.
type Labels = map[string]string

// Counter is a monotonically increasing instrument.
type Counter interface {
	Add(ctx context.Context, value int64, labels Labels)
}

// UpDownCounter is a bidirectional integer instrument (can go up or down).
type UpDownCounter interface {
	Add(ctx context.Context, value int64, labels Labels)
}

// Int64Gauge records a snapshot integer value.
type Int64Gauge interface {
	Record(ctx context.Context, value int64, labels Labels)
}

// Float64Gauge records a snapshot float value.
type Float64Gauge interface {
	Record(ctx context.Context, value float64, labels Labels)
}

// Histogram records a distribution of values.
type Histogram interface {
	Record(ctx context.Context, value float64, labels Labels)
}

// Backend is the single interface that each metrics implementation must satisfy.
// Application code must not import backend-specific packages (prometheus, otel);
// it obtains a Backend via New and uses only these methods.
type Backend interface {
	// NewCounter creates a counter metric.
	// labelNames declares the set of label keys that will be passed at observation time.
	NewCounter(name, desc string, labelNames ...string) Counter
	// NewUpDownCounter creates an up-down counter metric.
	NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter
	// NewInt64Gauge creates an integer gauge metric.
	NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge
	// NewFloat64Gauge creates a float gauge metric.
	NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge
	// NewHistogram creates a histogram metric.
	// buckets are the explicit upper-bound bucket boundaries.
	NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram
	// HTTPHandler returns the /metrics HTTP handler.
	// Implementations that do not expose an HTTP endpoint return nil.
	HTTPHandler() http.Handler
	// Shutdown performs a graceful flush / shutdown of the backend.
	Shutdown(ctx context.Context) error
}
// New creates the backend selected by cfg and returns it.
// Exactly one backend is created; the selection is mutually exclusive.
func New(cfg MetricsConfig) (Backend, error) {
	if err := cfg.Validate(); err != nil {
		return nil, err
	}
	name := cfg.effectiveBackend()
	switch name {
	case "none":
		return &NoopBackend{}, nil
	case "prometheus":
		backend, err := obsprom.New(obsprom.Config{Path: cfg.Prometheus.Path})
		if err != nil {
			return nil, err
		}
		return &promAdapter{b: backend}, nil
	case "otel":
		backend, err := obsotel.New(obsotel.Config{
			Protocol:              cfg.OTel.Protocol,
			Endpoint:              cfg.OTel.Endpoint,
			Insecure:              cfg.OTel.Insecure,
			ExportInterval:        cfg.OTel.ExportInterval,
			ServiceName:           cfg.ServiceName,
			ServiceVersion:        cfg.ServiceVersion,
			DeploymentEnvironment: cfg.DeploymentEnvironment,
		})
		if err != nil {
			return nil, err
		}
		return &otelAdapter{b: backend}, nil
	default:
		// Unreachable after a successful Validate; kept as a defensive guard.
		return nil, fmt.Errorf("observability: unknown backend %q", name)
	}
}
// promAdapter bridges obsprom.Backend to the observability.Backend interface.
// The instrument values returned by the prometheus sub-package satisfy the
// instrument interfaces structurally, without importing this package.
type promAdapter struct {
	b *obsprom.Backend
}

func (p *promAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
	return p.b.NewCounter(name, desc, labelNames...)
}

func (p *promAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
	return p.b.NewUpDownCounter(name, desc, labelNames...)
}

func (p *promAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
	return p.b.NewInt64Gauge(name, desc, labelNames...)
}

func (p *promAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
	return p.b.NewFloat64Gauge(name, desc, labelNames...)
}

func (p *promAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
	return p.b.NewHistogram(name, desc, buckets, labelNames...)
}

func (p *promAdapter) HTTPHandler() http.Handler {
	return p.b.HTTPHandler()
}

func (p *promAdapter) Shutdown(ctx context.Context) error {
	return p.b.Shutdown(ctx)
}
// otelAdapter bridges obsotel.Backend to the observability.Backend interface.
type otelAdapter struct {
	b *obsotel.Backend
}

func (o *otelAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
	return o.b.NewCounter(name, desc, labelNames...)
}

func (o *otelAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
	return o.b.NewUpDownCounter(name, desc, labelNames...)
}

func (o *otelAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
	return o.b.NewInt64Gauge(name, desc, labelNames...)
}

func (o *otelAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
	return o.b.NewFloat64Gauge(name, desc, labelNames...)
}

func (o *otelAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
	return o.b.NewHistogram(name, desc, buckets, labelNames...)
}

func (o *otelAdapter) HTTPHandler() http.Handler {
	return o.b.HTTPHandler()
}

func (o *otelAdapter) Shutdown(ctx context.Context) error {
	return o.b.Shutdown(ctx)
}

View File

@@ -0,0 +1,198 @@
package observability_test
import (
"context"
"testing"
"time"
"github.com/fosrl/gerbil/internal/observability"
)
const (
defaultMetricsPath = "/metrics"
otelGRPCEndpoint = "localhost:4317"
errUnexpectedFmt = "unexpected error: %v"
)
func TestDefaultMetricsConfig(t *testing.T) {
cfg := observability.DefaultMetricsConfig()
if !cfg.Enabled {
t.Error("default config should have Enabled=true")
}
if cfg.Backend != "prometheus" {
t.Errorf("default backend should be prometheus, got %q", cfg.Backend)
}
if cfg.Prometheus.Path != defaultMetricsPath {
t.Errorf("default prometheus path should be %s, got %q", defaultMetricsPath, cfg.Prometheus.Path)
}
if cfg.OTel.Protocol != "grpc" {
t.Errorf("default otel protocol should be grpc, got %q", cfg.OTel.Protocol)
}
if cfg.OTel.ExportInterval != 60*time.Second {
t.Errorf("default otel export interval should be 60s, got %v", cfg.OTel.ExportInterval)
}
}
func TestValidateValidConfigs(t *testing.T) {
tests := []struct {
name string
cfg observability.MetricsConfig
}{
{name: "disabled", cfg: observability.MetricsConfig{Enabled: false}},
{name: "backend none", cfg: observability.MetricsConfig{Enabled: true, Backend: "none"}},
{name: "backend empty", cfg: observability.MetricsConfig{Enabled: true, Backend: ""}},
{name: "prometheus", cfg: observability.MetricsConfig{Enabled: true, Backend: "prometheus"}},
{
name: "otel grpc",
cfg: observability.MetricsConfig{
Enabled: true, Backend: "otel",
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
},
},
{
name: "otel http",
cfg: observability.MetricsConfig{
Enabled: true, Backend: "otel",
OTel: observability.OTelConfig{Protocol: "http", Endpoint: "localhost:4318", ExportInterval: 30 * time.Second},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if err := tt.cfg.Validate(); err != nil {
t.Errorf("unexpected validation error: %v", err)
}
})
}
}
func TestValidateInvalidConfigs(t *testing.T) {
tests := []struct {
name string
cfg observability.MetricsConfig
}{
{name: "unknown backend", cfg: observability.MetricsConfig{Enabled: true, Backend: "datadog"}},
{
name: "otel missing endpoint",
cfg: observability.MetricsConfig{
Enabled: true, Backend: "otel",
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: "", ExportInterval: 10 * time.Second},
},
},
{
name: "otel invalid protocol",
cfg: observability.MetricsConfig{
Enabled: true, Backend: "otel",
OTel: observability.OTelConfig{Protocol: "tcp", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
},
},
{
name: "otel zero interval",
cfg: observability.MetricsConfig{
Enabled: true, Backend: "otel",
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 0},
},
},
}
for _, tt := range tests {
t.Run(tt.name, func(t *testing.T) {
if err := tt.cfg.Validate(); err == nil {
t.Error("expected validation error but got nil")
}
})
}
}
func TestNewNoopBackend(t *testing.T) {
b, err := observability.New(observability.MetricsConfig{Enabled: false})
if err != nil {
t.Fatalf(errUnexpectedFmt, err)
}
if b.HTTPHandler() != nil {
t.Error("noop backend HTTPHandler should return nil")
}
}
func TestNewNoneBackend(t *testing.T) {
b, err := observability.New(observability.MetricsConfig{Enabled: true, Backend: "none"})
if err != nil {
t.Fatalf(errUnexpectedFmt, err)
}
if b.HTTPHandler() != nil {
t.Error("none backend HTTPHandler should return nil")
}
}
func TestNewPrometheusBackend(t *testing.T) {
cfg := observability.MetricsConfig{
Enabled: true, Backend: "prometheus",
Prometheus: observability.PrometheusConfig{Path: defaultMetricsPath},
}
b, err := observability.New(cfg)
if err != nil {
t.Fatalf(errUnexpectedFmt, err)
}
if b.HTTPHandler() == nil {
t.Error("prometheus backend HTTPHandler should not be nil")
}
if err := b.Shutdown(context.Background()); err != nil {
t.Errorf("prometheus shutdown error: %v", err)
}
}
func TestNewInvalidBackend(t *testing.T) {
_, err := observability.New(observability.MetricsConfig{Enabled: true, Backend: "invalid"})
if err == nil {
t.Error("expected error for invalid backend")
}
}
func TestPrometheusAdapterAllInstruments(t *testing.T) {
b, err := observability.New(observability.MetricsConfig{
Enabled: true, Backend: "prometheus",
Prometheus: observability.PrometheusConfig{Path: defaultMetricsPath},
})
if err != nil {
t.Fatalf("failed to create backend: %v", err)
}
ctx := context.Background()
labels := observability.Labels{"k": "v"}
b.NewCounter("prom_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
b.NewUpDownCounter("prom_adapter_updown", "desc", "k").Add(ctx, 2, labels)
b.NewInt64Gauge("prom_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
b.NewFloat64Gauge("prom_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
b.NewHistogram("prom_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)
if b.HTTPHandler() == nil {
t.Error("prometheus adapter HTTPHandler should not be nil")
}
if err := b.Shutdown(ctx); err != nil {
t.Errorf("Shutdown error: %v", err)
}
}
func TestOtelAdapterAllInstruments(t *testing.T) {
b, err := observability.New(observability.MetricsConfig{
Enabled: true, Backend: "otel",
OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, Insecure: true, ExportInterval: 100 * time.Millisecond},
})
if err != nil {
t.Fatalf("failed to create otel backend: %v", err)
}
ctx := context.Background()
labels := observability.Labels{"k": "v"}
b.NewCounter("otel_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
b.NewUpDownCounter("otel_adapter_updown", "desc", "k").Add(ctx, 2, labels)
b.NewInt64Gauge("otel_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
b.NewFloat64Gauge("otel_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
b.NewHistogram("otel_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)
if b.HTTPHandler() != nil {
t.Error("OTel adapter HTTPHandler should be nil")
}
shutdownCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
defer cancel()
b.Shutdown(shutdownCtx) //nolint:errcheck
}

View File

@@ -0,0 +1,71 @@
package observability
import (
"context"
"net/http"
)
// NoopBackend is a Backend that discards every observation. It backs the
// disabled configuration (Enabled=false or Backend="none"). All methods are
// safe to call concurrently.
type NoopBackend struct{}

// Compile-time interface check.
var _ Backend = (*NoopBackend)(nil)

// Anonymous receivers and parameters make the "unused" intent explicit
// without per-line lint suppressions.

func (*NoopBackend) NewCounter(string, string, ...string) Counter {
	return noopCounter{}
}

func (*NoopBackend) NewUpDownCounter(string, string, ...string) UpDownCounter {
	return noopUpDownCounter{}
}

func (*NoopBackend) NewInt64Gauge(string, string, ...string) Int64Gauge {
	return noopInt64Gauge{}
}

func (*NoopBackend) NewFloat64Gauge(string, string, ...string) Float64Gauge {
	return noopFloat64Gauge{}
}

func (*NoopBackend) NewHistogram(string, string, []float64, ...string) Histogram {
	return noopHistogram{}
}

// HTTPHandler returns nil: the noop backend exposes no metrics endpoint.
func (*NoopBackend) HTTPHandler() http.Handler {
	return nil
}

// Shutdown is a no-op and always succeeds.
func (*NoopBackend) Shutdown(context.Context) error {
	return nil
}

// --- noop instrument types ---

type noopCounter struct{}

func (noopCounter) Add(context.Context, int64, Labels) { /* intentionally no-op */ }

type noopUpDownCounter struct{}

func (noopUpDownCounter) Add(context.Context, int64, Labels) { /* intentionally no-op */ }

type noopInt64Gauge struct{}

func (noopInt64Gauge) Record(context.Context, int64, Labels) { /* intentionally no-op */ }

type noopFloat64Gauge struct{}

func (noopFloat64Gauge) Record(context.Context, float64, Labels) { /* intentionally no-op */ }

type noopHistogram struct{}

func (noopHistogram) Record(context.Context, float64, Labels) { /* intentionally no-op */ }

View File

@@ -0,0 +1,67 @@
package observability_test
import (
"context"
"testing"
"github.com/fosrl/gerbil/internal/observability"
)
// TestNoopBackendAllInstruments exercises every instrument produced by the
// no-op backend, then checks that the HTTP handler is nil and Shutdown
// returns no error.
func TestNoopBackendAllInstruments(t *testing.T) {
	backend := &observability.NoopBackend{}
	ctx := context.Background()
	labels := observability.Labels{"k": "v"}
	t.Run("Counter", func(_ *testing.T) {
		counter := backend.NewCounter("test_counter", "desc")
		counter.Add(ctx, 1, labels)
		counter.Add(ctx, 0, nil)
	})
	t.Run("UpDownCounter", func(_ *testing.T) {
		updown := backend.NewUpDownCounter("test_updown", "desc")
		updown.Add(ctx, 1, labels)
		updown.Add(ctx, -1, nil)
	})
	t.Run("Int64Gauge", func(_ *testing.T) {
		gauge := backend.NewInt64Gauge("test_int64gauge", "desc")
		gauge.Record(ctx, 42, labels)
		gauge.Record(ctx, 0, nil)
	})
	t.Run("Float64Gauge", func(_ *testing.T) {
		gauge := backend.NewFloat64Gauge("test_float64gauge", "desc")
		gauge.Record(ctx, 3.14, labels)
		gauge.Record(ctx, 0, nil)
	})
	t.Run("Histogram", func(_ *testing.T) {
		hist := backend.NewHistogram("test_histogram", "desc", []float64{1, 5, 10})
		hist.Record(ctx, 2.5, labels)
		hist.Record(ctx, 0, nil)
	})
	t.Run("HTTPHandler", func(t *testing.T) {
		if handler := backend.HTTPHandler(); handler != nil {
			t.Error("noop HTTPHandler should be nil")
		}
	})
	t.Run("Shutdown", func(t *testing.T) {
		if err := backend.Shutdown(ctx); err != nil {
			t.Errorf("noop Shutdown should not error: %v", err)
		}
	})
}
// TestNoopBackendLabelNames checks that label names supplied at instrument
// creation time are accepted by the no-op backend without panicking.
func TestNoopBackendLabelNames(_ *testing.T) {
	backend := &observability.NoopBackend{}
	backend.NewCounter("c", "d", "label1", "label2")
	backend.NewUpDownCounter("u", "d", "l1")
	backend.NewInt64Gauge("g1", "d", "l1", "l2", "l3")
	backend.NewFloat64Gauge("g2", "d")
	backend.NewHistogram("h", "d", []float64{0.1, 1.0}, "l1")
}

View File

@@ -0,0 +1,210 @@
// Package otel implements the OpenTelemetry metrics backend for Gerbil.
//
// Metrics are exported via OTLP (gRPC or HTTP) to an external collector.
// No Prometheus /metrics endpoint is exposed in this mode.
// Future OTel tracing and logging can be added alongside this package
// without touching the Prometheus-native path.
package otel
import (
"context"
"fmt"
"net/http"
"time"
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/metric"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
)
// Config holds OTel backend configuration.
type Config struct {
	// Protocol is "grpc" (default) or "http".
	Protocol string
	// Endpoint is the OTLP collector address.
	Endpoint string
	// Insecure disables TLS.
	Insecure bool
	// ExportInterval is the period between pushes to the collector.
	ExportInterval time.Duration
	// ServiceName identifies the service in the OTel resource.
	// New defaults it to "gerbil" when empty.
	ServiceName string
	// ServiceVersion is attached to the OTel resource when non-empty.
	ServiceVersion string
	// DeploymentEnvironment is attached to the OTel resource when non-empty
	// (e.g. "staging", "production").
	DeploymentEnvironment string
}

// Backend is the OTel metrics backend.
type Backend struct {
	cfg      Config                   // effective configuration after defaulting in New
	provider *sdkmetric.MeterProvider // owns the periodic OTLP reader; released via Shutdown
	meter    metric.Meter             // meter used to create all instruments
}
// New creates and initialises an OTel backend.
//
// cfg.Protocol must be "grpc" (default) or "http".
// cfg.Endpoint is the OTLP collector address (e.g. "localhost:4317").
// cfg.ExportInterval sets the push period (defaults to 60 s if ≤ 0).
// cfg.Insecure disables TLS on the OTLP connection.
//
// Connection to the collector is established lazily; New only validates cfg
// and creates the SDK components. It returns an error only if the OTel resource
// or exporter cannot be constructed.
// New creates and initialises an OTel backend.
//
// cfg.Protocol must be "grpc" (default) or "http".
// cfg.Endpoint is the OTLP collector address (e.g. "localhost:4317").
// cfg.ExportInterval sets the push period (defaults to 60 s if ≤ 0).
// cfg.Insecure disables TLS on the OTLP connection.
//
// Connection to the collector is established lazily; New only validates cfg
// and creates the SDK components. It returns an error only if the OTel
// resource or exporter cannot be constructed.
func New(cfg Config) (*Backend, error) {
	// Fill in defaults for anything the caller left unset.
	if cfg.Protocol == "" {
		cfg.Protocol = "grpc"
	}
	if cfg.ExportInterval <= 0 {
		cfg.ExportInterval = 60 * time.Second
	}
	if cfg.ServiceName == "" {
		cfg.ServiceName = "gerbil"
	}

	res, err := newResource(cfg.ServiceName, cfg.ServiceVersion, cfg.DeploymentEnvironment)
	if err != nil {
		return nil, fmt.Errorf("otel backend: build resource: %w", err)
	}

	exp, err := newExporter(context.Background(), cfg)
	if err != nil {
		return nil, fmt.Errorf("otel backend: create exporter: %w", err)
	}

	provider := sdkmetric.NewMeterProvider(
		sdkmetric.WithResource(res),
		sdkmetric.WithReader(sdkmetric.NewPeriodicReader(exp,
			sdkmetric.WithInterval(cfg.ExportInterval),
		)),
	)

	return &Backend{
		cfg:      cfg,
		provider: provider,
		meter:    provider.Meter("github.com/fosrl/gerbil"),
	}, nil
}
// HTTPHandler returns nil; the OTel backend does not expose an HTTP endpoint.
// Metrics are pushed to the OTLP collector rather than scraped.
func (*Backend) HTTPHandler() http.Handler {
	return nil
}

// Shutdown flushes pending metrics and shuts down the MeterProvider.
func (b *Backend) Shutdown(ctx context.Context) error {
	return b.provider.Shutdown(ctx)
}
// NewCounter creates an OTel Int64Counter.
//
// The trailing label-name arguments are ignored: OTel attaches attributes at
// record time, so label names are not declared up front. Instrument-creation
// errors are treated as programmer errors and panic.
func (b *Backend) NewCounter(name, desc string, _ ...string) *Counter {
	c, err := b.meter.Int64Counter(name, metric.WithDescription(desc))
	if err != nil {
		panic(fmt.Sprintf("otel: create counter %q: %v", name, err))
	}
	return &Counter{c: c}
}

// NewUpDownCounter creates an OTel Int64UpDownCounter.
// Label names are ignored; creation errors panic (see NewCounter).
func (b *Backend) NewUpDownCounter(name, desc string, _ ...string) *UpDownCounter {
	c, err := b.meter.Int64UpDownCounter(name, metric.WithDescription(desc))
	if err != nil {
		panic(fmt.Sprintf("otel: create up-down counter %q: %v", name, err))
	}
	return &UpDownCounter{c: c}
}

// NewInt64Gauge creates an OTel Int64Gauge.
// Label names are ignored; creation errors panic (see NewCounter).
func (b *Backend) NewInt64Gauge(name, desc string, _ ...string) *Int64Gauge {
	g, err := b.meter.Int64Gauge(name, metric.WithDescription(desc))
	if err != nil {
		panic(fmt.Sprintf("otel: create int64 gauge %q: %v", name, err))
	}
	return &Int64Gauge{g: g}
}

// NewFloat64Gauge creates an OTel Float64Gauge.
// Label names are ignored; creation errors panic (see NewCounter).
func (b *Backend) NewFloat64Gauge(name, desc string, _ ...string) *Float64Gauge {
	g, err := b.meter.Float64Gauge(name, metric.WithDescription(desc))
	if err != nil {
		panic(fmt.Sprintf("otel: create float64 gauge %q: %v", name, err))
	}
	return &Float64Gauge{g: g}
}

// NewHistogram creates an OTel Float64Histogram with explicit bucket boundaries.
// Label names are ignored; creation errors panic (see NewCounter).
func (b *Backend) NewHistogram(name, desc string, buckets []float64, _ ...string) *Histogram {
	h, err := b.meter.Float64Histogram(name,
		metric.WithDescription(desc),
		metric.WithExplicitBucketBoundaries(buckets...),
	)
	if err != nil {
		panic(fmt.Sprintf("otel: create histogram %q: %v", name, err))
	}
	return &Histogram{h: h}
}
// labelsToAttrs converts a Labels map into OTel attribute key/value pairs.
// A nil or empty map yields a nil slice, so the result can be passed directly
// to metric.WithAttributes without allocating.
func labelsToAttrs(labels map[string]string) []attribute.KeyValue {
	if len(labels) == 0 {
		return nil
	}
	out := make([]attribute.KeyValue, 0, len(labels))
	for key, value := range labels {
		out = append(out, attribute.String(key, value))
	}
	return out
}
// Counter wraps an OTel Int64Counter.
type Counter struct {
	c metric.Int64Counter
}

// Add increments the counter by value.
// The labels map is converted to OTel attributes on every call.
func (c *Counter) Add(ctx context.Context, value int64, labels map[string]string) {
	c.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

// UpDownCounter wraps an OTel Int64UpDownCounter.
type UpDownCounter struct {
	c metric.Int64UpDownCounter
}

// Add adjusts the up-down counter by value (which may be negative).
func (u *UpDownCounter) Add(ctx context.Context, value int64, labels map[string]string) {
	u.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

// Int64Gauge wraps an OTel Int64Gauge.
type Int64Gauge struct {
	g metric.Int64Gauge
}

// Record sets the gauge to value.
func (g *Int64Gauge) Record(ctx context.Context, value int64, labels map[string]string) {
	g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

// Float64Gauge wraps an OTel Float64Gauge.
type Float64Gauge struct {
	g metric.Float64Gauge
}

// Record sets the gauge to value.
func (g *Float64Gauge) Record(ctx context.Context, value float64, labels map[string]string) {
	g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

// Histogram wraps an OTel Float64Histogram.
type Histogram struct {
	h metric.Float64Histogram
}

// Record observes value in the histogram.
func (h *Histogram) Record(ctx context.Context, value float64, labels map[string]string) {
	h.h.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

View File

@@ -0,0 +1,141 @@
package otel_test
import (
"context"
"testing"
"time"
obsotel "github.com/fosrl/gerbil/internal/observability/otel"
)
const (
	// defaultGRPCEndpoint is the conventional local OTLP/gRPC collector
	// address; no collector needs to be listening for these tests.
	defaultGRPCEndpoint = "localhost:4317"
	// defaultServiceName is the service name reported by test backends.
	defaultServiceName = "gerbil-test"
)
// newInMemoryBackend constructs an OTel backend for tests.
//
// It points at the conventional local collector address with a very short
// export interval. No collector needs to be running: the OTLP connection is
// established lazily, so New is expected to succeed even when the endpoint
// is unreachable.
func newInMemoryBackend(t *testing.T) *obsotel.Backend {
	t.Helper()
	b, err := obsotel.New(obsotel.Config{
		Protocol:       "grpc",
		Endpoint:       defaultGRPCEndpoint,
		Insecure:       true,
		ExportInterval: 100 * time.Millisecond,
		ServiceName:    defaultServiceName,
		ServiceVersion: "0.0.1",
	})
	if err != nil {
		t.Fatalf("failed to create otel backend: %v", err)
	}
	return b
}
// TestOtelBackendHTTPHandlerIsNil checks that the OTel backend exposes no
// scrape endpoint.
func TestOtelBackendHTTPHandlerIsNil(t *testing.T) {
	backend := newInMemoryBackend(t)
	defer backend.Shutdown(context.Background()) //nolint:errcheck
	if handler := backend.HTTPHandler(); handler != nil {
		t.Error("OTel backend HTTPHandler should return nil")
	}
}

// TestOtelBackendShutdown checks that Shutdown completes without panicking
// even with no collector reachable; a flush error is tolerated and logged.
func TestOtelBackendShutdown(t *testing.T) {
	backend := newInMemoryBackend(t)
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	err := backend.Shutdown(ctx)
	if err != nil {
		// Flushing to an unreachable collector may fail; only a panic is fatal.
		t.Logf("Shutdown returned (expected with no collector): %v", err)
	}
}
// TestOtelBackendCounter verifies counter creation and Add do not panic.
func TestOtelBackendCounter(t *testing.T) {
	backend := newInMemoryBackend(t)
	defer backend.Shutdown(context.Background()) //nolint:errcheck
	ctx := context.Background()
	counter := backend.NewCounter("gerbil_test_counter_total", "test counter", "result")
	counter.Add(ctx, 1, map[string]string{"result": "ok"})
	counter.Add(ctx, 5, nil)
}

// TestOtelBackendUpDownCounter verifies up and down adjustments do not panic.
func TestOtelBackendUpDownCounter(t *testing.T) {
	backend := newInMemoryBackend(t)
	defer backend.Shutdown(context.Background()) //nolint:errcheck
	ctx := context.Background()
	updown := backend.NewUpDownCounter("gerbil_test_updown", "test updown", "state")
	updown.Add(ctx, 3, map[string]string{"state": "active"})
	updown.Add(ctx, -1, map[string]string{"state": "active"})
}

// TestOtelBackendInt64Gauge verifies integer gauge recording does not panic.
func TestOtelBackendInt64Gauge(t *testing.T) {
	backend := newInMemoryBackend(t)
	defer backend.Shutdown(context.Background()) //nolint:errcheck
	gauge := backend.NewInt64Gauge("gerbil_test_int_gauge", "test gauge")
	gauge.Record(context.Background(), 42, nil)
}

// TestOtelBackendFloat64Gauge verifies float gauge recording does not panic.
func TestOtelBackendFloat64Gauge(t *testing.T) {
	backend := newInMemoryBackend(t)
	defer backend.Shutdown(context.Background()) //nolint:errcheck
	gauge := backend.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge")
	gauge.Record(context.Background(), 3.14, nil)
}

// TestOtelBackendHistogram verifies histogram observation does not panic.
func TestOtelBackendHistogram(t *testing.T) {
	backend := newInMemoryBackend(t)
	defer backend.Shutdown(context.Background()) //nolint:errcheck
	hist := backend.NewHistogram("gerbil_test_duration_seconds", "test histogram",
		[]float64{0.1, 0.5, 1.0}, "method")
	hist.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
}
// TestOtelBackendHTTPProtocol exercises the OTLP/HTTP transport path and
// confirms it also exposes no /metrics endpoint.
func TestOtelBackendHTTPProtocol(t *testing.T) {
	backend, err := obsotel.New(obsotel.Config{
		Protocol:       "http",
		Endpoint:       "localhost:4318",
		Insecure:       true,
		ExportInterval: 100 * time.Millisecond,
		ServiceName:    defaultServiceName,
	})
	if err != nil {
		t.Fatalf("failed to create otel http backend: %v", err)
	}
	defer backend.Shutdown(context.Background()) //nolint:errcheck
	if handler := backend.HTTPHandler(); handler != nil {
		t.Error("OTel HTTP backend should not expose a /metrics endpoint")
	}
}

// TestOtelBackendInvalidProtocol verifies an unsupported protocol is rejected.
func TestOtelBackendInvalidProtocol(t *testing.T) {
	_, err := obsotel.New(obsotel.Config{
		Protocol:       "tcp",
		Endpoint:       defaultGRPCEndpoint,
		ExportInterval: 10 * time.Second,
	})
	if err == nil {
		t.Error("expected error for invalid protocol")
	}
}

// TestOtelBackendDeploymentEnvironment ensures version and deployment
// environment resource attributes do not break backend construction.
func TestOtelBackendDeploymentEnvironment(t *testing.T) {
	backend, err := obsotel.New(obsotel.Config{
		Protocol:              "grpc",
		Endpoint:              defaultGRPCEndpoint,
		Insecure:              true,
		ExportInterval:        100 * time.Millisecond,
		ServiceName:           defaultServiceName,
		ServiceVersion:        "1.2.3",
		DeploymentEnvironment: "staging",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	defer backend.Shutdown(context.Background()) //nolint:errcheck
}

View File

@@ -0,0 +1,50 @@
package otel
import (
"context"
"fmt"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
)
// newExporter creates the appropriate OTLP exporter based on cfg.Protocol.
func newExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
switch cfg.Protocol {
case "grpc", "":
return newGRPCExporter(ctx, cfg)
case "http":
return newHTTPExporter(ctx, cfg)
default:
return nil, fmt.Errorf("otel: unknown protocol %q (must be \"grpc\" or \"http\")", cfg.Protocol)
}
}
// newGRPCExporter builds an OTLP/gRPC metric exporter for cfg.Endpoint,
// optionally without TLS.
func newGRPCExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
	opts := make([]otlpmetricgrpc.Option, 0, 2)
	opts = append(opts, otlpmetricgrpc.WithEndpoint(cfg.Endpoint))
	if cfg.Insecure {
		opts = append(opts, otlpmetricgrpc.WithInsecure())
	}
	exp, err := otlpmetricgrpc.New(ctx, opts...)
	if err != nil {
		return nil, fmt.Errorf("otlp grpc exporter: %w", err)
	}
	return exp, nil
}

// newHTTPExporter builds an OTLP/HTTP metric exporter for cfg.Endpoint,
// optionally without TLS.
func newHTTPExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
	opts := make([]otlpmetrichttp.Option, 0, 2)
	opts = append(opts, otlpmetrichttp.WithEndpoint(cfg.Endpoint))
	if cfg.Insecure {
		opts = append(opts, otlpmetrichttp.WithInsecure())
	}
	exp, err := otlpmetrichttp.New(ctx, opts...)
	if err != nil {
		return nil, fmt.Errorf("otlp http exporter: %w", err)
	}
	return exp, nil
}

View File

@@ -0,0 +1,25 @@
package otel
import (
"go.opentelemetry.io/otel/attribute"
"go.opentelemetry.io/otel/sdk/resource"
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
)
// newResource builds the OTel resource describing this Gerbil instance,
// merged on top of the SDK's default resource attributes. Version and
// deployment environment are included only when non-empty.
func newResource(serviceName, serviceVersion, deploymentEnv string) (*resource.Resource, error) {
	attrs := make([]attribute.KeyValue, 0, 3)
	attrs = append(attrs, semconv.ServiceName(serviceName))
	if serviceVersion != "" {
		attrs = append(attrs, semconv.ServiceVersion(serviceVersion))
	}
	if deploymentEnv != "" {
		attrs = append(attrs, semconv.DeploymentEnvironmentName(deploymentEnv))
	}
	return resource.Merge(
		resource.Default(),
		resource.NewWithAttributes(semconv.SchemaURL, attrs...),
	)
}

View File

@@ -0,0 +1,185 @@
// Package prometheus implements the native Prometheus metrics backend for Gerbil.
//
// This backend uses the Prometheus Go client directly; it does NOT depend on the
// OpenTelemetry SDK. A dedicated Prometheus registry is used so that default
// Go/process metrics are not unintentionally included unless the caller opts in.
package prometheus
import (
"context"
"net/http"
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/collectors"
"github.com/prometheus/client_golang/prometheus/promhttp"
)
// Config holds Prometheus-backend configuration.
type Config struct {
	// Path is the HTTP endpoint path (e.g. "/metrics").
	// NOTE(review): Path is stored but not consumed in this file — presumably
	// used by the HTTP server wiring; confirm against callers.
	Path string
	// IncludeGoMetrics controls whether the standard Go runtime and process
	// collectors are registered on the dedicated registry.
	// Defaults to true if not explicitly set.
	IncludeGoMetrics *bool
}

// Backend is the native Prometheus metrics backend.
// Metric instruments are created via the New* family of methods and stored
// in the backend-specific instrument types that implement the observability
// instrument interfaces.
type Backend struct {
	cfg      Config               // effective configuration after defaulting in New
	registry *prometheus.Registry // dedicated registry; keeps default Go metrics opt-in
	handler  http.Handler         // promhttp handler serving this registry
}
// New creates and initialises a Prometheus backend.
//
// cfg.Path sets the HTTP endpoint path (defaults to "/metrics" if empty).
// cfg.IncludeGoMetrics controls whether standard Go runtime and process metrics
// are included; defaults to true when nil.
//
// Returns an error if the registry cannot be created.
// New creates and initialises a Prometheus backend.
//
// cfg.Path sets the HTTP endpoint path (defaults to "/metrics" if empty).
// cfg.IncludeGoMetrics controls whether standard Go runtime and process
// metrics are included; defaults to true when nil.
//
// Returns an error if the registry cannot be created.
func New(cfg Config) (*Backend, error) {
	if cfg.Path == "" {
		cfg.Path = "/metrics"
	}
	reg := prometheus.NewRegistry()
	// Go and process collectors are on unless explicitly disabled.
	if cfg.IncludeGoMetrics == nil || *cfg.IncludeGoMetrics {
		reg.MustRegister(
			collectors.NewGoCollector(),
			collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
		)
	}
	return &Backend{
		cfg:      cfg,
		registry: reg,
		handler: promhttp.HandlerFor(reg, promhttp.HandlerOpts{
			EnableOpenMetrics: false,
		}),
	}, nil
}
// HTTPHandler returns the Prometheus /metrics HTTP handler backed by the
// backend's dedicated registry.
func (b *Backend) HTTPHandler() http.Handler {
	return b.handler
}

// Shutdown is a no-op for the Prometheus backend: the registry does not
// maintain background goroutines. It exists to satisfy the backend interface.
func (*Backend) Shutdown(_ context.Context) error {
	return nil
}
// newGaugeVec creates and registers a GaugeVec on the backend's registry.
// It backs NewUpDownCounter, NewInt64Gauge and NewFloat64Gauge, which all
// map onto the same underlying Prometheus gauge type.
func (b *Backend) newGaugeVec(name, desc string, labelNames []string) *prometheus.GaugeVec {
	vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
		Name: name,
		Help: desc,
	}, labelNames)
	b.registry.MustRegister(vec)
	return vec
}

// NewCounter creates a Prometheus CounterVec registered on the backend's registry.
// Registering a duplicate metric name panics (MustRegister semantics).
func (b *Backend) NewCounter(name, desc string, labelNames ...string) *Counter {
	vec := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: name,
		Help: desc,
	}, labelNames)
	b.registry.MustRegister(vec)
	return &Counter{vec: vec}
}

// NewUpDownCounter creates a Prometheus GaugeVec (Prometheus gauges are
// bidirectional) registered on the backend's registry.
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) *UpDownCounter {
	return &UpDownCounter{vec: b.newGaugeVec(name, desc, labelNames)}
}

// NewInt64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) *Int64Gauge {
	return &Int64Gauge{vec: b.newGaugeVec(name, desc, labelNames)}
}

// NewFloat64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) *Float64Gauge {
	return &Float64Gauge{vec: b.newGaugeVec(name, desc, labelNames)}
}

// NewHistogram creates a Prometheus HistogramVec registered on the backend's registry.
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) *Histogram {
	vec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    name,
		Help:    desc,
		Buckets: buckets,
	}, labelNames)
	b.registry.MustRegister(vec)
	return &Histogram{vec: vec}
}
// Counter is a native Prometheus counter instrument.
type Counter struct {
	vec *prometheus.CounterVec
}

// Add increments the counter by value for the given labels.
//
// value must be non-negative. Negative values are ignored because Prometheus
// counters are monotonic (the underlying Add would panic on a negative delta).
// NOTE: With panics if the label keys do not exactly match the names declared
// at creation time.
func (c *Counter) Add(_ context.Context, value int64, labels map[string]string) {
	if value < 0 {
		return
	}
	c.vec.With(prometheus.Labels(labels)).Add(float64(value))
}

// UpDownCounter is a native Prometheus gauge used as a bidirectional counter.
type UpDownCounter struct {
	vec *prometheus.GaugeVec
}

// Add adjusts the gauge by value (which may be negative) for the given labels.
func (u *UpDownCounter) Add(_ context.Context, value int64, labels map[string]string) {
	u.vec.With(prometheus.Labels(labels)).Add(float64(value))
}

// Int64Gauge is a native Prometheus gauge recording integer snapshot values.
type Int64Gauge struct {
	vec *prometheus.GaugeVec
}

// Record sets the gauge to value for the given labels.
func (g *Int64Gauge) Record(_ context.Context, value int64, labels map[string]string) {
	g.vec.With(prometheus.Labels(labels)).Set(float64(value))
}

// Float64Gauge is a native Prometheus gauge recording float snapshot values.
type Float64Gauge struct {
	vec *prometheus.GaugeVec
}

// Record sets the gauge to value for the given labels.
func (g *Float64Gauge) Record(_ context.Context, value float64, labels map[string]string) {
	g.vec.With(prometheus.Labels(labels)).Set(value)
}

// Histogram is a native Prometheus histogram instrument.
type Histogram struct {
	vec *prometheus.HistogramVec
}

// Record observes value for the given labels.
func (h *Histogram) Record(_ context.Context, value float64, labels map[string]string) {
	h.vec.With(prometheus.Labels(labels)).Observe(value)
}

View File

@@ -0,0 +1,173 @@
package prometheus_test
import (
"context"
"io"
"net/http"
"net/http/httptest"
"strings"
"testing"
obsprom "github.com/fosrl/gerbil/internal/observability/prometheus"
)
// newTestBackend constructs a Prometheus backend with the default /metrics
// path, failing the test on error.
func newTestBackend(t *testing.T) *obsprom.Backend {
	t.Helper()
	backend, err := obsprom.New(obsprom.Config{Path: "/metrics"})
	if err != nil {
		t.Fatalf("failed to create prometheus backend: %v", err)
	}
	return backend
}
// TestPrometheusBackendHTTPHandler checks the scrape handler is available.
func TestPrometheusBackendHTTPHandler(t *testing.T) {
	backend := newTestBackend(t)
	if handler := backend.HTTPHandler(); handler == nil {
		t.Error("HTTPHandler should not be nil")
	}
}

// TestPrometheusBackendShutdown checks Shutdown is a clean no-op.
func TestPrometheusBackendShutdown(t *testing.T) {
	backend := newTestBackend(t)
	err := backend.Shutdown(context.Background())
	if err != nil {
		t.Errorf("Shutdown returned error: %v", err)
	}
}
// TestPrometheusBackendCounter verifies counter increments appear in scrapes.
func TestPrometheusBackendCounter(t *testing.T) {
	backend := newTestBackend(t)
	counter := backend.NewCounter("test_counter_total", "A test counter", "result")
	counter.Add(context.Background(), 3, map[string]string{"result": "ok"})
	assertMetricPresent(t, scrapeMetrics(t, backend), `test_counter_total{result="ok"} 3`)
}

// TestPrometheusBackendUpDownCounter verifies up/down adjustments accumulate.
func TestPrometheusBackendUpDownCounter(t *testing.T) {
	backend := newTestBackend(t)
	updown := backend.NewUpDownCounter("test_gauge_total", "A test up-down counter", "state")
	updown.Add(context.Background(), 5, map[string]string{"state": "active"})
	updown.Add(context.Background(), -2, map[string]string{"state": "active"})
	assertMetricPresent(t, scrapeMetrics(t, backend), `test_gauge_total{state="active"} 3`)
}

// TestPrometheusBackendInt64Gauge verifies integer gauge snapshots.
func TestPrometheusBackendInt64Gauge(t *testing.T) {
	backend := newTestBackend(t)
	gauge := backend.NewInt64Gauge("test_int_gauge", "An integer gauge", "ifname")
	gauge.Record(context.Background(), 42, map[string]string{"ifname": "wg0"})
	assertMetricPresent(t, scrapeMetrics(t, backend), `test_int_gauge{ifname="wg0"} 42`)
}

// TestPrometheusBackendFloat64Gauge verifies float gauge snapshots.
func TestPrometheusBackendFloat64Gauge(t *testing.T) {
	backend := newTestBackend(t)
	gauge := backend.NewFloat64Gauge("test_float_gauge", "A float gauge", "cert")
	gauge.Record(context.Background(), 7.5, map[string]string{"cert": "example.com"})
	assertMetricPresent(t, scrapeMetrics(t, backend), `test_float_gauge{cert="example.com"} 7.5`)
}

// TestPrometheusBackendHistogram verifies histogram observations are exported.
func TestPrometheusBackendHistogram(t *testing.T) {
	backend := newTestBackend(t)
	hist := backend.NewHistogram("test_duration_seconds", "A test histogram",
		[]float64{0.1, 0.5, 1.0, 5.0}, "method")
	hist.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
	body := scrapeMetrics(t, backend)
	if !strings.Contains(body, "test_duration_seconds") {
		t.Errorf("expected histogram metric in output, body:\n%s", body)
	}
}
// TestPrometheusBackendMultipleLabels verifies counters declared with several
// label names accept a full label set.
func TestPrometheusBackendMultipleLabels(t *testing.T) {
	backend := newTestBackend(t)
	counter := backend.NewCounter("multi_label_total", "Multi-label counter", "method", "route", "status_code")
	counter.Add(context.Background(), 1, map[string]string{
		"method":      "POST",
		"route":       "/api/peers",
		"status_code": "200",
	})
	body := scrapeMetrics(t, backend)
	if !strings.Contains(body, "multi_label_total") {
		t.Errorf("expected multi_label_total in output, body:\n%s", body)
	}
}

// TestPrometheusBackendGoMetrics verifies runtime collectors are on by default.
func TestPrometheusBackendGoMetrics(t *testing.T) {
	backend := newTestBackend(t)
	if !strings.Contains(scrapeMetrics(t, backend), "go_goroutines") {
		t.Error("expected go_goroutines in default backend output")
	}
}

// TestPrometheusBackendNoGoMetrics verifies the runtime collectors can be
// disabled via IncludeGoMetrics=false.
func TestPrometheusBackendNoGoMetrics(t *testing.T) {
	disabled := false
	backend, err := obsprom.New(obsprom.Config{IncludeGoMetrics: &disabled})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if strings.Contains(scrapeMetrics(t, backend), "go_goroutines") {
		t.Error("expected no go_goroutines when IncludeGoMetrics=false")
	}
}

// TestPrometheusBackendNilLabels verifies a nil label map is accepted when no
// label names were declared.
func TestPrometheusBackendNilLabels(t *testing.T) {
	backend := newTestBackend(t)
	counter := backend.NewCounter("nil_labels_total", "counter with no labels")
	counter.Add(context.Background(), 1, nil)
}
// TestPrometheusBackendConcurrentAdd hammers one counter series from several
// goroutines and checks the final scraped total (run with -race to catch
// data races in the instrument path).
func TestPrometheusBackendConcurrentAdd(t *testing.T) {
	const workers, addsPerWorker = 10, 100
	backend := newTestBackend(t)
	counter := backend.NewCounter("concurrent_total", "concurrent counter", "worker")
	finished := make(chan struct{})
	for w := 0; w < workers; w++ {
		go func() {
			for n := 0; n < addsPerWorker; n++ {
				counter.Add(context.Background(), 1, map[string]string{"worker": "w"})
			}
			finished <- struct{}{}
		}()
	}
	for w := 0; w < workers; w++ {
		<-finished
	}
	assertMetricPresent(t, scrapeMetrics(t, backend), `concurrent_total{worker="w"} 1000`)
}
// --- helpers ---

// scrapeMetrics performs an in-memory GET /metrics against the backend's
// handler and returns the response body as a string.
func scrapeMetrics(t *testing.T, b *obsprom.Backend) string {
	t.Helper()
	rec := httptest.NewRecorder()
	b.HTTPHandler().ServeHTTP(rec, httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody))
	if rec.Code != http.StatusOK {
		t.Fatalf("metrics handler returned %d", rec.Code)
	}
	data, err := io.ReadAll(rec.Body)
	if err != nil {
		t.Fatalf("failed to read response body: %v", err)
	}
	return string(data)
}

// assertMetricPresent fails the test when expected is not a substring of body.
func assertMetricPresent(t *testing.T, body, expected string) {
	t.Helper()
	if strings.Contains(body, expected) {
		return
	}
	t.Errorf("expected %q in metrics output\nbody:\n%s", expected, body)
}

1312
main.go

File diff suppressed because it is too large Load Diff

901
proxy/proxy.go Normal file
View File

@@ -0,0 +1,901 @@
package proxy
import (
"bytes"
"context"
"crypto/tls"
"encoding/json"
"fmt"
"hash/fnv"
"io"
"log"
"net"
"net/http"
"strconv"
"strings"
"sync"
"time"
"github.com/fosrl/gerbil/internal/metrics"
"github.com/fosrl/gerbil/logger"
"github.com/patrickmn/go-cache"
)
// RouteRecord represents a routing configuration: the backend host and port
// that a given hostname should be proxied to.
type RouteRecord struct {
	Hostname   string // hostname the route applies to
	TargetHost string // backend host to forward to
	TargetPort int    // backend port to forward to
}

// RouteAPIResponse represents the response from the route API.
type RouteAPIResponse struct {
	Endpoints []string `json:"endpoints"`
}

// ProxyProtocolInfo holds information parsed from incoming PROXY protocol header
type ProxyProtocolInfo struct {
	Protocol     string   // TCP4 or TCP6
	SrcIP        string   // original client source IP from the header
	DestIP       string   // original destination IP from the header
	SrcPort      int      // original client source port from the header
	DestPort     int      // original destination port from the header
	OriginalConn net.Conn // The original connection after PROXY protocol parsing
}

// SNIProxy represents the main proxy server.
// It terminates nothing itself: connections are routed onward based on the
// TLS SNI, optionally honouring/emitting PROXY protocol v1 headers.
type SNIProxy struct {
	port            int
	cache           *cache.Cache
	listener        net.Listener
	ctx             context.Context // lifetime of the proxy; cancel stops background work
	cancel          context.CancelFunc
	wg              sync.WaitGroup
	localProxyAddr  string
	localProxyPort  int
	remoteConfigURL string
	publicKey       string
	proxyProtocol   bool // Enable PROXY protocol v1
	// New fields for fast local SNI lookup.
	// localSNIsLock guards localSNIs.
	localSNIs     map[string]struct{}
	localSNIsLock sync.RWMutex
	// Local overrides for domains that should always use local proxy
	localOverrides map[string]struct{}
	// Track active tunnels by SNI; activeTunnelsLock guards activeTunnels.
	activeTunnels     map[string]*activeTunnel
	activeTunnelsLock sync.Mutex
	// Trusted upstream proxies that can send PROXY protocol
	trustedUpstreams map[string]struct{}
	// Reusable HTTP client for API requests
	httpClient *http.Client
	// Buffer pool for connection piping
	bufferPool *sync.Pool
}

// activeTunnel groups the open connections associated with one SNI so they
// can be tracked (and torn down) as a unit.
type activeTunnel struct {
	conns []net.Conn
}
// readOnlyConn adapts an io.Reader into a net.Conn. Reads are delegated to
// the wrapped reader; writes are rejected with io.ErrClosedPipe, and the
// address/deadline methods are inert stubs.
// NOTE(review): presumably used to feed already-buffered bytes into TLS
// ClientHello parsing — confirm with callers.
type readOnlyConn struct {
	reader io.Reader
}

func (conn readOnlyConn) Read(p []byte) (int, error)  { return conn.reader.Read(p) }
func (conn readOnlyConn) Write(_ []byte) (int, error) { return 0, io.ErrClosedPipe }
func (conn readOnlyConn) Close() error                { return nil }
func (conn readOnlyConn) LocalAddr() net.Addr         { return nil }
func (conn readOnlyConn) RemoteAddr() net.Addr        { return nil }

func (conn readOnlyConn) SetDeadline(_ time.Time) error      { return nil }
func (conn readOnlyConn) SetReadDeadline(_ time.Time) error  { return nil }
func (conn readOnlyConn) SetWriteDeadline(_ time.Time) error { return nil }
// parseProxyProtocolHeader parses a PROXY protocol v1 header from the connection.
//
// Connections from peers not listed in trustedUpstreams are returned untouched
// with a nil *ProxyProtocolInfo. For trusted peers, one read (up to 512 bytes)
// is attempted under a 5-second deadline, and the first CRLF-terminated line is
// interpreted as "PROXY TCP4|TCP6 srcIP destIP srcPort destPort". Anything that
// does not parse is treated as regular TLS traffic; in that case the bytes
// already consumed are replayed through the returned wrapped connection so no
// data is lost.
//
// NOTE(review): the header is assumed to arrive within the first Read; a header
// split across TCP segments would not be recognized — confirm this is acceptable
// for the expected upstreams.
func (p *SNIProxy) parseProxyProtocolHeader(conn net.Conn) (*ProxyProtocolInfo, net.Conn, error) {
	// Check if the connection comes from a trusted upstream
	remoteHost, _, err := net.SplitHostPort(conn.RemoteAddr().String())
	if err != nil {
		return nil, conn, fmt.Errorf("failed to parse remote address: %w", err)
	}
	// Resolve the remote IP to hostname to check if it's trusted
	// For simplicity, we'll check the IP directly in trusted upstreams
	// In production, you might want to do reverse DNS lookup
	if _, isTrusted := p.trustedUpstreams[remoteHost]; !isTrusted {
		// Not from trusted upstream, return original connection
		return nil, conn, nil
	}
	// Set read timeout for PROXY protocol parsing
	if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
		return nil, conn, fmt.Errorf("failed to set read deadline: %w", err)
	}
	// Read the first line (PROXY protocol header)
	buffer := make([]byte, 512) // PROXY protocol header should be much smaller
	n, err := conn.Read(buffer)
	if err != nil {
		// If we can't read from trusted upstream, treat as regular connection
		logger.Debug("Could not read from trusted upstream %s, treating as regular connection: %v", remoteHost, err)
		// Clear read timeout before returning
		if clearErr := conn.SetReadDeadline(time.Time{}); clearErr != nil {
			logger.Debug("Failed to clear read deadline: %v", clearErr)
		}
		return nil, conn, nil
	}
	// Find the end of the first line (CRLF)
	headerEnd := bytes.Index(buffer[:n], []byte("\r\n"))
	if headerEnd == -1 {
		// No PROXY protocol header found, treat as regular TLS connection
		// Return the connection with the buffered data prepended
		logger.Debug("No PROXY protocol header from trusted upstream %s, treating as regular TLS connection", remoteHost)
		// Clear read timeout
		if err := conn.SetReadDeadline(time.Time{}); err != nil {
			logger.Debug("Failed to clear read deadline: %v", err)
		}
		// Create a reader that includes the buffered data + original connection
		// so the consumed bytes are replayed to the next reader.
		newReader := io.MultiReader(bytes.NewReader(buffer[:n]), conn)
		wrappedConn := &proxyProtocolConn{
			Conn:   conn,
			reader: newReader,
		}
		return nil, wrappedConn, nil
	}
	headerLine := string(buffer[:headerEnd])
	// Bytes after the CRLF belong to the application stream (e.g. the TLS
	// ClientHello) and must be replayed.
	remainingData := buffer[headerEnd+2 : n]
	// Parse PROXY protocol line: "PROXY TCP4/TCP6 srcIP destIP srcPort destPort"
	parts := strings.Fields(headerLine)
	if len(parts) != 6 || parts[0] != "PROXY" {
		// Check for PROXY UNKNOWN
		if len(parts) == 2 && parts[0] == "PROXY" && parts[1] == "UNKNOWN" {
			// PROXY UNKNOWN - use original connection info
			return nil, conn, nil
		}
		// Invalid PROXY protocol, but might be regular TLS - treat as such
		logger.Debug("Invalid PROXY protocol from trusted upstream %s, treating as regular TLS connection: %s", remoteHost, headerLine)
		// Clear read timeout
		if err := conn.SetReadDeadline(time.Time{}); err != nil {
			logger.Debug("Failed to clear read deadline: %v", err)
		}
		// Return the connection with all buffered data prepended
		newReader := io.MultiReader(bytes.NewReader(buffer[:n]), conn)
		wrappedConn := &proxyProtocolConn{
			Conn:   conn,
			reader: newReader,
		}
		return nil, wrappedConn, nil
	}
	protocol := parts[1]
	srcIP := parts[2]
	destIP := parts[3]
	srcPort, err := strconv.Atoi(parts[4])
	if err != nil {
		return nil, conn, fmt.Errorf("invalid source port in PROXY header: %s", parts[4])
	}
	destPort, err := strconv.Atoi(parts[5])
	if err != nil {
		return nil, conn, fmt.Errorf("invalid destination port in PROXY header: %s", parts[5])
	}
	// Create a new reader that includes remaining data + original connection
	var newReader io.Reader
	if len(remainingData) > 0 {
		newReader = io.MultiReader(bytes.NewReader(remainingData), conn)
	} else {
		newReader = conn
	}
	// Create a wrapper connection that reads from the combined reader
	wrappedConn := &proxyProtocolConn{
		Conn:   conn,
		reader: newReader,
	}
	proxyInfo := &ProxyProtocolInfo{
		Protocol:     protocol,
		SrcIP:        srcIP,
		DestIP:       destIP,
		SrcPort:      srcPort,
		DestPort:     destPort,
		OriginalConn: wrappedConn,
	}
	// Clear read timeout
	if err := conn.SetReadDeadline(time.Time{}); err != nil {
		return nil, conn, fmt.Errorf("failed to clear read deadline: %w", err)
	}
	return proxyInfo, wrappedConn, nil
}
// proxyProtocolConn wraps a connection to read from a custom reader
type proxyProtocolConn struct {
net.Conn
reader io.Reader
}
func (c *proxyProtocolConn) Read(b []byte) (int, error) {
return c.reader.Read(b)
}
// buildProxyProtocolHeaderFromInfo creates a PROXY protocol v1 header using
// the original client information carried in proxyInfo, combined with the
// local address of the connection we opened to the target (targetAddr).
//
// The protocol family (TCP4/TCP6) follows the *source* IP, and the target IP
// is normalized to the same family so the emitted header is well-formed:
//   - IPv4 source + IPv6 target: no faithful mapping exists, fall back to
//     127.0.0.1.
//   - IPv6 source + IPv4 target: use the ::ffff: IPv4-mapped form.
//
// Returns "PROXY UNKNOWN\r\n" when the addresses cannot be interpreted.
// NOTE: proxyInfo.DestIP/DestPort are deliberately not used here; the
// destination in the header reflects the connection actually opened to the
// target (matching the covering unit test).
func (p *SNIProxy) buildProxyProtocolHeaderFromInfo(proxyInfo *ProxyProtocolInfo, targetAddr net.Addr) string {
	targetTCP, ok := targetAddr.(*net.TCPAddr)
	if !ok {
		// Fallback for unknown (non-TCP) address types.
		return "PROXY UNKNOWN\r\n"
	}

	// Parse the source IP to determine the protocol family.
	srcIP := net.ParseIP(proxyInfo.SrcIP)
	if srcIP == nil {
		return "PROXY UNKNOWN\r\n"
	}

	var protocol, targetIP string
	if srcIP.To4() != nil {
		// Source is IPv4, use TCP4 protocol.
		protocol = "TCP4"
		if targetTCP.IP.To4() != nil {
			targetIP = targetTCP.IP.String()
		} else {
			// Target is IPv6 but the header must stay in the IPv4 family;
			// there is no faithful mapping, so use a safe loopback fallback.
			targetIP = "127.0.0.1"
		}
	} else {
		// Source is IPv6, use TCP6 protocol.
		protocol = "TCP6"
		if targetTCP.IP.To4() != nil {
			// Represent the IPv4 target as an IPv4-mapped IPv6 address.
			targetIP = "::ffff:" + targetTCP.IP.String()
		} else {
			targetIP = targetTCP.IP.String()
		}
	}

	return fmt.Sprintf("PROXY %s %s %s %d %d\r\n",
		protocol,
		proxyInfo.SrcIP,
		targetIP,
		proxyInfo.SrcPort,
		targetTCP.Port)
}
// buildProxyProtocolHeader creates a PROXY protocol v1 header
func buildProxyProtocolHeader(clientAddr, targetAddr net.Addr) string {
clientTCP, ok := clientAddr.(*net.TCPAddr)
if !ok {
// Fallback for unknown address types
return "PROXY UNKNOWN\r\n"
}
targetTCP, ok := targetAddr.(*net.TCPAddr)
if !ok {
// Fallback for unknown address types
return "PROXY UNKNOWN\r\n"
}
// Determine protocol family based on client IP and normalize target IP accordingly
var protocol string
var targetIP string
if clientTCP.IP.To4() != nil {
// Client is IPv4, use TCP4 protocol
protocol = "TCP4"
if targetTCP.IP.To4() != nil {
// Target is also IPv4, use as-is
targetIP = targetTCP.IP.String()
} else {
// Target is IPv6, but we need IPv4 for consistent protocol family
// Use the IPv4 loopback if target is IPv6 loopback, otherwise use 127.0.0.1
if targetTCP.IP.IsLoopback() {
targetIP = "127.0.0.1"
} else {
// For non-loopback IPv6 targets, we could try to extract embedded IPv4
// or fall back to a sensible IPv4 address based on the target
targetIP = "127.0.0.1" // Safe fallback
}
}
} else {
// Client is IPv6, use TCP6 protocol
protocol = "TCP6"
if targetTCP.IP.To4() != nil {
// Target is IPv4, convert to IPv6 representation
targetIP = "::ffff:" + targetTCP.IP.String()
} else {
// Target is also IPv6, use as-is
targetIP = targetTCP.IP.String()
}
}
return fmt.Sprintf("PROXY %s %s %s %d %d\r\n",
protocol,
clientTCP.IP.String(),
targetIP,
clientTCP.Port,
targetTCP.Port)
}
// NewSNIProxy creates a new SNI proxy instance.
//
// localOverrides lists hostnames that always route to the local proxy.
// trustedUpstreams lists hosts permitted to send PROXY protocol headers; each
// entry is also resolved to its IP addresses (best effort) so incoming
// connections can be matched by address as well as by name.
func NewSNIProxy(port int, remoteConfigURL, publicKey, localProxyAddr string, localProxyPort int, localOverrides []string, proxyProtocol bool, trustedUpstreams []string) (*SNIProxy, error) {
	ctx, cancel := context.WithCancel(context.Background())

	// Build the local-overrides set, skipping empty entries.
	overrides := make(map[string]struct{}, len(localOverrides))
	for _, domain := range localOverrides {
		if domain == "" {
			continue
		}
		overrides[domain] = struct{}{}
	}

	// Build the trusted-upstreams set: the name itself plus any resolved IPs.
	trusted := make(map[string]struct{}, len(trustedUpstreams))
	for _, upstream := range trustedUpstreams {
		if upstream == "" {
			continue
		}
		trusted[upstream] = struct{}{}
		// Best-effort DNS resolution; failures simply leave the name-only entry.
		if ips, err := net.LookupIP(upstream); err == nil {
			for _, ip := range ips {
				trusted[ip.String()] = struct{}{}
			}
		}
	}

	return &SNIProxy{
		port:             port,
		cache:            cache.New(3*time.Second, 10*time.Minute),
		ctx:              ctx,
		cancel:           cancel,
		localProxyAddr:   localProxyAddr,
		localProxyPort:   localProxyPort,
		remoteConfigURL:  remoteConfigURL,
		publicKey:        publicKey,
		proxyProtocol:    proxyProtocol,
		localSNIs:        make(map[string]struct{}),
		localOverrides:   overrides,
		activeTunnels:    make(map[string]*activeTunnel),
		trustedUpstreams: trusted,
		// Reusable HTTP client with connection pooling for route lookups.
		httpClient: &http.Client{
			Timeout: 5 * time.Second,
			Transport: &http.Transport{
				MaxIdleConns:        100,
				MaxIdleConnsPerHost: 10,
				IdleConnTimeout:     90 * time.Second,
			},
		},
		// Pool of 32 KiB copy buffers shared by all piped connections.
		bufferPool: &sync.Pool{
			New: func() interface{} {
				buf := make([]byte, 32*1024)
				return &buf
			},
		},
	}, nil
}
// Start begins listening for TCP connections on the configured port and
// launches the accept loop in a background goroutine. It returns an error
// only if the listener cannot be created.
func (p *SNIProxy) Start() error {
	l, err := net.Listen("tcp", fmt.Sprintf(":%d", p.port))
	if err != nil {
		return fmt.Errorf("failed to listen on port %d: %w", p.port, err)
	}
	p.listener = l
	logger.Debug("SNI Proxy listening on port %d", p.port)

	// Accept connections in the background until Stop closes the listener.
	go p.acceptConnections()
	return nil
}
// Stop gracefully shuts down the proxy: it cancels the context, closes the
// listener, and waits up to 30 seconds for in-flight connection handlers to
// finish before giving up.
func (p *SNIProxy) Stop() error {
	log.Println("Stopping SNI Proxy...")
	p.cancel()
	if p.listener != nil {
		p.listener.Close()
	}

	// Wait for connection goroutines, but never block shutdown forever.
	done := make(chan struct{})
	go func() {
		p.wg.Wait()
		close(done)
	}()

	timeout := time.After(30 * time.Second)
	select {
	case <-done:
		log.Println("All connections closed gracefully")
	case <-timeout:
		log.Println("Timeout waiting for connections to close")
	}

	log.Println("SNI Proxy stopped")
	return nil
}
// acceptConnections runs the accept loop until the listener is closed by
// Stop. Each accepted connection is handled on its own goroutine and tracked
// via the wait group so Stop can drain them.
func (p *SNIProxy) acceptConnections() {
	for {
		conn, err := p.listener.Accept()
		if err == nil {
			p.wg.Add(1)
			go p.handleConnection(conn)
			continue
		}
		// Distinguish shutdown (context cancelled) from transient accept
		// failures, which are logged and retried.
		select {
		case <-p.ctx.Done():
			return
		default:
			logger.Debug("Accept error: %v", err)
		}
	}
}
// readClientHello parses the TLS ClientHello from reader without completing a
// handshake. It drives a throwaway tls.Server over a read-only connection;
// GetConfigForClient captures a copy of the ClientHello, after which the
// handshake intentionally fails (no certificate is ever presented). The
// handshake error is only surfaced when no ClientHello was captured.
func (p *SNIProxy) readClientHello(reader io.Reader) (*tls.ClientHelloInfo, error) {
	var captured *tls.ClientHelloInfo
	cfg := &tls.Config{
		GetConfigForClient: func(hello *tls.ClientHelloInfo) (*tls.Config, error) {
			clone := *hello
			captured = &clone
			return nil, nil
		},
	}
	err := tls.Server(readOnlyConn{reader: reader}, cfg).Handshake()
	if captured == nil {
		// Handshake failed before the ClientHello could be read.
		return nil, err
	}
	return captured, nil
}
// peekClientHello parses the ClientHello while preserving every byte read.
// It returns a reader that replays the peeked bytes followed by the rest of
// the stream, so the complete handshake can be forwarded unmodified.
func (p *SNIProxy) peekClientHello(reader io.Reader) (*tls.ClientHelloInfo, io.Reader, error) {
	var peeked bytes.Buffer
	hello, err := p.readClientHello(io.TeeReader(reader, &peeked))
	if err != nil {
		return nil, nil, err
	}
	return hello, io.MultiReader(&peeked, reader), nil
}
// extractSNI reads the TLS ClientHello from conn and returns the SNI server
// name together with a reader that replays the consumed bytes ahead of the
// live connection. A missing SNI is reported as an error alongside the
// replay reader.
func (p *SNIProxy) extractSNI(conn net.Conn) (string, io.Reader, error) {
	hello, replay, err := p.peekClientHello(conn)
	if err != nil {
		return "", nil, fmt.Errorf("failed to peek ClientHello: %w", err)
	}
	if hello.ServerName == "" {
		return "", replay, fmt.Errorf("no SNI hostname found in ClientHello")
	}
	return hello.ServerName, replay, nil
}
// handleConnection processes a single client connection end to end:
// optional PROXY protocol parsing (only when trusted upstreams are
// configured), SNI extraction, route lookup, dialing the target, optional
// PROXY header emission toward the target, tunnel bookkeeping by SNI, and
// finally bidirectional piping. Any failure is logged at debug level and the
// connection is dropped; the deferred Close and wg.Done handle cleanup on
// every return path.
func (p *SNIProxy) handleConnection(clientConn net.Conn) {
	defer p.wg.Done()
	defer clientConn.Close()
	metrics.RecordSNIConnection("accepted")
	logger.Debug("Accepted connection from %s", clientConn.RemoteAddr())

	// Check for PROXY protocol from trusted upstream. actualClientConn may
	// become a wrapper that replays bytes consumed while sniffing for the
	// header; clientConn remains the raw socket.
	var proxyInfo *ProxyProtocolInfo
	var actualClientConn net.Conn = clientConn
	if len(p.trustedUpstreams) > 0 {
		var err error
		proxyInfo, actualClientConn, err = p.parseProxyProtocolHeader(clientConn)
		if err != nil {
			metrics.RecordSNIProxyProtocolParseError()
			logger.Debug("Failed to parse PROXY protocol: %v", err)
			return
		}
		if proxyInfo != nil {
			metrics.RecordSNITrustedProxyEvent("proxy_protocol_parsed")
			logger.Debug("Received PROXY protocol from trusted upstream: %s:%d -> %s:%d",
				proxyInfo.SrcIP, proxyInfo.SrcPort, proxyInfo.DestIP, proxyInfo.DestPort)
		} else {
			// No PROXY protocol detected, but connection is from a trusted
			// upstream. This is fine - treat as a regular connection.
			logger.Debug("No PROXY protocol detected from trusted upstream, treating as regular connection")
		}
	}

	// Set a read timeout so a client that never sends a ClientHello cannot
	// hold the handler open indefinitely.
	if err := actualClientConn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
		logger.Debug("Failed to set read deadline: %v", err)
		return
	}

	// Extract the SNI hostname, timing the peek for metrics.
	clientHelloStart := time.Now()
	hostname, clientReader, err := p.extractSNI(actualClientConn)
	if err != nil {
		logger.Debug("SNI extraction failed: %v", err)
		return
	}
	metrics.RecordProxyTLSHandshake(hostname, time.Since(clientHelloStart).Seconds())
	if hostname == "" {
		log.Println("No SNI hostname found")
		return
	}
	logger.Debug("SNI hostname detected: %s", hostname)

	// Remove the read timeout for normal operation.
	if err := actualClientConn.SetReadDeadline(time.Time{}); err != nil {
		logger.Debug("Failed to clear read deadline: %v", err)
		return
	}

	// Get routing information - prefer the original client address carried in
	// the PROXY protocol header (for sticky endpoint selection) over the
	// upstream's socket address.
	var clientAddrStr string
	if proxyInfo != nil {
		clientAddrStr = fmt.Sprintf("%s:%d", proxyInfo.SrcIP, proxyInfo.SrcPort)
	} else {
		clientAddrStr = clientConn.RemoteAddr().String()
	}
	route, err := p.getRoute(hostname, clientAddrStr)
	if err != nil {
		logger.Debug("Failed to get route for %s: %v", hostname, err)
		return
	}
	if route == nil {
		logger.Debug("No route found for hostname: %s", hostname)
		return
	}
	logger.Debug("Routing %s to %s:%d", hostname, route.TargetHost, route.TargetPort)

	// Connect to the target server with a bounded dial timeout.
	targetConn, err := net.DialTimeout("tcp",
		fmt.Sprintf("%s:%d", route.TargetHost, route.TargetPort),
		10*time.Second)
	if err != nil {
		logger.Debug("Failed to connect to target %s:%d: %v",
			route.TargetHost, route.TargetPort, err)
		return
	}
	defer targetConn.Close()
	logger.Debug("Connected to target: %s:%d", route.TargetHost, route.TargetPort)
	metrics.RecordActiveProxyConnection(hostname, 1)
	defer metrics.RecordActiveProxyConnection(hostname, -1)

	// Send a PROXY protocol header to the target if enabled, using the
	// original client info when it was relayed to us, otherwise the direct
	// connection's address.
	if p.proxyProtocol {
		var proxyHeader string
		if proxyInfo != nil {
			proxyHeader = p.buildProxyProtocolHeaderFromInfo(proxyInfo, targetConn.LocalAddr())
		} else {
			proxyHeader = buildProxyProtocolHeader(clientConn.RemoteAddr(), targetConn.LocalAddr())
		}
		logger.Debug("Sending PROXY protocol header: %s", strings.TrimSpace(proxyHeader))
		if _, err := targetConn.Write([]byte(proxyHeader)); err != nil {
			logger.Debug("Failed to send PROXY protocol header: %v", err)
			return
		}
	}

	// Track this tunnel by SNI so UpdateLocalSNIs can force-close tunnels
	// whose routing changed.
	p.activeTunnelsLock.Lock()
	tunnel, ok := p.activeTunnels[hostname]
	if !ok {
		tunnel = &activeTunnel{}
		p.activeTunnels[hostname] = tunnel
	}
	tunnel.conns = append(tunnel.conns, actualClientConn)
	p.activeTunnelsLock.Unlock()
	defer func() {
		// Remove this conn from active tunnels; drop the whole entry when it
		// was the last connection for this hostname.
		p.activeTunnelsLock.Lock()
		if tunnel, ok := p.activeTunnels[hostname]; ok {
			newConns := make([]net.Conn, 0, len(tunnel.conns))
			for _, c := range tunnel.conns {
				if c != actualClientConn {
					newConns = append(newConns, c)
				}
			}
			if len(newConns) == 0 {
				delete(p.activeTunnels, hostname)
			} else {
				tunnel.conns = newConns
			}
		}
		p.activeTunnelsLock.Unlock()
	}()

	// Start bidirectional data transfer; clientReader replays the already
	// consumed ClientHello bytes ahead of the live connection.
	p.pipe(hostname, actualClientConn, targetConn, clientReader)
}
// getRoute retrieves routing information for a hostname on behalf of a given
// client address (used for sticky endpoint selection).
//
// Lookup order:
//  1. local overrides (always routed to the local proxy),
//  2. the known local-SNI set,
//  3. the route cache (including cached negative results),
//  4. the remote config API, whose answer is then cached.
//
// Returns (nil, nil) when no route exists; callers treat that as "drop".
func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
	// Check local overrides first.
	if _, isOverride := p.localOverrides[hostname]; isOverride {
		logger.Debug("Local override matched for hostname: %s", hostname)
		metrics.RecordProxyRouteLookup("local_override", hostname)
		return &RouteRecord{
			Hostname:   hostname,
			TargetHost: p.localProxyAddr,
			TargetPort: p.localProxyPort,
		}, nil
	}

	// Fast path: check if hostname is in localSNIs (read lock only).
	p.localSNIsLock.RLock()
	_, isLocal := p.localSNIs[hostname]
	p.localSNIsLock.RUnlock()
	if isLocal {
		metrics.RecordProxyRouteLookup("local", hostname)
		return &RouteRecord{
			Hostname:   hostname,
			TargetHost: p.localProxyAddr,
			TargetPort: p.localProxyPort,
		}, nil
	}

	// Check cache. Negative results are stored as an untyped nil, so the
	// nil comparison below distinguishes "known absent" from "not cached".
	if cached, found := p.cache.Get(hostname); found {
		if cached == nil {
			metrics.RecordProxyRouteLookup("cached_not_found", hostname)
			return nil, nil // Cached negative result
		}
		logger.Debug("Cache hit for hostname: %s", hostname)
		metrics.RecordProxyRouteLookup("cache_hit", hostname)
		return cached.(*RouteRecord), nil
	}
	logger.Debug("Cache miss for hostname: %s, querying API", hostname)
	metrics.RecordProxyRouteLookup("cache_miss", hostname)

	// Query API with a timeout derived from the proxy's context so shutdown
	// also cancels in-flight lookups.
	ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second)
	defer cancel()

	// Construct API URL (hostname travels in the POST body, not the path).
	apiURL := fmt.Sprintf("%s/gerbil/get-resolved-hostname", p.remoteConfigURL)

	// Create request body with hostname and public key.
	requestBody := map[string]string{
		"hostname":  hostname,
		"publicKey": p.publicKey,
	}
	jsonBody, err := json.Marshal(requestBody)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request body: %w", err)
	}

	// Create HTTP request.
	req, err := http.NewRequestWithContext(ctx, "POST", apiURL, bytes.NewBuffer(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// Make HTTP request using the reusable pooled client, timing it for
	// metrics.
	apiStart := time.Now()
	resp, err := p.httpClient.Do(req)
	if err != nil {
		metrics.RecordSNIRouteAPIRequest("error")
		return nil, fmt.Errorf("API request failed: %w", err)
	}
	defer resp.Body.Close()
	metrics.RecordSNIRouteAPILatency(time.Since(apiStart).Seconds())

	if resp.StatusCode == http.StatusNotFound {
		metrics.RecordSNIRouteAPIRequest("not_found")
		// Cache negative result for a shorter time (1 minute) so new routes
		// are picked up quickly.
		p.cache.Set(hostname, nil, 1*time.Minute)
		return nil, nil
	}
	if resp.StatusCode != http.StatusOK {
		metrics.RecordSNIRouteAPIRequest("error")
		return nil, fmt.Errorf("API returned status %d", resp.StatusCode)
	}
	metrics.RecordSNIRouteAPIRequest("success")

	// Parse response.
	var apiResponse RouteAPIResponse
	if err := json.NewDecoder(resp.Body).Decode(&apiResponse); err != nil {
		return nil, fmt.Errorf("failed to decode API response: %w", err)
	}
	endpoints := apiResponse.Endpoints

	// Default target configuration: the local node.
	targetHost := p.localProxyAddr
	targetPort := p.localProxyPort
	// If no endpoints were returned, keep the local node; otherwise pick a
	// sticky endpoint for this client.
	if len(endpoints) == 0 {
		logger.Debug("No endpoints returned for hostname: %s, using local node", hostname)
	} else {
		// Select endpoint using consistent hashing for stickiness.
		selectedEndpoint := p.selectStickyEndpoint(clientAddr, endpoints)
		targetHost = selectedEndpoint
		targetPort = 443 // Default HTTPS port
		logger.Debug("Selected endpoint %s for hostname %s from client %s", selectedEndpoint, hostname, clientAddr)
	}
	route := &RouteRecord{
		Hostname:   hostname,
		TargetHost: targetHost,
		TargetPort: targetPort,
	}

	// Cache the result with the default expiration.
	p.cache.Set(hostname, route, cache.DefaultExpiration)
	logger.Debug("Cached route for hostname: %s", hostname)
	return route, nil
}
// selectStickyEndpoint deterministically picks one endpoint for a given
// client address using an FNV-1a hash, so repeated connections from the same
// client land on the same endpoint (sticky load balancing). It falls back to
// the local proxy address when no endpoints are available.
func (p *SNIProxy) selectStickyEndpoint(clientAddr string, endpoints []string) string {
	switch len(endpoints) {
	case 0:
		return p.localProxyAddr
	case 1:
		return endpoints[0]
	}
	// FNV-1a over the client address gives a stable, cheap hash.
	h := fnv.New32a()
	h.Write([]byte(clientAddr))
	return endpoints[h.Sum32()%uint32(len(endpoints))]
}
// pipe handles bidirectional data transfer between clientConn and targetConn
// until either direction ends. clientReader (which replays the peeked
// ClientHello bytes ahead of clientConn) is the read side for the
// client->target direction. When either copy finishes, BOTH connections are
// closed to unblock the other copy — there is no TCP half-close support.
// Copy buffers come from the shared pool and are zeroed before being
// returned so pooled buffers never leak data between connections.
func (p *SNIProxy) pipe(hostname string, clientConn, targetConn net.Conn, clientReader io.Reader) {
	var wg sync.WaitGroup
	wg.Add(2)

	// closeOnce ensures we only close connections once even though both
	// copy goroutines call closeConns on exit.
	var closeOnce sync.Once
	closeConns := func() {
		closeOnce.Do(func() {
			// Close both connections to unblock any pending reads
			clientConn.Close()
			targetConn.Close()
		})
	}

	// Copy data from client to target (using the buffered reader).
	go func() {
		defer wg.Done()
		defer closeConns()
		// Get a 32 KiB buffer from the pool and return it when done.
		bufPtr := p.bufferPool.Get().(*[]byte)
		defer func() {
			// Clear buffer before returning to pool to prevent data leakage
			clear(*bufPtr)
			p.bufferPool.Put(bufPtr)
		}()
		bytesCopied, err := io.CopyBuffer(targetConn, clientReader, *bufPtr)
		metrics.RecordProxyBytesTransmitted(hostname, "client_to_target", bytesCopied)
		// NOTE(review): io.CopyBuffer is documented never to return io.EOF,
		// so the EOF guard here is defensive rather than required.
		if err != nil && err != io.EOF {
			logger.Debug("Copy client->target error: %v", err)
		}
	}()

	// Copy data from target to client.
	go func() {
		defer wg.Done()
		defer closeConns()
		// Get a 32 KiB buffer from the pool and return it when done.
		bufPtr := p.bufferPool.Get().(*[]byte)
		defer func() {
			// Clear buffer before returning to pool to prevent data leakage
			clear(*bufPtr)
			p.bufferPool.Put(bufPtr)
		}()
		bytesCopied, err := io.CopyBuffer(clientConn, targetConn, *bufPtr)
		metrics.RecordProxyBytesTransmitted(hostname, "target_to_client", bytesCopied)
		if err != nil && err != io.EOF {
			logger.Debug("Copy target->client error: %v", err)
		}
	}()

	// Block until both directions have finished.
	wg.Wait()
}
// GetCacheStats returns cache statistics: the raw item count reported by the
// cache, and the number of items returned by a snapshot of its contents.
func (p *SNIProxy) GetCacheStats() (int, int) {
	items := p.cache.Items()
	return p.cache.ItemCount(), len(items)
}
// ClearCache removes every cached route entry and logs that it did so.
func (p *SNIProxy) ClearCache() {
	p.cache.Flush()
	log.Println("Cache cleared")
}
// UpdateLocalSNIs replaces the set of SNIs served by the local node and
// invalidates any cached routes for the supplied domains. Connections
// belonging to SNIs that are no longer local are forcibly closed so clients
// reconnect and get routed to their new target.
//
// Fix: the debug log previously reported len(newSNIs) — the TOTAL size of
// the new set — as "added"; it now reports the number of genuinely new SNIs.
func (p *SNIProxy) UpdateLocalSNIs(fullDomains []string) {
	newSNIs := make(map[string]struct{}, len(fullDomains))
	for _, domain := range fullDomains {
		newSNIs[domain] = struct{}{}
		// Invalidate any cached route for this domain.
		p.cache.Delete(domain)
	}

	// Swap in the new set under the lock, recording which SNIs were actually
	// added and which were removed relative to the old set.
	p.localSNIsLock.Lock()
	removed := make([]string, 0)
	for sni := range p.localSNIs {
		if _, stillLocal := newSNIs[sni]; !stillLocal {
			removed = append(removed, sni)
		}
	}
	added := 0
	for sni := range newSNIs {
		if _, wasLocal := p.localSNIs[sni]; !wasLocal {
			added++
		}
	}
	p.localSNIs = newSNIs
	p.localSNIsLock.Unlock()
	logger.Debug("Updated local SNIs, added %d, removed %d", added, len(removed))

	// Terminate tunnels for removed SNIs so their clients reconnect and get
	// re-routed.
	if len(removed) > 0 {
		p.activeTunnelsLock.Lock()
		for _, sni := range removed {
			if tunnels, ok := p.activeTunnels[sni]; ok {
				for _, conn := range tunnels.conns {
					conn.Close()
				}
				delete(p.activeTunnels, sni)
				logger.Debug("Closed tunnels for SNI target change: %s", sni)
			}
		}
		p.activeTunnelsLock.Unlock()
	}
}

119
proxy/proxy_test.go Normal file
View File

@@ -0,0 +1,119 @@
package proxy
import (
"net"
"testing"
)
// TestBuildProxyProtocolHeader verifies the PROXY v1 header for each
// combination of client/target address families, including the normalization
// applied when the two families differ.
func TestBuildProxyProtocolHeader(t *testing.T) {
	cases := []struct {
		name       string
		clientAddr string
		targetAddr string
		want       string
	}{
		{
			name:       "IPv4 client and target",
			clientAddr: "192.168.1.100:12345",
			targetAddr: "10.0.0.1:443",
			want:       "PROXY TCP4 192.168.1.100 10.0.0.1 12345 443\r\n",
		},
		{
			name:       "IPv6 client and target",
			clientAddr: "[2001:db8::1]:12345",
			targetAddr: "[2001:db8::2]:443",
			want:       "PROXY TCP6 2001:db8::1 2001:db8::2 12345 443\r\n",
		},
		{
			name:       "IPv4 client with IPv6 loopback target",
			clientAddr: "192.168.1.100:12345",
			targetAddr: "[::1]:443",
			want:       "PROXY TCP4 192.168.1.100 127.0.0.1 12345 443\r\n",
		},
		{
			name:       "IPv4 client with IPv6 target",
			clientAddr: "192.168.1.100:12345",
			targetAddr: "[2001:db8::2]:443",
			want:       "PROXY TCP4 192.168.1.100 127.0.0.1 12345 443\r\n",
		},
		{
			name:       "IPv6 client with IPv4 target",
			clientAddr: "[2001:db8::1]:12345",
			targetAddr: "10.0.0.1:443",
			want:       "PROXY TCP6 2001:db8::1 ::ffff:10.0.0.1 12345 443\r\n",
		},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			client, err := net.ResolveTCPAddr("tcp", tc.clientAddr)
			if err != nil {
				t.Fatalf("Failed to resolve client address: %v", err)
			}
			target, err := net.ResolveTCPAddr("tcp", tc.targetAddr)
			if err != nil {
				t.Fatalf("Failed to resolve target address: %v", err)
			}
			if got := buildProxyProtocolHeader(client, target); got != tc.want {
				t.Errorf("Expected %q, got %q", tc.want, got)
			}
		})
	}
}
// TestBuildProxyProtocolHeaderUnknownType checks that non-TCP address types
// fall back to the "PROXY UNKNOWN" header.
func TestBuildProxyProtocolHeaderUnknownType(t *testing.T) {
	client := &net.UDPAddr{IP: net.ParseIP("192.168.1.100"), Port: 12345}
	target := &net.UDPAddr{IP: net.ParseIP("10.0.0.1"), Port: 443}
	got := buildProxyProtocolHeader(client, target)
	if want := "PROXY UNKNOWN\r\n"; got != want {
		t.Errorf("Expected %q, got %q", want, got)
	}
}
// TestBuildProxyProtocolHeaderFromInfo checks header construction from
// previously parsed PROXY protocol info for both IPv4 and IPv6 sources.
func TestBuildProxyProtocolHeaderFromInfo(t *testing.T) {
	proxy, err := NewSNIProxy(8443, "", "", "127.0.0.1", 443, nil, true, nil)
	if err != nil {
		t.Fatalf("Failed to create SNI proxy: %v", err)
	}

	// IPv4 source: destination comes from the target address, not DestIP.
	info := &ProxyProtocolInfo{
		Protocol: "TCP4",
		SrcIP:    "10.0.0.1",
		DestIP:   "192.168.1.100",
		SrcPort:  12345,
		DestPort: 443,
	}
	target, _ := net.ResolveTCPAddr("tcp", "127.0.0.1:8080")
	if got, want := proxy.buildProxyProtocolHeaderFromInfo(info, target), "PROXY TCP4 10.0.0.1 127.0.0.1 12345 8080\r\n"; got != want {
		t.Errorf("Expected header '%s', got '%s'", want, got)
	}

	// IPv6 source with an IPv6 target.
	info = &ProxyProtocolInfo{
		Protocol: "TCP6",
		SrcIP:    "2001:db8::1",
		DestIP:   "2001:db8::2",
		SrcPort:  12345,
		DestPort: 443,
	}
	target, _ = net.ResolveTCPAddr("tcp6", "[::1]:8080")
	if got, want := proxy.buildProxyProtocolHeaderFromInfo(info, target), "PROXY TCP6 2001:db8::1 ::1 12345 8080\r\n"; got != want {
		t.Errorf("Expected header '%s', got '%s'", want, got)
	}
}

1251
relay/relay.go Normal file

File diff suppressed because it is too large Load Diff