mirror of
https://github.com/fosrl/gerbil.git
synced 2026-04-28 18:39:22 -05:00
Compare commits
190 Commits
1.0.0-beta
...
dev
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b642df3e1e | ||
|
|
6c91e99497 | ||
|
|
58415dee7e | ||
|
|
c3ed355127 | ||
|
|
c6e1881e6a | ||
|
|
eedd813e2f | ||
|
|
3cf2ccdc54 | ||
|
|
726b6b171c | ||
|
|
037618acbc | ||
|
|
1a6bc81ddd | ||
|
|
a3dbdef7cc | ||
|
|
f07c83fde4 | ||
|
|
652d9c5c68 | ||
|
|
e47a57cb4f | ||
|
|
4357ddf64b | ||
|
|
f322b4c921 | ||
|
|
56f72d6643 | ||
|
|
367e5bfa08 | ||
|
|
aeb8b7c56f | ||
|
|
f5c77d7df8 | ||
|
|
a37aadddb5 | ||
|
|
80747bf98b | ||
|
|
69418a439c | ||
|
|
d065897c4d | ||
|
|
b57574cc4b | ||
|
|
a3862260c9 | ||
|
|
40da38708c | ||
|
|
3af64d8bd3 | ||
|
|
fcead8cc15 | ||
|
|
20dad7bb8e | ||
|
|
a955aa6169 | ||
|
|
c7d9c72f29 | ||
|
|
abc744c647 | ||
|
|
b118fef265 | ||
|
|
7985f97eb6 | ||
|
|
986a2c6bb6 | ||
|
|
58674ec025 | ||
|
|
5dbe3dbb84 | ||
|
|
32d7af44ca | ||
|
|
b9261b8fea | ||
|
|
fdc398eb9c | ||
|
|
c3e73d0189 | ||
|
|
df2fbdf160 | ||
|
|
cb4ac8199d | ||
|
|
dd4b86b3e5 | ||
|
|
bad290aa4e | ||
|
|
8c27d5e3bf | ||
|
|
7e7a37d49c | ||
|
|
d44aa97f32 | ||
|
|
b57ad74589 | ||
|
|
82256a3f6f | ||
|
|
9e140a94db | ||
|
|
d0c9ea5a57 | ||
|
|
c88810ef24 | ||
|
|
fcd290272f | ||
|
|
463a4eea79 | ||
|
|
4576a2e8a7 | ||
|
|
69c13adcdb | ||
|
|
3886c1a8c1 | ||
|
|
06eb4d4310 | ||
|
|
247c47b27f | ||
|
|
060038c29b | ||
|
|
5414d21dcd | ||
|
|
364fa020aa | ||
|
|
b96ee16fbf | ||
|
|
467d69aa7c | ||
|
|
7c7762ebc5 | ||
|
|
526f9c8b4e | ||
|
|
905983cf61 | ||
|
|
a0879114e2 | ||
|
|
0d54a07973 | ||
|
|
4cb2fde961 | ||
|
|
9602599565 | ||
|
|
11f858b341 | ||
|
|
29b2cb33a2 | ||
|
|
34290ffe09 | ||
|
|
1013d0591e | ||
|
|
2f6d62ab45 | ||
|
|
8d6ba79408 | ||
|
|
208b434cb7 | ||
|
|
39ce0ac407 | ||
|
|
72bee56412 | ||
|
|
b32da3a714 | ||
|
|
971452e5d3 | ||
|
|
bba4345b0f | ||
|
|
b2392fb250 | ||
|
|
697f4131e7 | ||
|
|
e282715251 | ||
|
|
709df6db3e | ||
|
|
cf2b436470 | ||
|
|
2a29021572 | ||
|
|
a3f9a89079 | ||
|
|
ee27bf3153 | ||
|
|
a90f681957 | ||
|
|
3afc82ef9a | ||
|
|
d3a16f4c59 | ||
|
|
2a1911a66f | ||
|
|
08341b2385 | ||
|
|
6cde07d479 | ||
|
|
06b1e84f99 | ||
|
|
2b7e93ec92 | ||
|
|
ca23ae7a30 | ||
|
|
661fd86305 | ||
|
|
594a499b95 | ||
|
|
44aed84827 | ||
|
|
bf038eb4a2 | ||
|
|
6da3129b4e | ||
|
|
ac0f9b6a82 | ||
|
|
16aef10cca | ||
|
|
19031ebdfd | ||
|
|
0eebbc51d5 | ||
|
|
d321a8ba7e | ||
|
|
3ea86222ca | ||
|
|
c3ebe930d9 | ||
|
|
f2b96f2a38 | ||
|
|
9038239bbe | ||
|
|
3e64eb9c4f | ||
|
|
92992b8c14 | ||
|
|
4ee9d77532 | ||
|
|
bd7a5bd4b0 | ||
|
|
1cd49f8ee3 | ||
|
|
7a919d867b | ||
|
|
ce50c627a7 | ||
|
|
691d5f0271 | ||
|
|
56151089e3 | ||
|
|
af7c1caf98 | ||
|
|
dd208ab67c | ||
|
|
8189d41a45 | ||
|
|
ea3477c8ce | ||
|
|
a8a0f92c9b | ||
|
|
7040a9436e | ||
|
|
04361242fe | ||
|
|
554b1d55dc | ||
|
|
b03f8911a5 | ||
|
|
47589570c9 | ||
|
|
9f5b8dea26 | ||
|
|
f6a1e1e27c | ||
|
|
f983a8f141 | ||
|
|
efce3cb0b2 | ||
|
|
6eeebd81b2 | ||
|
|
c970fd5a18 | ||
|
|
09bd02456d | ||
|
|
c24537af36 | ||
|
|
9de3f14799 | ||
|
|
0908f75f5f | ||
|
|
10958f8c55 | ||
|
|
b1840fd5c3 | ||
|
|
1df5eb19ff | ||
|
|
f71f183886 | ||
|
|
8922ca9736 | ||
|
|
38483f4a26 | ||
|
|
78c768e497 | ||
|
|
fc7df8a530 | ||
|
|
50b42059ac | ||
|
|
825f7fcf60 | ||
|
|
8c8ec72b40 | ||
|
|
c61b7fc4fb | ||
|
|
96e3376147 | ||
|
|
e47a7c80d1 | ||
|
|
f1e373f2d8 | ||
|
|
ef4d0db475 | ||
|
|
b6b97f5ed3 | ||
|
|
dff267a42e | ||
|
|
bb98db7f5e | ||
|
|
f1016200b3 | ||
|
|
f1ab8094cf | ||
|
|
ad2bc0d397 | ||
|
|
a78d141ca3 | ||
|
|
10b1ad2a5a | ||
|
|
8a9f29043a | ||
|
|
05c9d851f4 | ||
|
|
c9a6b85e1d | ||
|
|
a16021cd86 | ||
|
|
9506b545f4 | ||
|
|
17b87e6707 | ||
|
|
cba4dc646d | ||
|
|
88be6d133d | ||
|
|
34a80c6411 | ||
|
|
6565fdbe62 | ||
|
|
993f5f86c5 | ||
|
|
093a4c21f2 | ||
|
|
f7c0bb9135 | ||
|
|
a145b77f79 | ||
|
|
7b3f7d2b12 | ||
|
|
9c5ddcdfb8 | ||
|
|
32176c74a0 | ||
|
|
aa4f4ebfab | ||
|
|
bab8630756 | ||
|
|
24e993ee41 | ||
|
|
5d4faaff65 |
@@ -6,4 +6,5 @@ README.md
|
||||
Makefile
|
||||
public/
|
||||
LICENSE
|
||||
CONTRIBUTING.md
|
||||
CONTRIBUTING.md
|
||||
.git
|
||||
|
||||
1
.github/CODEOWNERS
vendored
Normal file
1
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1 @@
|
||||
* @oschwartz10612 @miloschwartz
|
||||
47
.github/DISCUSSION_TEMPLATE/feature-requests.yml
vendored
Normal file
47
.github/DISCUSSION_TEMPLATE/feature-requests.yml
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
body:
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Summary
|
||||
description: A clear and concise summary of the requested feature.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Motivation
|
||||
description: |
|
||||
Why is this feature important?
|
||||
Explain the problem this feature would solve or what use case it would enable.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Proposed Solution
|
||||
description: |
|
||||
How would you like to see this feature implemented?
|
||||
Provide as much detail as possible about the desired behavior, configuration, or changes.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Alternatives Considered
|
||||
description: Describe any alternative solutions or workarounds you've thought about.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Additional Context
|
||||
description: Add any other context, mockups, or screenshots about the feature request here.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Before submitting, please:
|
||||
- Check if there is an existing issue for this feature.
|
||||
- Clearly explain the benefit and use case.
|
||||
- Be as specific as possible to help contributors evaluate and implement.
|
||||
51
.github/ISSUE_TEMPLATE/1.bug_report.yml
vendored
Normal file
51
.github/ISSUE_TEMPLATE/1.bug_report.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
name: Bug Report
|
||||
description: Create a bug report
|
||||
labels: []
|
||||
body:
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Describe the Bug
|
||||
description: A clear and concise description of what the bug is.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Environment
|
||||
description: Please fill out the relevant details below for your environment.
|
||||
value: |
|
||||
- OS Type & Version: (e.g., Ubuntu 22.04)
|
||||
- Pangolin Version:
|
||||
- Gerbil Version:
|
||||
- Traefik Version:
|
||||
- Newt Version:
|
||||
- Olm Version: (if applicable)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: To Reproduce
|
||||
description: |
|
||||
Steps to reproduce the behavior, please provide a clear description of how to reproduce the issue, based on the linked minimal reproduction. Screenshots can be provided in the issue body below.
|
||||
|
||||
If using code blocks, make sure syntax highlighting is correct and double-check that the rendered preview is not broken.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Expected Behavior
|
||||
description: A clear and concise description of what you expected to happen.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Before posting the issue go through the steps you've written down to make sure the steps provided are detailed and clear.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Contributors should be able to follow the steps provided in order to reproduce the bug.
|
||||
8
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
8
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: Need help or have questions?
|
||||
url: https://github.com/orgs/fosrl/discussions
|
||||
about: Ask questions, get help, and discuss with other community members
|
||||
- name: Request a Feature
|
||||
url: https://github.com/orgs/fosrl/discussions/new?category=feature-requests
|
||||
about: Feature requests should be opened as discussions so others can upvote and comment
|
||||
40
.github/dependabot.yml
vendored
Normal file
40
.github/dependabot.yml
vendored
Normal file
@@ -0,0 +1,40 @@
|
||||
version: 2
|
||||
updates:
|
||||
- package-ecosystem: "gomod"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
groups:
|
||||
dev-patch-updates:
|
||||
dependency-type: "development"
|
||||
update-types:
|
||||
- "patch"
|
||||
dev-minor-updates:
|
||||
dependency-type: "development"
|
||||
update-types:
|
||||
- "minor"
|
||||
prod-patch-updates:
|
||||
dependency-type: "production"
|
||||
update-types:
|
||||
- "patch"
|
||||
prod-minor-updates:
|
||||
dependency-type: "production"
|
||||
update-types:
|
||||
- "minor"
|
||||
|
||||
- package-ecosystem: "docker"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "daily"
|
||||
groups:
|
||||
patch-updates:
|
||||
update-types:
|
||||
- "patch"
|
||||
minor-updates:
|
||||
update-types:
|
||||
- "minor"
|
||||
|
||||
- package-ecosystem: "github-actions"
|
||||
directory: "/"
|
||||
schedule:
|
||||
interval: "weekly"
|
||||
161
.github/workflows/cicd.yml
vendored
Normal file
161
.github/workflows/cicd.yml
vendored
Normal file
@@ -0,0 +1,161 @@
|
||||
name: CI/CD Pipeline
|
||||
|
||||
# CI/CD workflow for building, publishing, mirroring, signing container images and building release binaries.
|
||||
# Actions are pinned to specific SHAs to reduce supply-chain risk. This workflow triggers on tag push events.
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write # for GHCR push
|
||||
id-token: write # for Cosign Keyless (OIDC) Signing
|
||||
|
||||
# Required secrets:
|
||||
# - DOCKER_HUB_USERNAME / DOCKER_HUB_ACCESS_TOKEN: push to Docker Hub
|
||||
# - GITHUB_TOKEN: used for GHCR login and OIDC keyless signing
|
||||
# - COSIGN_PRIVATE_KEY / COSIGN_PASSWORD / COSIGN_PUBLIC_KEY: for key-based signing
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "[0-9]+.[0-9]+.[0-9]+"
|
||||
- "[0-9]+.[0-9]+.[0-9]+.rc.[0-9]+"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
release:
|
||||
name: Build and Release
|
||||
runs-on: amd64-runner
|
||||
# Job-level timeout to avoid runaway or stuck runs
|
||||
timeout-minutes: 120
|
||||
env:
|
||||
# Target images
|
||||
DOCKERHUB_IMAGE: docker.io/fosrl/${{ github.event.repository.name }}
|
||||
GHCR_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
|
||||
with:
|
||||
registry: docker.io
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Extract tag name
|
||||
id: get-tag
|
||||
run: echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
|
||||
with:
|
||||
go-version: 1.25
|
||||
|
||||
- name: Update version in main.go
|
||||
run: |
|
||||
TAG=${{ env.TAG }}
|
||||
if [ -f main.go ]; then
|
||||
sed -i 's/version_replaceme/'"$TAG"'/' main.go
|
||||
echo "Updated main.go with version $TAG"
|
||||
else
|
||||
echo "main.go not found"
|
||||
fi
|
||||
shell: bash
|
||||
|
||||
- name: Build and push Docker images (Docker Hub)
|
||||
run: |
|
||||
TAG=${{ env.TAG }}
|
||||
make docker-build-release tag=$TAG
|
||||
echo "Built & pushed to: ${{ env.DOCKERHUB_IMAGE }}:${TAG}"
|
||||
shell: bash
|
||||
|
||||
- name: Login in to GHCR
|
||||
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
|
||||
with:
|
||||
registry: ghcr.io
|
||||
username: ${{ github.actor }}
|
||||
password: ${{ secrets.GITHUB_TOKEN }}
|
||||
|
||||
- name: Install skopeo + jq
|
||||
# skopeo: copy/inspect images between registries
|
||||
# jq: JSON parsing tool used to extract digest values
|
||||
run: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install -y skopeo jq
|
||||
skopeo --version
|
||||
shell: bash
|
||||
|
||||
- name: Copy tag from Docker Hub to GHCR
|
||||
# Mirror the already-built image (all architectures) to GHCR so we can sign it
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TAG=${{ env.TAG }}
|
||||
echo "Copying ${{ env.DOCKERHUB_IMAGE }}:${TAG} -> ${{ env.GHCR_IMAGE }}:${TAG}"
|
||||
skopeo copy --all --retry-times 3 \
|
||||
docker://$DOCKERHUB_IMAGE:$TAG \
|
||||
docker://$GHCR_IMAGE:$TAG
|
||||
shell: bash
|
||||
|
||||
- name: Install cosign
|
||||
# cosign is used to sign and verify container images (key and keyless)
|
||||
uses: sigstore/cosign-installer@ba7bc0a3fef59531c69a25acd34668d6d3fe6f22 # v4.1.0
|
||||
|
||||
- name: Dual-sign and verify (GHCR & Docker Hub)
|
||||
# Sign each image by digest using keyless (OIDC) and key-based signing,
|
||||
# then verify both the public key signature and the keyless OIDC signature.
|
||||
env:
|
||||
TAG: ${{ env.TAG }}
|
||||
COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
|
||||
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
|
||||
COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
|
||||
COSIGN_YES: "true"
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
issuer="https://token.actions.githubusercontent.com"
|
||||
id_regex="^https://github.com/${{ github.repository }}/.+" # accept this repo (all workflows/refs)
|
||||
|
||||
for IMAGE in "${GHCR_IMAGE}" "${DOCKERHUB_IMAGE}"; do
|
||||
echo "Processing ${IMAGE}:${TAG}"
|
||||
|
||||
DIGEST="$(skopeo inspect --retry-times 3 docker://${IMAGE}:${TAG} | jq -r '.Digest')"
|
||||
REF="${IMAGE}@${DIGEST}"
|
||||
echo "Resolved digest: ${REF}"
|
||||
|
||||
echo "==> cosign sign (keyless) --recursive ${REF}"
|
||||
cosign sign --recursive "${REF}"
|
||||
|
||||
echo "==> cosign sign (key) --recursive ${REF}"
|
||||
cosign sign --key env://COSIGN_PRIVATE_KEY --recursive "${REF}"
|
||||
|
||||
echo "==> cosign verify (public key) ${REF}"
|
||||
cosign verify --key env://COSIGN_PUBLIC_KEY "${REF}" -o text
|
||||
|
||||
echo "==> cosign verify (keyless policy) ${REF}"
|
||||
cosign verify \
|
||||
--certificate-oidc-issuer "${issuer}" \
|
||||
--certificate-identity-regexp "${id_regex}" \
|
||||
"${REF}" -o text
|
||||
done
|
||||
shell: bash
|
||||
|
||||
- name: Build binaries
|
||||
run: |
|
||||
make go-build-release
|
||||
shell: bash
|
||||
|
||||
- name: Upload artifacts from /bin
|
||||
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||
with:
|
||||
name: binaries
|
||||
path: bin/
|
||||
132
.github/workflows/mirror.yaml
vendored
Normal file
132
.github/workflows/mirror.yaml
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
name: Mirror & Sign (Docker Hub to GHCR)
|
||||
|
||||
on:
|
||||
workflow_dispatch: {}
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
id-token: write # for keyless OIDC
|
||||
|
||||
env:
|
||||
SOURCE_IMAGE: docker.io/fosrl/gerbil
|
||||
DEST_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
|
||||
|
||||
jobs:
|
||||
mirror-and-dual-sign:
|
||||
runs-on: amd64-runner
|
||||
steps:
|
||||
- name: Install skopeo + jq
|
||||
run: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install -y skopeo jq
|
||||
skopeo --version
|
||||
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@ba7bc0a3fef59531c69a25acd34668d6d3fe6f22 # v4.1.0
|
||||
|
||||
- name: Input check
|
||||
run: |
|
||||
test -n "${SOURCE_IMAGE}" || (echo "SOURCE_IMAGE is empty" && exit 1)
|
||||
echo "Source : ${SOURCE_IMAGE}"
|
||||
echo "Target : ${DEST_IMAGE}"
|
||||
|
||||
# Auth for skopeo (containers-auth)
|
||||
- name: Skopeo login to GHCR
|
||||
run: |
|
||||
skopeo login ghcr.io -u "${{ github.actor }}" -p "${{ secrets.GITHUB_TOKEN }}"
|
||||
|
||||
# Auth for cosign (docker-config)
|
||||
- name: Docker login to GHCR (for cosign)
|
||||
run: |
|
||||
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
|
||||
|
||||
- name: List source tags
|
||||
run: |
|
||||
set -euo pipefail
|
||||
skopeo list-tags --retry-times 3 docker://"${SOURCE_IMAGE}" \
|
||||
| jq -r '.Tags[]' | sort -u > src-tags.txt
|
||||
echo "Found source tags: $(wc -l < src-tags.txt)"
|
||||
head -n 20 src-tags.txt || true
|
||||
|
||||
- name: List destination tags (skip existing)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if skopeo list-tags --retry-times 3 docker://"${DEST_IMAGE}" >/tmp/dst.json 2>/dev/null; then
|
||||
jq -r '.Tags[]' /tmp/dst.json | sort -u > dst-tags.txt
|
||||
else
|
||||
: > dst-tags.txt
|
||||
fi
|
||||
echo "Existing destination tags: $(wc -l < dst-tags.txt)"
|
||||
|
||||
- name: Mirror, dual-sign, and verify
|
||||
env:
|
||||
# keyless
|
||||
COSIGN_YES: "true"
|
||||
# key-based
|
||||
COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
|
||||
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
|
||||
# verify
|
||||
COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
copied=0; skipped=0; v_ok=0; errs=0
|
||||
|
||||
issuer="https://token.actions.githubusercontent.com"
|
||||
id_regex="^https://github.com/${{ github.repository }}/.+"
|
||||
|
||||
while read -r tag; do
|
||||
[ -z "$tag" ] && continue
|
||||
|
||||
if grep -Fxq "$tag" dst-tags.txt; then
|
||||
echo "::notice ::Skip (exists) ${DEST_IMAGE}:${tag}"
|
||||
skipped=$((skipped+1))
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "==> Copy ${SOURCE_IMAGE}:${tag} → ${DEST_IMAGE}:${tag}"
|
||||
if ! skopeo copy --all --retry-times 3 \
|
||||
docker://"${SOURCE_IMAGE}:${tag}" docker://"${DEST_IMAGE}:${tag}"; then
|
||||
echo "::warning title=Copy failed::${SOURCE_IMAGE}:${tag}"
|
||||
errs=$((errs+1)); continue
|
||||
fi
|
||||
copied=$((copied+1))
|
||||
|
||||
digest="$(skopeo inspect --retry-times 3 docker://"${DEST_IMAGE}:${tag}" | jq -r '.Digest')"
|
||||
ref="${DEST_IMAGE}@${digest}"
|
||||
|
||||
echo "==> cosign sign (keyless) --recursive ${ref}"
|
||||
if ! cosign sign --recursive "${ref}"; then
|
||||
echo "::warning title=Keyless sign failed::${ref}"
|
||||
errs=$((errs+1))
|
||||
fi
|
||||
|
||||
echo "==> cosign sign (key) --recursive ${ref}"
|
||||
if ! cosign sign --key env://COSIGN_PRIVATE_KEY --recursive "${ref}"; then
|
||||
echo "::warning title=Key sign failed::${ref}"
|
||||
errs=$((errs+1))
|
||||
fi
|
||||
|
||||
echo "==> cosign verify (public key) ${ref}"
|
||||
if ! cosign verify --key env://COSIGN_PUBLIC_KEY "${ref}" -o text; then
|
||||
echo "::warning title=Verify(pubkey) failed::${ref}"
|
||||
errs=$((errs+1))
|
||||
fi
|
||||
|
||||
echo "==> cosign verify (keyless policy) ${ref}"
|
||||
if ! cosign verify \
|
||||
--certificate-oidc-issuer "${issuer}" \
|
||||
--certificate-identity-regexp "${id_regex}" \
|
||||
"${ref}" -o text; then
|
||||
echo "::warning title=Verify(keyless) failed::${ref}"
|
||||
errs=$((errs+1))
|
||||
else
|
||||
v_ok=$((v_ok+1))
|
||||
fi
|
||||
done < src-tags.txt
|
||||
|
||||
echo "---- Summary ----"
|
||||
echo "Copied : $copied"
|
||||
echo "Skipped : $skipped"
|
||||
echo "Verified OK : $v_ok"
|
||||
echo "Errors : $errs"
|
||||
31
.github/workflows/test.yml
vendored
Normal file
31
.github/workflows/test.yml
vendored
Normal file
@@ -0,0 +1,31 @@
|
||||
name: Run Tests
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
- main
|
||||
- dev
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: amd64-runner
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
|
||||
with:
|
||||
go-version: 1.26
|
||||
|
||||
- name: Build go
|
||||
run: go build
|
||||
|
||||
- name: Build Docker image
|
||||
run: make build
|
||||
|
||||
- name: Build binaries
|
||||
run: make go-build-release
|
||||
4
.gitignore
vendored
4
.gitignore
vendored
@@ -1 +1,3 @@
|
||||
gerbil
|
||||
gerbil
|
||||
.DS_Store
|
||||
bin/
|
||||
1
.go-version
Normal file
1
.go-version
Normal file
@@ -0,0 +1 @@
|
||||
1.25
|
||||
@@ -4,11 +4,7 @@ Contributions are welcome!
|
||||
|
||||
Please see the contribution and local development guide on the docs page before getting started:
|
||||
|
||||
https://docs.fossorial.io/development
|
||||
|
||||
For ideas about what features to work on and our future plans, please see the roadmap:
|
||||
|
||||
https://docs.fossorial.io/roadmap
|
||||
https://docs.pangolin.net/development/contributing
|
||||
|
||||
### Licensing Considerations
|
||||
|
||||
|
||||
11
Dockerfile
11
Dockerfile
@@ -1,4 +1,4 @@
|
||||
FROM golang:1.23.1-alpine AS builder
|
||||
FROM golang:1.26-alpine AS builder
|
||||
|
||||
# Set the working directory inside the container
|
||||
WORKDIR /app
|
||||
@@ -16,16 +16,13 @@ COPY . .
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /gerbil
|
||||
|
||||
# Start a new stage from scratch
|
||||
FROM ubuntu:22.04 AS runner
|
||||
FROM alpine:3.23 AS runner
|
||||
|
||||
RUN apk add --no-cache iptables iproute2
|
||||
|
||||
# Copy the pre-built binary file from the previous stage and the entrypoint script
|
||||
COPY --from=builder /gerbil /usr/local/bin/
|
||||
COPY entrypoint.sh /
|
||||
|
||||
RUN chmod +x /entrypoint.sh
|
||||
|
||||
# Copy the entrypoint script
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
|
||||
# Command to run the executable
|
||||
CMD ["gerbil"]
|
||||
14
Makefile
14
Makefile
@@ -1,6 +1,14 @@
|
||||
|
||||
all: build push
|
||||
|
||||
docker-build-release:
|
||||
@if [ -z "$(tag)" ]; then \
|
||||
echo "Error: tag is required. Usage: make docker-build-release tag=<tag>"; \
|
||||
exit 1; \
|
||||
fi
|
||||
docker buildx build --platform linux/arm64,linux/amd64 -t fosrl/gerbil:latest -f Dockerfile --push .
|
||||
docker buildx build --platform linux/arm64,linux/amd64 -t fosrl/gerbil:$(tag) -f Dockerfile --push .
|
||||
|
||||
build:
|
||||
docker build -t fosrl/gerbil:latest .
|
||||
|
||||
@@ -13,5 +21,9 @@ test:
|
||||
local:
|
||||
CGO_ENABLED=0 GOOS=linux go build -o gerbil
|
||||
|
||||
go-build-release:
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=arm64 go build -o bin/gerbil_linux_arm64
|
||||
CGO_ENABLED=0 GOOS=linux GOARCH=amd64 go build -o bin/gerbil_linux_amd64
|
||||
|
||||
clean:
|
||||
rm gerbil
|
||||
rm gerbil
|
||||
|
||||
99
README.md
99
README.md
@@ -4,16 +4,9 @@ Gerbil is a simple [WireGuard](https://www.wireguard.com/) interface management
|
||||
|
||||
### Installation and Documentation
|
||||
|
||||
Gerbil can be used standalone with your own API, a static JSON file, or with Pangolin and Newt as part of the larger system. See documentation below:
|
||||
Gerbil works with Pangolin, Newt, and Olm as part of the larger system. See documentation below:
|
||||
|
||||
- [Installation Instructions](https://docs.fossorial.io)
|
||||
- [Full Documentation](https://docs.fossorial.io)
|
||||
|
||||
## Preview
|
||||
|
||||
<img src="public/screenshots/preview.png" alt="Preview"/>
|
||||
|
||||
_Sample output of a Gerbil container connected to Pangolin and terminating various peers._
|
||||
- [Full Documentation](https://docs.pangolin.net)
|
||||
|
||||
## Key Functions
|
||||
|
||||
@@ -27,30 +20,89 @@ Gerbil will create the peers defined in the config on the WireGuard interface. T
|
||||
|
||||
### Report Bandwidth
|
||||
|
||||
Bytes transmitted in and out of each peer are collected every 10 seconds, and incremental usage is reported via the "reportBandwidthTo" endpoint. This can be used to track data usage of each peer on the remote server.
|
||||
Bytes transmitted in and out of each peer are collected every 10 seconds, and incremental usage is reported via the api endpoint. This can be used to track data usage of each peer on the remote server.
|
||||
|
||||
### Handle client relaying
|
||||
|
||||
Gerbil listens on port 21820 for incoming UDP hole punch packets to orchestrate NAT hole punching between olm and newt clients. Additionally, it handles relaying data through the gerbil server down to the newt. This is accomplished by scanning each packet for headers and handling them appropriately.
|
||||
|
||||
### SNI Proxy
|
||||
|
||||
Gerbil includes an SNI (Server Name Indication) proxy that enables intelligent routing of HTTPS traffic between Pangolin nodes. When a TLS connection comes in, the proxy extracts the hostname from the SNI extension and queries Pangolin to determine the correct routing destination. This allows seamless routing of web traffic through the WireGuard mesh network:
|
||||
|
||||
- If the hostname is configured for local handling (via local overrides or local SNIs), traffic is routed to the local proxy
|
||||
- Otherwise, the proxy queries Pangolin's routing API to determine which node should handle the traffic
|
||||
- Supports caching of routing decisions to improve performance
|
||||
- Handles connection pooling and graceful shutdown
|
||||
- Optional PROXY protocol v1 support to preserve original client IP addresses when forwarding to downstream proxies (HAProxy, Nginx, etc.)
|
||||
|
||||
The PROXY protocol allows downstream proxies to know the real client IP address instead of seeing the SNI proxy's IP. When enabled with `--proxy-protocol`, the SNI proxy will prepend a PROXY protocol header to each connection containing the original client's IP and port information.
|
||||
|
||||
In single node (self hosted) Pangolin deployments this can be bypassed by using port 443:443 to route to Traefik instead of the SNI proxy at 8443.
|
||||
|
||||
### Observability with OpenTelemetry
|
||||
|
||||
Gerbil includes comprehensive OpenTelemetry metrics instrumentation for monitoring and observability. Metrics can be exported via:
|
||||
|
||||
- **Prometheus**: Pull-based metrics at the `/metrics` endpoint (enabled by default)
|
||||
- **OTLP**: Push-based metrics to any OpenTelemetry-compatible collector
|
||||
|
||||
Key metrics include:
|
||||
|
||||
- WireGuard interface and peer status
|
||||
- Bandwidth usage per peer
|
||||
- Active relay sessions and proxy connections
|
||||
- Handshake success/failure rates
|
||||
- Route lookup cache hit/miss ratios
|
||||
- Go runtime metrics (GC, goroutines, memory)
|
||||
|
||||
See [docs/observability.md](docs/observability.md) for complete documentation, metrics reference, and examples.
|
||||
|
||||
## CLI Args
|
||||
|
||||
Important:
|
||||
- `reachableAt`: How should the remote server reach Gerbil's API?
|
||||
- `generateAndSaveKeyTo`: Where to save the generated WireGuard private key to persist across restarts.
|
||||
- `remoteConfig` (optional): Remote config location to HTTP get the JSON based config from. See `example_config.json`
|
||||
- `config` (optional): Local JSON file path to load config. Used if remote config is not supplied. See `example_config.json`
|
||||
- `remoteConfig`: Remote config location to HTTP get the JSON based config from.
|
||||
|
||||
Note: You must use either `config` or `remoteConfig` to configure WireGuard.
|
||||
|
||||
- `reportBandwidthTo` (optional): Remote HTTP endpoint to send peer bandwidth data
|
||||
Others:
|
||||
- `reportBandwidthTo` (optional): **DEPRECATED** - Use `remoteConfig` instead. Remote HTTP endpoint to send peer bandwidth data
|
||||
- `interface` (optional): Name of the WireGuard interface created by Gerbil. Default: `wg0`
|
||||
- `listen` (optional): Port to listen on for HTTP server. Default: `3003`
|
||||
- `log-level` (optional): The log level to use. Default: INFO
|
||||
- `listen` (optional): Port to listen on for HTTP server. Default: `:3004`
|
||||
- `log-level` (optional): The log level to use (DEBUG, INFO, WARN, ERROR, FATAL). Default: `INFO`
|
||||
- `mtu` (optional): MTU of the WireGuard interface. Default: `1280`
|
||||
- `notify` (optional): URL to notify on peer changes
|
||||
- `sni-port` (optional): Port for the SNI proxy to listen on. Default: `8443`
|
||||
- `local-proxy` (optional): Address for local proxy when routing local traffic. Default: `localhost`
|
||||
- `local-proxy-port` (optional): Port for local proxy when routing local traffic. Default: `443`
|
||||
- `local-overrides` (optional): Comma-separated list of domain names that should always be routed to the local proxy
|
||||
- `proxy-protocol` (optional): Enable PROXY protocol v1 for preserving client IP addresses when forwarding to downstream proxies. Default: `false`
|
||||
|
||||
## Environment Variables
|
||||
|
||||
All CLI arguments can also be provided via environment variables:
|
||||
|
||||
- `INTERFACE`: Name of the WireGuard interface
|
||||
- `REMOTE_CONFIG`: URL of the remote config server
|
||||
- `LISTEN`: Address to listen on for HTTP server
|
||||
- `GENERATE_AND_SAVE_KEY_TO`: Path to save generated private key
|
||||
- `REACHABLE_AT`: Endpoint of the HTTP server to tell remote config about
|
||||
- `LOG_LEVEL`: Log level (DEBUG, INFO, WARN, ERROR, FATAL)
|
||||
- `MTU`: MTU of the WireGuard interface
|
||||
- `NOTIFY_URL`: URL to notify on peer changes
|
||||
- `SNI_PORT`: Port for the SNI proxy to listen on
|
||||
- `LOCAL_PROXY`: Address for local proxy when routing local traffic
|
||||
- `LOCAL_PROXY_PORT`: Port for local proxy when routing local traffic
|
||||
- `LOCAL_OVERRIDES`: Comma-separated list of domain names that should always be routed to the local proxy
|
||||
- `PROXY_PROTOCOL`: Enable PROXY protocol v1 for preserving client IP addresses (true/false)
|
||||
|
||||
Example:
|
||||
|
||||
```bash
|
||||
./gerbil \
|
||||
--reachableAt=http://gerbil:3003 \
|
||||
--reachableAt=http://gerbil:3004 \
|
||||
--generateAndSaveKeyTo=/var/config/key \
|
||||
--remoteConfig=http://pangolin:3001/api/v1/gerbil/get-config \
|
||||
--reportBandwidthTo=http://pangolin:3001/api/v1/gerbil/receive-bandwidth
|
||||
--remoteConfig=http://pangolin:3001/api/v1/
|
||||
```
|
||||
|
||||
```yaml
|
||||
@@ -60,10 +112,9 @@ services:
|
||||
container_name: gerbil
|
||||
restart: unless-stopped
|
||||
command:
|
||||
- --reachableAt=http://gerbil:3003
|
||||
- --reachableAt=http://gerbil:3004
|
||||
- --generateAndSaveKeyTo=/var/config/key
|
||||
- --remoteConfig=http://pangolin:3001/api/v1/gerbil/get-config
|
||||
- --reportBandwidthTo=http://pangolin:3001/api/v1/gerbil/receive-bandwidth
|
||||
- --remoteConfig=http://pangolin:3001/api/v1/
|
||||
volumes:
|
||||
- ./config/:/var/config
|
||||
cap_add:
|
||||
@@ -71,6 +122,8 @@ services:
|
||||
- SYS_MODULE
|
||||
ports:
|
||||
- 51820:51820/udp
|
||||
- 21820:21820/udp
|
||||
- 443:8443/tcp # SNI proxy port
|
||||
```
|
||||
|
||||
## Build
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
If you discover a security vulnerability, please follow the steps below to responsibly disclose it to us:
|
||||
|
||||
1. **Do not create a public GitHub issue or discussion post.** This could put the security of other users at risk.
|
||||
2. Send a detailed report to [security@fossorial.io](mailto:security@fossorial.io) or send a **private** message to a maintainer on [Discord](https://discord.gg/HCJR8Xhme4). Include:
|
||||
2. Send a detailed report to [security@pangolin.net](mailto:security@pangolin.net) or send a **private** message to a maintainer on [Discord](https://discord.gg/HCJR8Xhme4). Include:
|
||||
|
||||
- Description and location of the vulnerability.
|
||||
- Potential impact of the vulnerability.
|
||||
|
||||
@@ -1,23 +0,0 @@
|
||||
{
|
||||
"privateKey": "kBGTgk7c+zncEEoSnMl+jsLjVh5ZVoL/HwBSQem+d1M=",
|
||||
"listenPort": 51820,
|
||||
"ipAddress": "10.0.0.1/24",
|
||||
"peers": [
|
||||
{
|
||||
"publicKey": "5UzzoeveFVSzuqK3nTMS5bA1jIMs1fQffVQzJ8MXUQM=",
|
||||
"allowedIps": ["10.0.0.0/28"]
|
||||
},
|
||||
{
|
||||
"publicKey": "kYrZpuO2NsrFoBh1GMNgkhd1i9Rgtu1rAjbJ7qsfngU=",
|
||||
"allowedIps": ["10.0.0.16/28"]
|
||||
},
|
||||
{
|
||||
"publicKey": "1YfPUVr9ZF4zehkbI2BQhCxaRLz+Vtwa4vJwH+mpK0A=",
|
||||
"allowedIps": ["10.0.0.32/28"]
|
||||
},
|
||||
{
|
||||
"publicKey": "2/U4oyZ+sai336Dal/yExCphL8AxyqvIxMk4qsUy4iI=",
|
||||
"allowedIps": ["10.0.0.48/28"]
|
||||
}
|
||||
]
|
||||
}
|
||||
269
docs/observability.md
Normal file
269
docs/observability.md
Normal file
@@ -0,0 +1,269 @@
|
||||
<!-- markdownlint-disable MD036 MD060 -->
|
||||
# Gerbil Observability Architecture
|
||||
|
||||
This document describes the metrics subsystem for Gerbil, explains the design
|
||||
decisions, and shows how to configure each backend.
|
||||
|
||||
---
|
||||
|
||||
## Architecture Overview
|
||||
|
||||
Gerbil's metrics subsystem uses a **pluggable backend** design:
|
||||
|
||||
```text
|
||||
main.go ─── internal/metrics ─── internal/observability ─── backend
|
||||
(facade) (interface) Prometheus
|
||||
OR OTel/OTLP
|
||||
OR Noop (disabled)
|
||||
```
|
||||
|
||||
Application code (main, relay, proxy) calls only the `metrics.Record*`
|
||||
functions in `internal/metrics`. That package delegates to whichever backend
|
||||
was selected at startup via `internal/observability.Backend`.
|
||||
|
||||
### Why Prometheus-native and OTel are mutually exclusive
|
||||
|
||||
**Exactly one** metrics backend may be active at runtime:
|
||||
|
||||
| Mode | What happens |
|
||||
|------|-------------|
|
||||
| `prometheus` | Native Prometheus client registers metrics on a dedicated registry and exposes `/metrics`. No OTel SDK is initialised. |
|
||||
| `otel` | OTel SDK pushes metrics via OTLP/gRPC or OTLP/HTTP to an external collector. No `/metrics` endpoint is exposed. |
|
||||
| `none` | A safe noop backend is used. All `Record*` calls are discarded. |
|
||||
|
||||
Running both simultaneously would mean every metric is recorded twice through
|
||||
two different code paths, with differing semantics (pull vs. push, different
|
||||
naming rules, different cardinality handling). The design enforces a single
|
||||
source of truth.
|
||||
|
||||
### Future OTel tracing and logging
|
||||
|
||||
The `internal/observability/otel/` package is designed so that tracing and
|
||||
logging support can be added **beside** the existing metrics code without
|
||||
touching the Prometheus-native path:
|
||||
|
||||
```bash
|
||||
internal/observability/otel/
|
||||
backend.go ← metrics
|
||||
exporter.go ← OTLP exporter creation
|
||||
resource.go ← OTel resource
|
||||
trace.go ← future: TracerProvider setup
|
||||
log.go ← future: LoggerProvider setup
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Configuration
|
||||
|
||||
### Config precedence
|
||||
|
||||
1. CLI flags (highest priority)
|
||||
2. Environment variables
|
||||
3. Defaults
|
||||
|
||||
### Config struct
|
||||
|
||||
```go
|
||||
type MetricsConfig struct {
|
||||
Enabled bool
|
||||
Backend string // "prometheus" | "otel" | "none"
|
||||
Prometheus PrometheusConfig
|
||||
OTel OTelConfig
|
||||
ServiceName string
|
||||
ServiceVersion string
|
||||
DeploymentEnvironment string
|
||||
}
|
||||
|
||||
type PrometheusConfig struct {
|
||||
Path string // default: "/metrics"
|
||||
}
|
||||
|
||||
type OTelConfig struct {
|
||||
Protocol string // "grpc" (default) or "http"
|
||||
Endpoint string // default: "localhost:4317"
|
||||
Insecure bool // default: true
|
||||
ExportInterval time.Duration // default: 60s
|
||||
}
|
||||
```
|
||||
|
||||
### Environment variables
|
||||
|
||||
| Variable | Default | Description |
|
||||
|----------|---------|-------------|
|
||||
| `METRICS_ENABLED` | `true` | Enable/disable metrics |
|
||||
| `METRICS_BACKEND` | `prometheus` | Backend: `prometheus`, `otel`, or `none` |
|
||||
| `METRICS_PATH` | `/metrics` | HTTP path for Prometheus endpoint |
|
||||
| `OTEL_METRICS_PROTOCOL` | `grpc` | OTLP transport: `grpc` or `http` |
|
||||
| `OTEL_METRICS_ENDPOINT` | `localhost:4317` | OTLP collector address |
|
||||
| `OTEL_METRICS_INSECURE` | `true` | Disable TLS for OTLP |
|
||||
| `OTEL_METRICS_EXPORT_INTERVAL` | `60s` | Push interval (e.g. `10s`, `1m`) |
|
||||
| `DEPLOYMENT_ENVIRONMENT` | _(unset)_ | OTel deployment.environment attribute |
|
||||
|
||||
### CLI flags
|
||||
|
||||
```bash
|
||||
--metrics-enabled bool (default: true)
|
||||
--metrics-backend string (default: prometheus)
|
||||
--metrics-path string (default: /metrics)
|
||||
--otel-metrics-protocol string (default: grpc)
|
||||
--otel-metrics-endpoint string (default: localhost:4317)
|
||||
--otel-metrics-insecure bool (default: true)
|
||||
--otel-metrics-export-interval duration (default: 1m0s)
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## When to choose each backend
|
||||
|
||||
| Criterion | Prometheus | OTel/OTLP |
|
||||
|-----------|-----------|-----------|
|
||||
| Existing Prometheus/Grafana stack | ✅ | |
|
||||
| Pull-based scraping | ✅ | |
|
||||
| No external collector required | ✅ | |
|
||||
| Vendor-neutral telemetry | | ✅ |
|
||||
| Push-based export | | ✅ |
|
||||
| Grafana Cloud / managed OTLP | | ✅ |
|
||||
| Future traces + logs via same pipeline | | ✅ |
|
||||
|
||||
---
|
||||
|
||||
## Enabling Prometheus-native mode
|
||||
|
||||
### Environment variables
|
||||
|
||||
```bash
|
||||
METRICS_ENABLED=true
|
||||
METRICS_BACKEND=prometheus
|
||||
METRICS_PATH=/metrics
|
||||
```
|
||||
|
||||
### CLI
|
||||
|
||||
```bash
|
||||
./gerbil --metrics-enabled --metrics-backend=prometheus --metrics-path=/metrics \
|
||||
--config=/etc/gerbil/config.json
|
||||
```
|
||||
|
||||
The metrics config is supplied separately via env/flags; it is not embedded
|
||||
in the WireGuard config file.
|
||||
|
||||
The Prometheus `/metrics` endpoint is registered only when
|
||||
`--metrics-backend=prometheus`. All gerbil_* metrics plus Go runtime metrics
|
||||
are available.
|
||||
|
||||
---
|
||||
|
||||
## Enabling OTel mode
|
||||
|
||||
### Environment variables
|
||||
|
||||
```bash
|
||||
export METRICS_ENABLED=true
|
||||
export METRICS_BACKEND=otel
|
||||
export OTEL_METRICS_PROTOCOL=grpc
|
||||
export OTEL_METRICS_ENDPOINT=otel-collector:4317
|
||||
export OTEL_METRICS_INSECURE=true
|
||||
export OTEL_METRICS_EXPORT_INTERVAL=10s
|
||||
export DEPLOYMENT_ENVIRONMENT=production
|
||||
```
|
||||
|
||||
### CLI
|
||||
|
||||
```bash
|
||||
./gerbil --metrics-enabled \
|
||||
--metrics-backend=otel \
|
||||
--otel-metrics-protocol=grpc \
|
||||
--otel-metrics-endpoint=otel-collector:4317 \
|
||||
--otel-metrics-insecure \
|
||||
--otel-metrics-export-interval=10s \
|
||||
--config=/etc/gerbil/config.json
|
||||
```
|
||||
|
||||
### HTTP mode (OTLP/HTTP)
|
||||
|
||||
```bash
|
||||
export OTEL_METRICS_PROTOCOL=http
|
||||
export OTEL_METRICS_ENDPOINT=otel-collector:4318
|
||||
```
|
||||
|
||||
---
|
||||
|
||||
## Disabling metrics
|
||||
|
||||
```bash
|
||||
export METRICS_ENABLED=false
|
||||
# or
|
||||
./gerbil --metrics-enabled=false
|
||||
# or
|
||||
./gerbil --metrics-backend=none
|
||||
```
|
||||
|
||||
When disabled, all `Record*` calls are directed to a safe noop backend that
|
||||
discards observations without allocating or locking.
|
||||
|
||||
---
|
||||
|
||||
## Metric catalog
|
||||
|
||||
All metrics use the prefix `gerbil_<component>_<name>`.
|
||||
|
||||
### WireGuard metrics
|
||||
|
||||
| Metric | Type | Labels | Description |
|
||||
|--------|------|--------|-------------|
|
||||
| `gerbil_wg_interface_up` | Gauge | `ifname`, `instance` | 1=up, 0=down |
|
||||
| `gerbil_wg_peers_total` | UpDownCounter | `ifname` | Configured peers |
|
||||
| `gerbil_wg_peer_connected` | Gauge | `ifname`, `peer` | 1=connected, 0=disconnected |
|
||||
| `gerbil_wg_bytes_received_total` | Counter | `ifname`, `peer` | Bytes received |
|
||||
| `gerbil_wg_bytes_transmitted_total` | Counter | `ifname`, `peer` | Bytes transmitted |
|
||||
| `gerbil_wg_handshakes_total` | Counter | `ifname`, `peer`, `result` | Handshake attempts |
|
||||
| `gerbil_wg_handshake_latency_seconds` | Histogram | `ifname`, `peer` | Handshake duration |
|
||||
| `gerbil_wg_peer_rtt_seconds` | Histogram | `ifname`, `peer` | Peer round-trip time |
|
||||
|
||||
### Relay metrics
|
||||
|
||||
| Metric | Type | Labels |
|
||||
|--------|------|--------|
|
||||
| `gerbil_proxy_mapping_active` | UpDownCounter | `ifname` |
|
||||
| `gerbil_session_active` | UpDownCounter | `ifname` |
|
||||
| `gerbil_active_sessions` | UpDownCounter | `ifname` |
|
||||
| `gerbil_udp_packets_total` | Counter | `ifname`, `type`, `direction` |
|
||||
| `gerbil_hole_punch_events_total` | Counter | `ifname`, `result` |
|
||||
|
||||
### SNI proxy metrics
|
||||
|
||||
| Metric | Type | Labels |
|
||||
|--------|------|--------|
|
||||
| `gerbil_sni_connections_total` | Counter | `result` |
|
||||
| `gerbil_sni_active_connections` | UpDownCounter | _(none)_ |
|
||||
| `gerbil_sni_route_cache_hits_total` | Counter | `result` |
|
||||
| `gerbil_sni_route_api_requests_total` | Counter | `result` |
|
||||
| `gerbil_proxy_route_lookups_total` | Counter | `result`, `hostname` |
|
||||
|
||||
### HTTP metrics
|
||||
|
||||
| Metric | Type | Labels |
|
||||
|--------|------|--------|
|
||||
| `gerbil_http_requests_total` | Counter | `endpoint`, `method`, `status_code` |
|
||||
| `gerbil_http_request_duration_seconds` | Histogram | `endpoint`, `method` |
|
||||
|
||||
---
|
||||
|
||||
## Using Docker Compose
|
||||
|
||||
The `docker-compose.metrics.yml` provides a complete observability stack.
|
||||
|
||||
**Prometheus mode:**
|
||||
|
||||
```bash
|
||||
METRICS_BACKEND=prometheus docker-compose -f docker-compose.metrics.yml up -d
|
||||
# Scrape at http://localhost:3003/metrics
|
||||
# Grafana at http://localhost:3000 (admin/admin)
|
||||
```
|
||||
|
||||
**OTel mode:**
|
||||
|
||||
```bash
|
||||
METRICS_BACKEND=otel OTEL_METRICS_ENDPOINT=otel-collector:4317 \
|
||||
docker-compose -f docker-compose.metrics.yml up -d
|
||||
```
|
||||
46
examples/otel-collector-config.yaml
Normal file
46
examples/otel-collector-config.yaml
Normal file
@@ -0,0 +1,46 @@
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: 0.0.0.0:4317
|
||||
http:
|
||||
endpoint: 0.0.0.0:4318
|
||||
|
||||
processors:
|
||||
batch:
|
||||
timeout: 10s
|
||||
send_batch_size: 1024
|
||||
|
||||
# Add resource attributes
|
||||
resource:
|
||||
attributes:
|
||||
- key: service.environment
|
||||
value: "development"
|
||||
action: insert
|
||||
|
||||
exporters:
|
||||
# Prometheus exporter for scraping
|
||||
prometheus:
|
||||
endpoint: "0.0.0.0:8889"
|
||||
namespace: "gerbil"
|
||||
send_timestamps: true
|
||||
metric_expiration: 5m
|
||||
resource_to_telemetry_conversion:
|
||||
enabled: true
|
||||
|
||||
# Prometheus remote write (optional)
|
||||
prometheusremotewrite:
|
||||
endpoint: "http://prometheus:9090/api/v1/write"
|
||||
tls:
|
||||
insecure: true
|
||||
|
||||
# Debug exporter for debugging
|
||||
debug:
|
||||
verbosity: normal
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [batch, resource]
|
||||
exporters: [prometheus, prometheusremotewrite, debug]
|
||||
24
examples/prometheus.yml
Normal file
24
examples/prometheus.yml
Normal file
@@ -0,0 +1,24 @@
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
evaluation_interval: 15s
|
||||
external_labels:
|
||||
cluster: 'gerbil-dev'
|
||||
|
||||
scrape_configs:
|
||||
# Scrape Gerbil's /metrics endpoint directly
|
||||
- job_name: 'gerbil'
|
||||
static_configs:
|
||||
- targets: ['gerbil:3003']
|
||||
labels:
|
||||
service: 'gerbil'
|
||||
environment: 'development'
|
||||
|
||||
# Scrape OpenTelemetry Collector metrics
|
||||
- job_name: 'otel-collector'
|
||||
static_configs:
|
||||
- targets: ['otel-collector:8888']
|
||||
labels:
|
||||
service: 'otel-collector'
|
||||
- targets: ['otel-collector:8889']
|
||||
labels:
|
||||
service: 'otel-collector-prometheus-exporter'
|
||||
45
go.mod
45
go.mod
@@ -1,23 +1,50 @@
|
||||
module github.com/fosrl/gerbil
|
||||
|
||||
go 1.23.1
|
||||
go 1.26.0
|
||||
|
||||
toolchain go1.23.2
|
||||
require (
|
||||
github.com/vishvananda/netlink v1.3.0
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible
|
||||
github.com/prometheus/client_golang v1.20.5
|
||||
github.com/vishvananda/netlink v1.3.1
|
||||
go.opentelemetry.io/otel v1.42.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0
|
||||
go.opentelemetry.io/otel/metric v1.42.0
|
||||
go.opentelemetry.io/otel/sdk v1.42.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.42.0
|
||||
golang.org/x/crypto v0.49.0
|
||||
golang.org/x/sync v0.20.0
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/google/go-cmp v0.5.9 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/google/go-cmp v0.7.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
|
||||
github.com/josharian/native v1.1.0 // indirect
|
||||
github.com/klauspost/compress v1.17.9 // indirect
|
||||
github.com/mdlayher/genetlink v1.3.2 // indirect
|
||||
github.com/mdlayher/netlink v1.7.2 // indirect
|
||||
github.com/mdlayher/socket v0.4.1 // indirect
|
||||
github.com/vishvananda/netns v0.0.4 // indirect
|
||||
golang.org/x/crypto v0.8.0 // indirect
|
||||
golang.org/x/net v0.9.0 // indirect
|
||||
golang.org/x/sync v0.1.0 // indirect
|
||||
golang.org/x/sys v0.10.0 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/prometheus/client_model v0.6.1 // indirect
|
||||
github.com/prometheus/common v0.61.0 // indirect
|
||||
github.com/prometheus/procfs v0.15.1 // indirect
|
||||
github.com/vishvananda/netns v0.0.5 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.42.0 // indirect
|
||||
go.opentelemetry.io/proto/otlp v1.9.0 // indirect
|
||||
golang.org/x/net v0.51.0 // indirect
|
||||
golang.org/x/sys v0.42.0 // indirect
|
||||
golang.org/x/text v0.35.0 // indirect
|
||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||
google.golang.org/grpc v1.79.3 // indirect
|
||||
google.golang.org/protobuf v1.36.11 // indirect
|
||||
)
|
||||
|
||||
102
go.sum
102
go.sum
@@ -1,29 +1,101 @@
|
||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
|
||||
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c=
|
||||
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
|
||||
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
|
||||
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
|
||||
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
|
||||
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||
github.com/mdlayher/genetlink v1.3.2 h1:KdrNKe+CTu+IbZnm/GVUMXSqBBLqcGpRDa0xkQy56gw=
|
||||
github.com/mdlayher/genetlink v1.3.2/go.mod h1:tcC3pkCrPUGIKKsCsp0B3AdaaKuHtaxoJRz3cc+528o=
|
||||
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
|
||||
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw=
|
||||
github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U=
|
||||
github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
|
||||
github.com/vishvananda/netlink v1.3.0 h1:X7l42GfcV4S6E4vHTsw48qbrV+9PVojNfIhZcwQdrZk=
|
||||
github.com/vishvananda/netlink v1.3.0/go.mod h1:i6NetklAujEcC6fK0JPjT8qSwWyO0HLn4UKG+hGqeJs=
|
||||
github.com/vishvananda/netns v0.0.4 h1:Oeaw1EM2JMxD51g9uhtC0D7erkIjgmj8+JZc26m1YX8=
|
||||
github.com/vishvananda/netns v0.0.4/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
golang.org/x/crypto v0.8.0 h1:pd9TJtTueMTVQXzk8E2XESSMQDj/U7OUu0PqJqPXQjQ=
|
||||
golang.org/x/crypto v0.8.0/go.mod h1:mRqEX+O9/h5TFCrQhkgjo2yKi0yYA+9ecGkdQoHrywE=
|
||||
golang.org/x/net v0.9.0 h1:aWJ/m6xSmxWBx+V0XRHTlrYrPG56jKsLdTFmsSsCzOM=
|
||||
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
|
||||
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
|
||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721 h1:RlZweED6sbSArvlE924+mUcZuXKLBHA35U7LN621Bws=
|
||||
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721/go.mod h1:Ickgr2WtCLZ2MDGd4Gr0geeCH5HybhRJbonOgQpvSxc=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
|
||||
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
|
||||
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
||||
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||
github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ=
|
||||
github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s=
|
||||
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
|
||||
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
|
||||
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
|
||||
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||
go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho=
|
||||
go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0 h1:MdKucPl/HbzckWWEisiNqMPhRrAOQX8r4jTuGr636gk=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0/go.mod h1:RolT8tWtfHcjajEH5wFIZ4Dgh5jpPdFXYV9pTAk/qjc=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0 h1:H7O6RlGOMTizyl3R08Kn5pdM06bnH8oscSj7o11tmLA=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0/go.mod h1:mBFWu/WOVDkWWsR7Tx7h6EpQB8wsv7P0Yrh0Pb7othc=
|
||||
go.opentelemetry.io/otel/metric v1.42.0 h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4=
|
||||
go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI=
|
||||
go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo=
|
||||
go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc=
|
||||
go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY=
|
||||
go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc=
|
||||
go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A=
|
||||
go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4=
|
||||
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
|
||||
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
|
||||
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
|
||||
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
|
||||
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.7.0 h1:3jlCCIQZPdOYu1h8BkNvLz8Kgwtae2cagcG/VamtZRU=
|
||||
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.10.0 h1:SqMFp9UcQJZa+pmYuAKjd9xq1f0j5rLcDIk0mj4qAsA=
|
||||
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
|
||||
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
|
||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b h1:J1CaxgLerRR5lgx3wnr6L04cJFbWoceSK9JWBdglINo=
|
||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b/go.mod h1:tqur9LnfstdR9ep2LaJT4lFUl0EjlHtge+gAjmsHUG4=
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6 h1:CawjfCvYQH2OU3/TnxLx97WDSUDRABfT18pCOYwc2GE=
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6/go.mod h1:3rxYc4HtVcSG9gVaTs2GEBdehh+sYPOwKtyUWEOTb80=
|
||||
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
|
||||
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 h1:mWPCjDEyshlQYzBpMNHaEof6UX1PmHcaUODUywQ0uac=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
|
||||
google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE=
|
||||
google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
|
||||
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
|
||||
506
internal/metrics/metrics.go
Normal file
506
internal/metrics/metrics.go
Normal file
@@ -0,0 +1,506 @@
|
||||
// Package metrics provides the application-level metrics facade for Gerbil.
|
||||
//
|
||||
// Application code (main, relay, proxy) uses only the Record* functions in this
|
||||
// package. The actual recording is delegated to the backend selected in
|
||||
// internal/observability. Neither Prometheus nor OTel packages are imported here.
|
||||
package metrics
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
|
||||
// Config is the metrics configuration type. It is an alias for
|
||||
// observability.MetricsConfig so callers do not need to import observability.
|
||||
type Config = observability.MetricsConfig
|
||||
|
||||
// PrometheusConfig is re-exported for convenience.
|
||||
type PrometheusConfig = observability.PrometheusConfig
|
||||
|
||||
// OTelConfig is re-exported for convenience.
|
||||
type OTelConfig = observability.OTelConfig
|
||||
|
||||
var (
|
||||
backend observability.Backend
|
||||
|
||||
// Interface and peer metrics
|
||||
wgInterfaceUp observability.Int64Gauge
|
||||
wgPeersTotal observability.UpDownCounter
|
||||
wgPeerConnected observability.Int64Gauge
|
||||
wgHandshakesTotal observability.Counter
|
||||
wgHandshakeLatency observability.Histogram
|
||||
wgPeerRTT observability.Histogram
|
||||
wgBytesReceived observability.Counter
|
||||
wgBytesTransmitted observability.Counter
|
||||
allowedIPsCount observability.UpDownCounter
|
||||
keyRotationTotal observability.Counter
|
||||
|
||||
// System and proxy metrics
|
||||
netlinkEventsTotal observability.Counter
|
||||
netlinkErrorsTotal observability.Counter
|
||||
syncDuration observability.Histogram
|
||||
workqueueDepth observability.UpDownCounter
|
||||
kernelModuleLoads observability.Counter
|
||||
firewallRulesApplied observability.Counter
|
||||
activeSessions observability.UpDownCounter
|
||||
activeProxyConnections observability.UpDownCounter
|
||||
proxyRouteLookups observability.Counter
|
||||
proxyTLSHandshake observability.Histogram
|
||||
proxyBytesTransmitted observability.Counter
|
||||
|
||||
// UDP Relay / Proxy Metrics
|
||||
udpPacketsTotal observability.Counter
|
||||
udpPacketSizeBytes observability.Histogram
|
||||
holePunchEventsTotal observability.Counter
|
||||
proxyMappingActive observability.UpDownCounter
|
||||
sessionActive observability.UpDownCounter
|
||||
sessionRebuiltTotal observability.Counter
|
||||
commPatternActive observability.UpDownCounter
|
||||
proxyCleanupRemovedTotal observability.Counter
|
||||
proxyConnectionErrorsTotal observability.Counter
|
||||
proxyInitialMappingsTotal observability.Int64Gauge
|
||||
proxyMappingUpdatesTotal observability.Counter
|
||||
proxyIdleCleanupDuration observability.Histogram
|
||||
|
||||
// SNI Proxy Metrics
|
||||
sniConnectionsTotal observability.Counter
|
||||
sniConnectionDuration observability.Histogram
|
||||
sniActiveConnections observability.UpDownCounter
|
||||
sniRouteCacheHitsTotal observability.Counter
|
||||
sniRouteAPIRequestsTotal observability.Counter
|
||||
sniRouteAPILatency observability.Histogram
|
||||
sniLocalOverrideTotal observability.Counter
|
||||
sniTrustedProxyEventsTotal observability.Counter
|
||||
sniProxyProtocolParseErrorsTotal observability.Counter
|
||||
sniDataBytesTotal observability.Counter
|
||||
sniTunnelTerminationsTotal observability.Counter
|
||||
|
||||
// HTTP API & Peer Management Metrics
|
||||
httpRequestsTotal observability.Counter
|
||||
httpRequestDuration observability.Histogram
|
||||
peerOperationsTotal observability.Counter
|
||||
proxyMappingUpdateRequestsTotal observability.Counter
|
||||
destinationsUpdateRequestsTotal observability.Counter
|
||||
|
||||
// Remote Configuration, Reporting & Housekeeping
|
||||
remoteConfigFetchesTotal observability.Counter
|
||||
bandwidthReportsTotal observability.Counter
|
||||
peerBandwidthBytesTotal observability.Counter
|
||||
memorySpikeTotal observability.Counter
|
||||
heapProfilesWrittenTotal observability.Counter
|
||||
|
||||
// Operational metrics
|
||||
configReloadsTotal observability.Counter
|
||||
restartTotal observability.Counter
|
||||
authFailuresTotal observability.Counter
|
||||
aclDeniedTotal observability.Counter
|
||||
certificateExpiryDays observability.Float64Gauge
|
||||
)
|
||||
|
||||
// DefaultConfig returns a default metrics configuration.
|
||||
func DefaultConfig() Config {
|
||||
return observability.DefaultMetricsConfig()
|
||||
}
|
||||
|
||||
// Initialize sets up the metrics system using the selected backend.
|
||||
// It returns the /metrics HTTP handler (non-nil only for Prometheus backend).
|
||||
func Initialize(cfg Config) (http.Handler, error) {
|
||||
b, err := observability.New(cfg)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
backend = b
|
||||
|
||||
if err := createInstruments(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return backend.HTTPHandler(), nil
|
||||
}
|
||||
|
||||
// Shutdown gracefully shuts down the metrics backend.
|
||||
func Shutdown(ctx context.Context) error {
|
||||
if backend != nil {
|
||||
return backend.Shutdown(ctx)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// createInstruments registers every metric instrument on the active backend
// and assigns it to the corresponding package-level variable. It must run
// after the package-level backend has been set (see Initialize). The error
// return is currently always nil but is kept for backends that may fail
// instrument creation in the future.
func createInstruments() error {
	// Shared histogram bucket boundaries:
	//   durationBuckets    — short operation latencies (5 ms … 30 s)
	//   sizeBuckets        — payload sizes in bytes (512 B … 1 MiB)
	//   sniDurationBuckets — long-lived SNI connection lifetimes (100 ms … 120 s)
	durationBuckets := []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}
	sizeBuckets := []float64{512, 1024, 4096, 16384, 65536, 262144, 1048576}
	sniDurationBuckets := []float64{0.1, 0.5, 1, 2.5, 5, 10, 30, 60, 120}

	b := backend

	// WireGuard interface & peer metrics.
	wgInterfaceUp = b.NewInt64Gauge("gerbil_wg_interface_up",
		"Operational state of a WireGuard interface (1=up, 0=down)", "ifname", "instance")
	wgPeersTotal = b.NewUpDownCounter("gerbil_wg_peers_total",
		"Total number of configured peers per interface", "ifname")
	wgPeerConnected = b.NewInt64Gauge("gerbil_wg_peer_connected",
		"Whether a specific peer is connected (1=connected, 0=disconnected)", "ifname", "peer")
	allowedIPsCount = b.NewUpDownCounter("gerbil_allowed_ips_count",
		"Number of allowed IPs configured per peer", "ifname", "peer")
	keyRotationTotal = b.NewCounter("gerbil_key_rotation_total",
		"Key rotation events", "ifname", "reason")
	wgHandshakesTotal = b.NewCounter("gerbil_wg_handshakes_total",
		"Count of handshake attempts with their result status", "ifname", "peer", "result")
	wgHandshakeLatency = b.NewHistogram("gerbil_wg_handshake_latency_seconds",
		"Distribution of handshake latencies in seconds", durationBuckets, "ifname", "peer")
	wgPeerRTT = b.NewHistogram("gerbil_wg_peer_rtt_seconds",
		"Observed round-trip time to a peer in seconds", durationBuckets, "ifname", "peer")
	wgBytesReceived = b.NewCounter("gerbil_wg_bytes_received_total",
		"Number of bytes received from a peer", "ifname", "peer")
	wgBytesTransmitted = b.NewCounter("gerbil_wg_bytes_transmitted_total",
		"Number of bytes transmitted to a peer", "ifname", "peer")

	// Netlink, sync-loop and system metrics.
	netlinkEventsTotal = b.NewCounter("gerbil_netlink_events_total",
		"Number of netlink events processed", "event_type")
	netlinkErrorsTotal = b.NewCounter("gerbil_netlink_errors_total",
		"Count of netlink or kernel errors", "component", "error_type")
	syncDuration = b.NewHistogram("gerbil_sync_duration_seconds",
		"Duration of reconciliation/sync loops in seconds", durationBuckets, "component")
	workqueueDepth = b.NewUpDownCounter("gerbil_workqueue_depth",
		"Current length of internal work queues", "queue")
	kernelModuleLoads = b.NewCounter("gerbil_kernel_module_loads_total",
		"Count of kernel module load attempts", "result")
	firewallRulesApplied = b.NewCounter("gerbil_firewall_rules_applied_total",
		"IPTables/NFT rules applied", "result", "chain")

	// Generic proxy metrics. Note: hostname is intentionally not a label on
	// these instruments (see the Record* helpers, which discard it).
	activeSessions = b.NewUpDownCounter("gerbil_active_sessions",
		"Number of active UDP relay sessions", "ifname")
	activeProxyConnections = b.NewUpDownCounter("gerbil_active_proxy_connections",
		"Active SNI proxy connections")
	proxyRouteLookups = b.NewCounter("gerbil_proxy_route_lookups_total",
		"Number of route lookups", "result")
	proxyTLSHandshake = b.NewHistogram("gerbil_proxy_tls_handshake_seconds",
		"TLS handshake duration for SNI proxy in seconds", durationBuckets)
	proxyBytesTransmitted = b.NewCounter("gerbil_proxy_bytes_transmitted_total",
		"Bytes sent/received by the SNI proxy", "direction")

	// Operational metrics.
	configReloadsTotal = b.NewCounter("gerbil_config_reloads_total",
		"Number of configuration reloads", "result")
	restartTotal = b.NewCounter("gerbil_restart_total",
		"Process restart count")
	authFailuresTotal = b.NewCounter("gerbil_auth_failures_total",
		"Count of authentication or peer validation failures", "peer", "reason")
	aclDeniedTotal = b.NewCounter("gerbil_acl_denied_total",
		"Access control denied events", "ifname", "peer", "policy")
	certificateExpiryDays = b.NewFloat64Gauge("gerbil_certificate_expiry_days",
		"Days until certificate expiry", "cert_name", "ifname")

	// UDP relay & session tracking metrics.
	udpPacketsTotal = b.NewCounter("gerbil_udp_packets_total",
		"Count of UDP packets processed by relay workers", "ifname", "type", "direction")
	udpPacketSizeBytes = b.NewHistogram("gerbil_udp_packet_size_bytes",
		"Size distribution of packets forwarded through relay", sizeBuckets, "ifname", "type")
	holePunchEventsTotal = b.NewCounter("gerbil_hole_punch_events_total",
		"Count of hole punch messages processed", "ifname", "result")
	proxyMappingActive = b.NewUpDownCounter("gerbil_proxy_mapping_active",
		"Number of active proxy mappings", "ifname")
	sessionActive = b.NewUpDownCounter("gerbil_session_active",
		"Number of active WireGuard sessions", "ifname")
	sessionRebuiltTotal = b.NewCounter("gerbil_session_rebuilt_total",
		"Count of sessions rebuilt from communication patterns", "ifname")
	commPatternActive = b.NewUpDownCounter("gerbil_comm_pattern_active",
		"Number of active communication patterns", "ifname")
	proxyCleanupRemovedTotal = b.NewCounter("gerbil_proxy_cleanup_removed_total",
		"Count of items removed during cleanup routines", "ifname", "component")
	proxyConnectionErrorsTotal = b.NewCounter("gerbil_proxy_connection_errors_total",
		"Count of connection errors in proxy operations", "ifname", "error_type")
	proxyInitialMappingsTotal = b.NewInt64Gauge("gerbil_proxy_initial_mappings",
		"Number of initial proxy mappings loaded", "ifname")
	proxyMappingUpdatesTotal = b.NewCounter("gerbil_proxy_mapping_updates_total",
		"Count of proxy mapping updates", "ifname")
	proxyIdleCleanupDuration = b.NewHistogram("gerbil_proxy_idle_cleanup_duration_seconds",
		"Duration of cleanup cycles", durationBuckets, "ifname", "component")

	// SNI proxy metrics.
	sniConnectionsTotal = b.NewCounter("gerbil_sni_connections_total",
		"Count of connections processed by SNI proxy", "result")
	sniConnectionDuration = b.NewHistogram("gerbil_sni_connection_duration_seconds",
		"Lifetime distribution of proxied TLS connections", sniDurationBuckets)
	sniActiveConnections = b.NewUpDownCounter("gerbil_sni_active_connections",
		"Number of active SNI tunnels")
	sniRouteCacheHitsTotal = b.NewCounter("gerbil_sni_route_cache_hits_total",
		"Count of route cache hits and misses", "result")
	sniRouteAPIRequestsTotal = b.NewCounter("gerbil_sni_route_api_requests_total",
		"Count of route API requests", "result")
	sniRouteAPILatency = b.NewHistogram("gerbil_sni_route_api_latency_seconds",
		"Distribution of route API call latencies", durationBuckets)
	sniLocalOverrideTotal = b.NewCounter("gerbil_sni_local_override_total",
		"Count of routes using local overrides", "hit")
	sniTrustedProxyEventsTotal = b.NewCounter("gerbil_sni_trusted_proxy_events_total",
		"Count of PROXY protocol events", "event")
	sniProxyProtocolParseErrorsTotal = b.NewCounter("gerbil_sni_proxy_protocol_parse_errors_total",
		"Count of PROXY protocol parse failures")
	sniDataBytesTotal = b.NewCounter("gerbil_sni_data_bytes_total",
		"Count of bytes proxied through SNI tunnels", "direction")
	sniTunnelTerminationsTotal = b.NewCounter("gerbil_sni_tunnel_terminations_total",
		"Count of tunnel terminations by reason", "reason")

	// HTTP API & peer management metrics.
	httpRequestsTotal = b.NewCounter("gerbil_http_requests_total",
		"Count of HTTP requests to management API", "endpoint", "method", "status_code")
	httpRequestDuration = b.NewHistogram("gerbil_http_request_duration_seconds",
		"Distribution of HTTP request handling time", durationBuckets, "endpoint", "method")
	peerOperationsTotal = b.NewCounter("gerbil_peer_operations_total",
		"Count of peer lifecycle operations", "operation", "result")
	proxyMappingUpdateRequestsTotal = b.NewCounter("gerbil_proxy_mapping_update_requests_total",
		"Count of proxy mapping update API calls", "result")
	destinationsUpdateRequestsTotal = b.NewCounter("gerbil_destinations_update_requests_total",
		"Count of destinations update API calls", "result")

	// Remote configuration, reporting & housekeeping metrics.
	remoteConfigFetchesTotal = b.NewCounter("gerbil_remote_config_fetches_total",
		"Count of remote configuration fetch attempts", "result")
	bandwidthReportsTotal = b.NewCounter("gerbil_bandwidth_reports_total",
		"Count of bandwidth report transmissions", "result")
	peerBandwidthBytesTotal = b.NewCounter("gerbil_peer_bandwidth_bytes_total",
		"Bytes per peer tracked by bandwidth calculation", "peer", "direction")
	memorySpikeTotal = b.NewCounter("gerbil_memory_spike_total",
		"Count of memory spikes detected", "severity")
	heapProfilesWrittenTotal = b.NewCounter("gerbil_heap_profiles_written_total",
		"Count of heap profile files generated")

	return nil
}
|
||||
|
||||
// RecordInterfaceUp sets the up/down gauge for a WireGuard interface
// (1 when up, 0 when down).
func RecordInterfaceUp(ifname, instance string, up bool) {
	value := int64(0)
	if up {
		value = 1
	}
	wgInterfaceUp.Record(context.Background(), value, observability.Labels{"ifname": ifname, "instance": instance})
}

// RecordPeersTotal adjusts the configured-peer count of an interface by delta.
func RecordPeersTotal(ifname string, delta int64) {
	wgPeersTotal.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}

// RecordPeerConnected sets the per-peer connectivity gauge
// (1 when connected, 0 when disconnected).
func RecordPeerConnected(ifname, peer string, connected bool) {
	value := int64(0)
	if connected {
		value = 1
	}
	wgPeerConnected.Record(context.Background(), value, observability.Labels{"ifname": ifname, "peer": peer})
}

// RecordHandshake counts one handshake attempt with the given result status.
func RecordHandshake(ifname, peer, result string) {
	wgHandshakesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "result": result})
}

// RecordHandshakeLatency records one handshake latency observation in seconds.
func RecordHandshakeLatency(ifname, peer string, seconds float64) {
	wgHandshakeLatency.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
}

// RecordPeerRTT records one observed round-trip time to a peer in seconds.
func RecordPeerRTT(ifname, peer string, seconds float64) {
	wgPeerRTT.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
}

// RecordBytesReceived adds bytes received from a peer to the receive counter.
func RecordBytesReceived(ifname, peer string, bytes int64) {
	wgBytesReceived.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
}

// RecordBytesTransmitted adds bytes sent to a peer to the transmit counter.
func RecordBytesTransmitted(ifname, peer string, bytes int64) {
	wgBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
}

// RecordAllowedIPsCount adjusts the allowed-IPs count for a peer by delta.
func RecordAllowedIPsCount(ifname, peer string, delta int64) {
	allowedIPsCount.Add(context.Background(), delta, observability.Labels{"ifname": ifname, "peer": peer})
}

// RecordKeyRotation counts one key-rotation event with its reason.
func RecordKeyRotation(ifname, reason string) {
	keyRotationTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "reason": reason})
}
|
||||
|
||||
// RecordNetlinkEvent counts one processed netlink event of the given type.
func RecordNetlinkEvent(eventType string) {
	netlinkEventsTotal.Add(context.Background(), 1, observability.Labels{"event_type": eventType})
}

// RecordNetlinkError counts one netlink/kernel error by component and type.
func RecordNetlinkError(component, errorType string) {
	netlinkErrorsTotal.Add(context.Background(), 1, observability.Labels{"component": component, "error_type": errorType})
}

// RecordSyncDuration records one reconciliation/sync loop duration in seconds.
func RecordSyncDuration(component string, seconds float64) {
	syncDuration.Record(context.Background(), seconds, observability.Labels{"component": component})
}

// RecordWorkqueueDepth adjusts the depth gauge of an internal work queue by delta.
func RecordWorkqueueDepth(queue string, delta int64) {
	workqueueDepth.Add(context.Background(), delta, observability.Labels{"queue": queue})
}

// RecordKernelModuleLoad counts one kernel module load attempt with its result.
func RecordKernelModuleLoad(result string) {
	kernelModuleLoads.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordFirewallRuleApplied counts one firewall rule application by result and chain.
func RecordFirewallRuleApplied(result, chain string) {
	firewallRulesApplied.Add(context.Background(), 1, observability.Labels{"result": result, "chain": chain})
}

// RecordActiveSession adjusts the active UDP relay session gauge by delta.
func RecordActiveSession(ifname string, delta int64) {
	activeSessions.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}
|
||||
|
||||
// RecordActiveProxyConnection adjusts the active SNI proxy connection gauge
// by delta. The hostname is accepted for API symmetry but deliberately not
// used as a label — presumably to bound label cardinality; confirm before
// adding it to the instrument.
func RecordActiveProxyConnection(hostname string, delta int64) {
	_ = hostname
	activeProxyConnections.Add(context.Background(), delta, nil)
}

// RecordProxyRouteLookup counts one route lookup with its result.
// The hostname is intentionally discarded (not a metric label).
func RecordProxyRouteLookup(result, hostname string) {
	_ = hostname
	proxyRouteLookups.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordProxyTLSHandshake records one SNI proxy TLS handshake duration in
// seconds. The hostname is intentionally discarded (not a metric label).
func RecordProxyTLSHandshake(hostname string, seconds float64) {
	_ = hostname
	proxyTLSHandshake.Record(context.Background(), seconds, nil)
}

// RecordProxyBytesTransmitted adds proxied bytes for the given direction.
// The hostname is intentionally discarded (not a metric label).
func RecordProxyBytesTransmitted(hostname, direction string, bytes int64) {
	_ = hostname
	proxyBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"direction": direction})
}
|
||||
|
||||
// RecordConfigReload counts one configuration reload with its result.
func RecordConfigReload(result string) {
	configReloadsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordRestart counts one process restart.
func RecordRestart() {
	restartTotal.Add(context.Background(), 1, nil)
}

// RecordAuthFailure counts one authentication/peer-validation failure.
func RecordAuthFailure(peer, reason string) {
	authFailuresTotal.Add(context.Background(), 1, observability.Labels{"peer": peer, "reason": reason})
}

// RecordACLDenied counts one access-control denial for a peer under a policy.
func RecordACLDenied(ifname, peer, policy string) {
	aclDeniedTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "policy": policy})
}

// RecordCertificateExpiry sets the days-until-expiry gauge for a certificate.
func RecordCertificateExpiry(certName, ifname string, days float64) {
	certificateExpiryDays.Record(context.Background(), days, observability.Labels{"cert_name": certName, "ifname": ifname})
}
|
||||
|
||||
// RecordUDPPacket counts one UDP packet processed by a relay worker.
func RecordUDPPacket(ifname, packetType, direction string) {
	udpPacketsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "type": packetType, "direction": direction})
}

// RecordUDPPacketSize records one relayed packet size in bytes.
func RecordUDPPacketSize(ifname, packetType string, bytes float64) {
	udpPacketSizeBytes.Record(context.Background(), bytes, observability.Labels{"ifname": ifname, "type": packetType})
}

// RecordHolePunchEvent counts one processed hole-punch message with its result.
func RecordHolePunchEvent(ifname, result string) {
	holePunchEventsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "result": result})
}

// RecordProxyMapping adjusts the active proxy-mapping gauge by delta.
func RecordProxyMapping(ifname string, delta int64) {
	proxyMappingActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}

// RecordSession adjusts the active WireGuard session gauge by delta.
func RecordSession(ifname string, delta int64) {
	sessionActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}

// RecordSessionRebuilt counts one session rebuilt from communication patterns.
func RecordSessionRebuilt(ifname string) {
	sessionRebuiltTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
}

// RecordCommPattern adjusts the active communication-pattern gauge by delta.
func RecordCommPattern(ifname string, delta int64) {
	commPatternActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
}

// RecordProxyCleanupRemoved adds count items removed by a cleanup routine.
func RecordProxyCleanupRemoved(ifname, component string, count int64) {
	proxyCleanupRemovedTotal.Add(context.Background(), count, observability.Labels{"ifname": ifname, "component": component})
}

// RecordProxyConnectionError counts one proxy connection error by type.
func RecordProxyConnectionError(ifname, errorType string) {
	proxyConnectionErrorsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "error_type": errorType})
}

// RecordProxyInitialMappings sets the gauge of initially loaded proxy mappings.
func RecordProxyInitialMappings(ifname string, count int64) {
	proxyInitialMappingsTotal.Record(context.Background(), count, observability.Labels{"ifname": ifname})
}

// RecordProxyMappingUpdate counts one proxy mapping update.
func RecordProxyMappingUpdate(ifname string) {
	proxyMappingUpdatesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
}

// RecordProxyIdleCleanupDuration records one cleanup cycle duration in seconds.
func RecordProxyIdleCleanupDuration(ifname, component string, seconds float64) {
	proxyIdleCleanupDuration.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "component": component})
}
|
||||
|
||||
// RecordSNIConnection counts one connection processed by the SNI proxy.
func RecordSNIConnection(result string) {
	sniConnectionsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordSNIConnectionDuration records one proxied TLS connection lifetime in seconds.
func RecordSNIConnectionDuration(seconds float64) {
	sniConnectionDuration.Record(context.Background(), seconds, nil)
}

// RecordSNIActiveConnection adjusts the active SNI tunnel gauge by delta.
func RecordSNIActiveConnection(delta int64) {
	sniActiveConnections.Add(context.Background(), delta, nil)
}

// RecordSNIRouteCacheHit counts one route-cache lookup outcome ("hit"/"miss").
func RecordSNIRouteCacheHit(result string) {
	sniRouteCacheHitsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordSNIRouteAPIRequest counts one route API request with its result.
func RecordSNIRouteAPIRequest(result string) {
	sniRouteAPIRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordSNIRouteAPILatency records one route API call latency in seconds.
func RecordSNIRouteAPILatency(seconds float64) {
	sniRouteAPILatency.Record(context.Background(), seconds, nil)
}

// RecordSNILocalOverride counts one local route-override lookup.
func RecordSNILocalOverride(hit string) {
	sniLocalOverrideTotal.Add(context.Background(), 1, observability.Labels{"hit": hit})
}

// RecordSNITrustedProxyEvent counts one PROXY protocol event.
func RecordSNITrustedProxyEvent(event string) {
	sniTrustedProxyEventsTotal.Add(context.Background(), 1, observability.Labels{"event": event})
}

// RecordSNIProxyProtocolParseError counts one PROXY protocol parse failure.
func RecordSNIProxyProtocolParseError() {
	sniProxyProtocolParseErrorsTotal.Add(context.Background(), 1, nil)
}

// RecordSNIDataBytes adds bytes proxied through an SNI tunnel for a direction.
func RecordSNIDataBytes(direction string, bytes int64) {
	sniDataBytesTotal.Add(context.Background(), bytes, observability.Labels{"direction": direction})
}

// RecordSNITunnelTermination counts one tunnel termination by reason.
func RecordSNITunnelTermination(reason string) {
	sniTunnelTerminationsTotal.Add(context.Background(), 1, observability.Labels{"reason": reason})
}
|
||||
|
||||
// RecordHTTPRequest counts one management API request by endpoint, method
// and status code.
func RecordHTTPRequest(endpoint, method, statusCode string) {
	httpRequestsTotal.Add(context.Background(), 1, observability.Labels{"endpoint": endpoint, "method": method, "status_code": statusCode})
}

// RecordHTTPRequestDuration records one request handling duration in seconds.
func RecordHTTPRequestDuration(endpoint, method string, seconds float64) {
	httpRequestDuration.Record(context.Background(), seconds, observability.Labels{"endpoint": endpoint, "method": method})
}

// RecordPeerOperation counts one peer lifecycle operation with its result.
func RecordPeerOperation(operation, result string) {
	peerOperationsTotal.Add(context.Background(), 1, observability.Labels{"operation": operation, "result": result})
}

// RecordProxyMappingUpdateRequest counts one proxy-mapping update API call.
func RecordProxyMappingUpdateRequest(result string) {
	proxyMappingUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordDestinationsUpdateRequest counts one destinations update API call.
func RecordDestinationsUpdateRequest(result string) {
	destinationsUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordRemoteConfigFetch counts one remote configuration fetch attempt.
func RecordRemoteConfigFetch(result string) {
	remoteConfigFetchesTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordBandwidthReport counts one bandwidth report transmission.
func RecordBandwidthReport(result string) {
	bandwidthReportsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
}

// RecordPeerBandwidthBytes adds per-peer bandwidth bytes for a direction.
func RecordPeerBandwidthBytes(peer, direction string, bytes int64) {
	peerBandwidthBytesTotal.Add(context.Background(), bytes, observability.Labels{"peer": peer, "direction": direction})
}

// RecordMemorySpike counts one detected memory spike with its severity.
func RecordMemorySpike(severity string) {
	memorySpikeTotal.Add(context.Background(), 1, observability.Labels{"severity": severity})
}

// RecordHeapProfileWritten counts one heap profile file written to disk.
func RecordHeapProfileWritten() {
	heapProfilesWrittenTotal.Add(context.Background(), 1, nil)
}
|
||||
258
internal/metrics/metrics_test.go
Normal file
258
internal/metrics/metrics_test.go
Normal file
@@ -0,0 +1,258 @@
|
||||
package metrics_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/metrics"
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
|
||||
// exampleHostname is a placeholder hostname for Record* calls whose hostname
// argument is not used as a metric label.
const exampleHostname = "example.com"

// initPrometheus initializes the metrics package with the Prometheus backend
// and registers a cleanup that shuts it down when the test finishes.
// It returns the /metrics HTTP handler.
func initPrometheus(t *testing.T) http.Handler {
	t.Helper()
	cfg := metrics.DefaultConfig()
	cfg.Enabled = true
	cfg.Backend = "prometheus"
	cfg.Prometheus.Path = "/metrics"

	h, err := metrics.Initialize(cfg)
	if err != nil {
		t.Fatalf("Initialize failed: %v", err)
	}
	t.Cleanup(func() {
		metrics.Shutdown(context.Background()) //nolint:errcheck
	})
	return h
}

// initNoop initializes the metrics package with metrics disabled, so the
// noop backend is active, and registers a shutdown cleanup.
func initNoop(t *testing.T) {
	t.Helper()
	cfg := metrics.DefaultConfig()
	cfg.Enabled = false
	_, err := metrics.Initialize(cfg)
	if err != nil {
		t.Fatalf("Initialize noop failed: %v", err)
	}
	t.Cleanup(func() {
		metrics.Shutdown(context.Background()) //nolint:errcheck
	})
}

// scrape performs a GET against the /metrics handler and returns the body,
// failing the test on any non-200 status.
func scrape(t *testing.T, h http.Handler) string {
	t.Helper()
	req := httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody)
	rr := httptest.NewRecorder()
	h.ServeHTTP(rr, req)
	if rr.Code != http.StatusOK {
		t.Fatalf("scrape returned %d", rr.Code)
	}
	b, _ := io.ReadAll(rr.Body)
	return string(b)
}

// assertContains fails the test (non-fatally) when substr is absent from body.
func assertContains(t *testing.T, body, substr string) {
	t.Helper()
	if !strings.Contains(body, substr) {
		t.Errorf("expected %q in output\nbody:\n%s", substr, body)
	}
}
|
||||
|
||||
// --- Tests ---

// TestInitializePrometheus verifies the prometheus backend exposes a handler.
func TestInitializePrometheus(t *testing.T) {
	h := initPrometheus(t)
	if h == nil {
		t.Error("expected non-nil HTTP handler for prometheus backend")
	}
}

// TestInitializeNoop verifies that Record* helpers are safe under the noop backend.
func TestInitializeNoop(t *testing.T) {
	initNoop(t)
	// All Record* functions must not panic when noop backend is active.
	metrics.RecordRestart()
	metrics.RecordHTTPRequest("/test", "GET", "200")
	metrics.RecordSNIConnection("accepted")
	metrics.RecordPeersTotal("wg0", 1)
}

// TestDefaultConfig verifies the default backend selection.
func TestDefaultConfig(t *testing.T) {
	cfg := metrics.DefaultConfig()
	if cfg.Backend != "prometheus" {
		t.Errorf("expected prometheus default backend, got %q", cfg.Backend)
	}
}

// TestShutdownNoInit verifies Shutdown is safe before any Initialize call.
func TestShutdownNoInit(t *testing.T) {
	// Shutdown without Initialize should not panic or error.
	if err := metrics.Shutdown(context.Background()); err != nil {
		t.Errorf("unexpected error: %v", err)
	}
}

// TestRecordHTTPRequest verifies the request counter appears in a scrape.
func TestRecordHTTPRequest(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordHTTPRequest("/peers", "POST", "201")
	body := scrape(t, h)
	assertContains(t, body, "gerbil_http_requests_total")
}

// TestRecordHTTPRequestDuration verifies the duration histogram appears in a scrape.
func TestRecordHTTPRequestDuration(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordHTTPRequestDuration("/peers", "POST", 0.05)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_http_request_duration_seconds")
}

// TestRecordInterfaceUp verifies both gauge states can be recorded and scraped.
func TestRecordInterfaceUp(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordInterfaceUp("wg0", "host1", true)
	metrics.RecordInterfaceUp("wg0", "host1", false)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_wg_interface_up")
}

// TestRecordPeersTotal verifies the peer count metric appears in a scrape.
func TestRecordPeersTotal(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordPeersTotal("wg0", 3)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_wg_peers_total")
}

// TestRecordBytesReceivedTransmitted verifies both byte counters appear in a scrape.
func TestRecordBytesReceivedTransmitted(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordBytesReceived("wg0", "peer1", 1024)
	metrics.RecordBytesTransmitted("wg0", "peer1", 512)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_wg_bytes_received_total")
	assertContains(t, body, "gerbil_wg_bytes_transmitted_total")
}

// TestRecordSNI exercises every SNI-proxy Record* helper and spot-checks the scrape.
func TestRecordSNI(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordSNIConnection("accepted")
	metrics.RecordSNIActiveConnection(1)
	metrics.RecordSNIConnectionDuration(1.5)
	metrics.RecordSNIRouteCacheHit("hit")
	metrics.RecordSNIRouteAPIRequest("success")
	metrics.RecordSNIRouteAPILatency(0.01)
	metrics.RecordSNILocalOverride("yes")
	metrics.RecordSNITrustedProxyEvent("proxy_protocol_parsed")
	metrics.RecordSNIProxyProtocolParseError()
	metrics.RecordSNIDataBytes("client_to_target", 2048)
	metrics.RecordSNITunnelTermination("eof")
	body := scrape(t, h)
	assertContains(t, body, "gerbil_sni_connections_total")
	assertContains(t, body, "gerbil_sni_active_connections")
}
|
||||
|
||||
// TestRecordRelay exercises the relay/session Record* helpers and spot-checks the scrape.
func TestRecordRelay(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordUDPPacket("relay", "data", "in")
	metrics.RecordUDPPacketSize("relay", "data", 256)
	metrics.RecordHolePunchEvent("relay", "success")
	metrics.RecordProxyMapping("relay", 1)
	metrics.RecordSession("relay", 1)
	metrics.RecordSessionRebuilt("relay")
	metrics.RecordCommPattern("relay", 1)
	metrics.RecordProxyCleanupRemoved("relay", "session", 2)
	metrics.RecordProxyConnectionError("relay", "dial_udp")
	metrics.RecordProxyInitialMappings("relay", 5)
	metrics.RecordProxyMappingUpdate("relay")
	metrics.RecordProxyIdleCleanupDuration("relay", "conn", 0.1)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_udp_packets_total")
	assertContains(t, body, "gerbil_proxy_mapping_active")
}

// TestRecordWireGuard exercises the WireGuard Record* helpers and spot-checks the scrape.
func TestRecordWireGuard(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordHandshake("wg0", "peer1", "success")
	metrics.RecordHandshakeLatency("wg0", "peer1", 0.02)
	metrics.RecordPeerRTT("wg0", "peer1", 0.005)
	metrics.RecordPeerConnected("wg0", "peer1", true)
	metrics.RecordAllowedIPsCount("wg0", "peer1", 2)
	metrics.RecordKeyRotation("wg0", "scheduled")
	body := scrape(t, h)
	assertContains(t, body, "gerbil_wg_handshakes_total")
	assertContains(t, body, "gerbil_wg_peer_connected")
}

// TestRecordHousekeeping exercises reporting/housekeeping helpers and spot-checks the scrape.
func TestRecordHousekeeping(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordRemoteConfigFetch("success")
	metrics.RecordBandwidthReport("success")
	metrics.RecordPeerBandwidthBytes("peer1", "rx", 512)
	metrics.RecordMemorySpike("warning")
	metrics.RecordHeapProfileWritten()
	body := scrape(t, h)
	assertContains(t, body, "gerbil_remote_config_fetches_total")
	assertContains(t, body, "gerbil_memory_spike_total")
}

// TestRecordOperational exercises the operational helpers and spot-checks the scrape.
func TestRecordOperational(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordConfigReload("success")
	metrics.RecordRestart()
	metrics.RecordAuthFailure("peer1", "bad_key")
	metrics.RecordACLDenied("wg0", "peer1", "default-deny")
	metrics.RecordCertificateExpiry(exampleHostname, "wg0", 90.0)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_config_reloads_total")
	assertContains(t, body, "gerbil_restart_total")
}

// TestRecordNetlink exercises the netlink/system and proxy helpers and spot-checks the scrape.
func TestRecordNetlink(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordNetlinkEvent("link_up")
	metrics.RecordNetlinkError("wg", "timeout")
	metrics.RecordSyncDuration("config", 0.1)
	metrics.RecordWorkqueueDepth("main", 3)
	metrics.RecordKernelModuleLoad("success")
	metrics.RecordFirewallRuleApplied("success", "INPUT")
	metrics.RecordActiveSession("wg0", 1)
	metrics.RecordActiveProxyConnection(exampleHostname, 1)
	metrics.RecordProxyRouteLookup("hit", exampleHostname)
	metrics.RecordProxyTLSHandshake(exampleHostname, 0.05)
	metrics.RecordProxyBytesTransmitted(exampleHostname, "tx", 1024)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_netlink_events_total")
	assertContains(t, body, "gerbil_active_sessions")
}

// TestRecordPeerOperation exercises the management-API helpers and spot-checks the scrape.
func TestRecordPeerOperation(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordPeerOperation("add", "success")
	metrics.RecordProxyMappingUpdateRequest("success")
	metrics.RecordDestinationsUpdateRequest("success")
	body := scrape(t, h)
	assertContains(t, body, "gerbil_peer_operations_total")
}

// TestInitializeInvalidBackend verifies Initialize rejects unknown backends.
func TestInitializeInvalidBackend(t *testing.T) {
	cfg := observability.MetricsConfig{Enabled: true, Backend: "invalid"}
	_, err := metrics.Initialize(cfg)
	if err == nil {
		t.Error("expected error for invalid backend")
	}
}

// TestInitializeBackendNone verifies the "none" backend yields a nil handler
// and that recording remains safe.
func TestInitializeBackendNone(t *testing.T) {
	cfg := metrics.DefaultConfig()
	cfg.Backend = "none"
	h, err := metrics.Initialize(cfg)
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	if h != nil {
		t.Error("none backend should return nil handler")
	}
	// All Record* calls should be noop
	metrics.RecordRestart()
	metrics.Shutdown(context.Background()) //nolint:errcheck
}
|
||||
119
internal/observability/config.go
Normal file
119
internal/observability/config.go
Normal file
@@ -0,0 +1,119 @@
|
||||
// Package observability provides a backend-neutral metrics abstraction for Gerbil.
|
||||
//
|
||||
// Exactly one metrics backend may be enabled at runtime:
|
||||
// - "prometheus" – native Prometheus client; exposes /metrics (no OTel SDK required)
|
||||
// - "otel" – OpenTelemetry metrics pushed via OTLP (gRPC or HTTP)
|
||||
// - "none" – metrics disabled; a safe noop implementation is used
|
||||
//
|
||||
// Future OTel tracing and logging can be added to this package alongside the
|
||||
// existing otel sub-package without touching the Prometheus-native path.
|
||||
package observability
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"time"
|
||||
)
|
||||
|
||||
// MetricsConfig is the top-level metrics configuration.
|
||||
type MetricsConfig struct {
|
||||
// Enabled controls whether any metrics backend is started.
|
||||
// When false the noop backend is used regardless of Backend.
|
||||
Enabled bool
|
||||
|
||||
// Backend selects the active backend: "prometheus", "otel", or "none".
|
||||
Backend string
|
||||
|
||||
// Prometheus holds settings used only by the Prometheus-native backend.
|
||||
Prometheus PrometheusConfig
|
||||
|
||||
// OTel holds settings used only by the OTel backend.
|
||||
OTel OTelConfig
|
||||
|
||||
// ServiceName is propagated to OTel resource attributes.
|
||||
ServiceName string
|
||||
|
||||
// ServiceVersion is propagated to OTel resource attributes.
|
||||
ServiceVersion string
|
||||
|
||||
// DeploymentEnvironment is an optional OTel resource attribute.
|
||||
DeploymentEnvironment string
|
||||
}
|
||||
|
||||
// PrometheusConfig holds Prometheus-native backend settings.
|
||||
type PrometheusConfig struct {
|
||||
// Path is the HTTP path to expose the /metrics endpoint.
|
||||
// Defaults to "/metrics".
|
||||
Path string
|
||||
}
|
||||
|
||||
// OTelConfig holds OpenTelemetry backend settings.
|
||||
type OTelConfig struct {
|
||||
// Protocol is the OTLP transport: "grpc" (default) or "http".
|
||||
Protocol string
|
||||
|
||||
// Endpoint is the OTLP collector address (e.g. "localhost:4317").
|
||||
Endpoint string
|
||||
|
||||
// Insecure disables TLS for the OTLP connection.
|
||||
Insecure bool
|
||||
|
||||
// ExportInterval is how often metrics are pushed to the collector.
|
||||
// Defaults to 60 s.
|
||||
ExportInterval time.Duration
|
||||
}
|
||||
|
||||
// DefaultMetricsConfig returns a MetricsConfig with sensible defaults.
|
||||
func DefaultMetricsConfig() MetricsConfig {
|
||||
return MetricsConfig{
|
||||
Enabled: true,
|
||||
Backend: "prometheus",
|
||||
Prometheus: PrometheusConfig{
|
||||
Path: "/metrics",
|
||||
},
|
||||
OTel: OTelConfig{
|
||||
Protocol: "grpc",
|
||||
Endpoint: "localhost:4317",
|
||||
Insecure: true,
|
||||
ExportInterval: 60 * time.Second,
|
||||
},
|
||||
ServiceName: "gerbil",
|
||||
ServiceVersion: "1.0.0",
|
||||
}
|
||||
}
|
||||
|
||||
// Validate checks the configuration for logical errors.
|
||||
func (c *MetricsConfig) Validate() error {
|
||||
if !c.Enabled {
|
||||
return nil
|
||||
}
|
||||
|
||||
switch c.Backend {
|
||||
case "prometheus", "none", "":
|
||||
// valid
|
||||
case "otel":
|
||||
if c.OTel.Endpoint == "" {
|
||||
return fmt.Errorf("metrics: backend=otel requires a non-empty OTel endpoint")
|
||||
}
|
||||
if c.OTel.Protocol != "grpc" && c.OTel.Protocol != "http" {
|
||||
return fmt.Errorf("metrics: otel protocol must be \"grpc\" or \"http\", got %q", c.OTel.Protocol)
|
||||
}
|
||||
if c.OTel.ExportInterval <= 0 {
|
||||
return fmt.Errorf("metrics: otel export interval must be positive")
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("metrics: unknown backend %q (must be \"prometheus\", \"otel\", or \"none\")", c.Backend)
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// effectiveBackend resolves the backend string, treating "" and "none" as noop.
|
||||
func (c *MetricsConfig) effectiveBackend() string {
|
||||
if !c.Enabled {
|
||||
return "none"
|
||||
}
|
||||
if c.Backend == "" {
|
||||
return "none"
|
||||
}
|
||||
return c.Backend
|
||||
}
|
||||
152
internal/observability/metrics.go
Normal file
152
internal/observability/metrics.go
Normal file
@@ -0,0 +1,152 @@
|
||||
package observability
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
|
||||
obsotel "github.com/fosrl/gerbil/internal/observability/otel"
|
||||
obsprom "github.com/fosrl/gerbil/internal/observability/prometheus"
|
||||
)
|
||||
|
||||
// Labels is a set of key-value pairs attached to a metric observation.
// Use only stable, bounded-cardinality label values.
// A nil map is a valid empty label set.
type Labels = map[string]string

// Counter is a monotonically increasing instrument.
type Counter interface {
	Add(ctx context.Context, value int64, labels Labels)
}

// UpDownCounter is a bidirectional integer instrument (can go up or down).
type UpDownCounter interface {
	Add(ctx context.Context, value int64, labels Labels)
}

// Int64Gauge records a snapshot integer value.
type Int64Gauge interface {
	Record(ctx context.Context, value int64, labels Labels)
}

// Float64Gauge records a snapshot float value.
type Float64Gauge interface {
	Record(ctx context.Context, value float64, labels Labels)
}

// Histogram records a distribution of values.
type Histogram interface {
	Record(ctx context.Context, value float64, labels Labels)
}

// Backend is the single interface that each metrics implementation must satisfy.
// Application code must not import backend-specific packages (prometheus, otel).
type Backend interface {
	// NewCounter creates a counter metric.
	// labelNames declares the set of label keys that will be passed at observation time.
	NewCounter(name, desc string, labelNames ...string) Counter

	// NewUpDownCounter creates an up-down counter metric.
	NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter

	// NewInt64Gauge creates an integer gauge metric.
	NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge

	// NewFloat64Gauge creates a float gauge metric.
	NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge

	// NewHistogram creates a histogram metric.
	// buckets are the explicit upper-bound bucket boundaries.
	NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram

	// HTTPHandler returns the /metrics HTTP handler.
	// Implementations that do not expose an HTTP endpoint return nil.
	HTTPHandler() http.Handler

	// Shutdown performs a graceful flush / shutdown of the backend.
	Shutdown(ctx context.Context) error
}
|
||||
|
||||
// New creates the backend selected by cfg and returns it.
|
||||
// Exactly one backend is created; the selection is mutually exclusive.
|
||||
func New(cfg MetricsConfig) (Backend, error) {
|
||||
if err := cfg.Validate(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
switch cfg.effectiveBackend() {
|
||||
case "prometheus":
|
||||
b, err := obsprom.New(obsprom.Config{
|
||||
Path: cfg.Prometheus.Path,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &promAdapter{b: b}, nil
|
||||
case "otel":
|
||||
b, err := obsotel.New(obsotel.Config{
|
||||
Protocol: cfg.OTel.Protocol,
|
||||
Endpoint: cfg.OTel.Endpoint,
|
||||
Insecure: cfg.OTel.Insecure,
|
||||
ExportInterval: cfg.OTel.ExportInterval,
|
||||
ServiceName: cfg.ServiceName,
|
||||
ServiceVersion: cfg.ServiceVersion,
|
||||
DeploymentEnvironment: cfg.DeploymentEnvironment,
|
||||
})
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
return &otelAdapter{b: b}, nil
|
||||
case "none":
|
||||
return &NoopBackend{}, nil
|
||||
default:
|
||||
return nil, fmt.Errorf("observability: unknown backend %q", cfg.effectiveBackend())
|
||||
}
|
||||
}
|
||||
|
||||
// promAdapter wraps obsprom.Backend to implement the observability.Backend interface.
|
||||
// The concrete instrument types from the prometheus sub-package satisfy the instrument
|
||||
// interfaces via Go's structural (duck) typing without importing this package.
|
||||
type promAdapter struct {
|
||||
b *obsprom.Backend
|
||||
}
|
||||
|
||||
func (a *promAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
|
||||
return a.b.NewCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
|
||||
return a.b.NewUpDownCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
|
||||
return a.b.NewInt64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
|
||||
return a.b.NewFloat64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
|
||||
return a.b.NewHistogram(name, desc, buckets, labelNames...)
|
||||
}
|
||||
func (a *promAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() }
|
||||
func (a *promAdapter) Shutdown(ctx context.Context) error { return a.b.Shutdown(ctx) }
|
||||
|
||||
// otelAdapter wraps obsotel.Backend to implement the observability.Backend interface.
|
||||
type otelAdapter struct {
|
||||
b *obsotel.Backend
|
||||
}
|
||||
|
||||
func (a *otelAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
|
||||
return a.b.NewCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
|
||||
return a.b.NewUpDownCounter(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
|
||||
return a.b.NewInt64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
|
||||
return a.b.NewFloat64Gauge(name, desc, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
|
||||
return a.b.NewHistogram(name, desc, buckets, labelNames...)
|
||||
}
|
||||
func (a *otelAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() }
|
||||
func (a *otelAdapter) Shutdown(ctx context.Context) error { return a.b.Shutdown(ctx) }
|
||||
198
internal/observability/metrics_test.go
Normal file
198
internal/observability/metrics_test.go
Normal file
@@ -0,0 +1,198 @@
|
||||
package observability_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
|
||||
// Shared fixtures for the metrics facade tests.
const (
	defaultMetricsPath = "/metrics"
	otelGRPCEndpoint   = "localhost:4317"
	errUnexpectedFmt   = "unexpected error: %v"
)

// TestDefaultMetricsConfig verifies the documented defaults returned by
// DefaultMetricsConfig (enabled Prometheus backend, OTel fallback settings).
func TestDefaultMetricsConfig(t *testing.T) {
	cfg := observability.DefaultMetricsConfig()
	if !cfg.Enabled {
		t.Error("default config should have Enabled=true")
	}
	if cfg.Backend != "prometheus" {
		t.Errorf("default backend should be prometheus, got %q", cfg.Backend)
	}
	if cfg.Prometheus.Path != defaultMetricsPath {
		t.Errorf("default prometheus path should be %s, got %q", defaultMetricsPath, cfg.Prometheus.Path)
	}
	if cfg.OTel.Protocol != "grpc" {
		t.Errorf("default otel protocol should be grpc, got %q", cfg.OTel.Protocol)
	}
	if cfg.OTel.ExportInterval != 60*time.Second {
		t.Errorf("default otel export interval should be 60s, got %v", cfg.OTel.ExportInterval)
	}
}
|
||||
// TestValidateValidConfigs exercises MetricsConfig.Validate with
// configurations that must pass validation.
func TestValidateValidConfigs(t *testing.T) {
	tests := []struct {
		name string
		cfg  observability.MetricsConfig
	}{
		{name: "disabled", cfg: observability.MetricsConfig{Enabled: false}},
		{name: "backend none", cfg: observability.MetricsConfig{Enabled: true, Backend: "none"}},
		{name: "backend empty", cfg: observability.MetricsConfig{Enabled: true, Backend: ""}},
		{name: "prometheus", cfg: observability.MetricsConfig{Enabled: true, Backend: "prometheus"}},
		{
			name: "otel grpc",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
			},
		},
		{
			name: "otel http",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "http", Endpoint: "localhost:4318", ExportInterval: 30 * time.Second},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if err := tt.cfg.Validate(); err != nil {
				t.Errorf("unexpected validation error: %v", err)
			}
		})
	}
}

// TestValidateInvalidConfigs exercises MetricsConfig.Validate with
// configurations that must be rejected (unknown backend, incomplete
// otel settings).
func TestValidateInvalidConfigs(t *testing.T) {
	tests := []struct {
		name string
		cfg  observability.MetricsConfig
	}{
		{name: "unknown backend", cfg: observability.MetricsConfig{Enabled: true, Backend: "datadog"}},
		{
			name: "otel missing endpoint",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: "", ExportInterval: 10 * time.Second},
			},
		},
		{
			name: "otel invalid protocol",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "tcp", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
			},
		},
		{
			name: "otel zero interval",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 0},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if err := tt.cfg.Validate(); err == nil {
				t.Error("expected validation error but got nil")
			}
		})
	}
}
|
||||
|
||||
// TestNewNoopBackend checks that a disabled config yields a backend with
// no HTTP handler (the noop backend).
func TestNewNoopBackend(t *testing.T) {
	b, err := observability.New(observability.MetricsConfig{Enabled: false})
	if err != nil {
		t.Fatalf(errUnexpectedFmt, err)
	}
	if b.HTTPHandler() != nil {
		t.Error("noop backend HTTPHandler should return nil")
	}
}

// TestNewNoneBackend checks that Backend="none" also yields the noop backend.
func TestNewNoneBackend(t *testing.T) {
	b, err := observability.New(observability.MetricsConfig{Enabled: true, Backend: "none"})
	if err != nil {
		t.Fatalf(errUnexpectedFmt, err)
	}
	if b.HTTPHandler() != nil {
		t.Error("none backend HTTPHandler should return nil")
	}
}

// TestNewPrometheusBackend checks that the Prometheus backend exposes an
// HTTP handler and shuts down cleanly.
func TestNewPrometheusBackend(t *testing.T) {
	cfg := observability.MetricsConfig{
		Enabled: true, Backend: "prometheus",
		Prometheus: observability.PrometheusConfig{Path: defaultMetricsPath},
	}
	b, err := observability.New(cfg)
	if err != nil {
		t.Fatalf(errUnexpectedFmt, err)
	}
	if b.HTTPHandler() == nil {
		t.Error("prometheus backend HTTPHandler should not be nil")
	}
	if err := b.Shutdown(context.Background()); err != nil {
		t.Errorf("prometheus shutdown error: %v", err)
	}
}

// TestNewInvalidBackend checks that an unrecognised backend name is rejected.
func TestNewInvalidBackend(t *testing.T) {
	_, err := observability.New(observability.MetricsConfig{Enabled: true, Backend: "invalid"})
	if err == nil {
		t.Error("expected error for invalid backend")
	}
}
|
||||
|
||||
// TestPrometheusAdapterAllInstruments smoke-tests every instrument kind on
// the Prometheus adapter: creation and a single observation must not panic,
// the HTTP handler must exist, and Shutdown must succeed.
func TestPrometheusAdapterAllInstruments(t *testing.T) {
	b, err := observability.New(observability.MetricsConfig{
		Enabled: true, Backend: "prometheus",
		Prometheus: observability.PrometheusConfig{Path: defaultMetricsPath},
	})
	if err != nil {
		t.Fatalf("failed to create backend: %v", err)
	}
	ctx := context.Background()
	labels := observability.Labels{"k": "v"}

	b.NewCounter("prom_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
	b.NewUpDownCounter("prom_adapter_updown", "desc", "k").Add(ctx, 2, labels)
	b.NewInt64Gauge("prom_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
	b.NewFloat64Gauge("prom_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
	b.NewHistogram("prom_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)

	if b.HTTPHandler() == nil {
		t.Error("prometheus adapter HTTPHandler should not be nil")
	}
	if err := b.Shutdown(ctx); err != nil {
		t.Errorf("Shutdown error: %v", err)
	}
}

// TestOtelAdapterAllInstruments smoke-tests every instrument kind on the
// OTel adapter. No collector is running; the OTLP connection is lazy, so
// instrument use must still be panic-free and HTTPHandler must be nil.
func TestOtelAdapterAllInstruments(t *testing.T) {
	b, err := observability.New(observability.MetricsConfig{
		Enabled: true, Backend: "otel",
		OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, Insecure: true, ExportInterval: 100 * time.Millisecond},
	})
	if err != nil {
		t.Fatalf("failed to create otel backend: %v", err)
	}
	ctx := context.Background()
	labels := observability.Labels{"k": "v"}

	b.NewCounter("otel_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
	b.NewUpDownCounter("otel_adapter_updown", "desc", "k").Add(ctx, 2, labels)
	b.NewInt64Gauge("otel_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
	b.NewFloat64Gauge("otel_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
	b.NewHistogram("otel_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)

	if b.HTTPHandler() != nil {
		t.Error("OTel adapter HTTPHandler should be nil")
	}

	// Flushing to a non-existent collector may fail; only bound the wait.
	shutdownCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
	defer cancel()
	b.Shutdown(shutdownCtx) //nolint:errcheck
}
|
||||
71
internal/observability/noop.go
Normal file
71
internal/observability/noop.go
Normal file
@@ -0,0 +1,71 @@
|
||||
package observability
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
)
|
||||
|
||||
// NoopBackend is a Backend that discards all observations.
|
||||
// It is used when metrics are disabled (Enabled=false or Backend="none").
|
||||
// All methods are safe to call concurrently.
|
||||
type NoopBackend struct{}
|
||||
|
||||
// Compile-time interface check.
|
||||
var _ Backend = (*NoopBackend)(nil)
|
||||
|
||||
func (n *NoopBackend) NewCounter(_ string, _ string, _ ...string) Counter {
|
||||
_ = n
|
||||
return noopCounter{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewUpDownCounter(_ string, _ string, _ ...string) UpDownCounter {
|
||||
_ = n
|
||||
return noopUpDownCounter{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewInt64Gauge(_ string, _ string, _ ...string) Int64Gauge {
|
||||
_ = n
|
||||
return noopInt64Gauge{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewFloat64Gauge(_ string, _ string, _ ...string) Float64Gauge {
|
||||
_ = n
|
||||
return noopFloat64Gauge{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) NewHistogram(_ string, _ string, _ []float64, _ ...string) Histogram {
|
||||
_ = n
|
||||
return noopHistogram{}
|
||||
}
|
||||
|
||||
func (n *NoopBackend) HTTPHandler() http.Handler {
|
||||
_ = n
|
||||
return nil
|
||||
}
|
||||
|
||||
func (n *NoopBackend) Shutdown(_ context.Context) error {
|
||||
_ = n
|
||||
return nil
|
||||
}
|
||||
|
||||
// --- noop instrument types ---
// Each type is a zero-size value whose only method silently discards the
// observation; they back the instruments returned by NoopBackend.

type noopCounter struct{}

func (noopCounter) Add(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }

type noopUpDownCounter struct{}

func (noopUpDownCounter) Add(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }

type noopInt64Gauge struct{}

func (noopInt64Gauge) Record(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }

type noopFloat64Gauge struct{}

func (noopFloat64Gauge) Record(_ context.Context, _ float64, _ Labels) { /* intentionally no-op */ }

type noopHistogram struct{}

func (noopHistogram) Record(_ context.Context, _ float64, _ Labels) { /* intentionally no-op */ }
|
||||
67
internal/observability/noop_test.go
Normal file
67
internal/observability/noop_test.go
Normal file
@@ -0,0 +1,67 @@
|
||||
package observability_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/observability"
|
||||
)
|
||||
|
||||
// TestNoopBackendAllInstruments verifies every instrument kind on the noop
// backend accepts observations (including nil labels) without panicking,
// that HTTPHandler is nil, and that Shutdown succeeds.
func TestNoopBackendAllInstruments(t *testing.T) {
	n := &observability.NoopBackend{}

	ctx := context.Background()
	labels := observability.Labels{"k": "v"}

	t.Run("Counter", func(_ *testing.T) {
		c := n.NewCounter("test_counter", "desc")
		c.Add(ctx, 1, labels)
		c.Add(ctx, 0, nil)
	})

	t.Run("UpDownCounter", func(_ *testing.T) {
		u := n.NewUpDownCounter("test_updown", "desc")
		u.Add(ctx, 1, labels)
		u.Add(ctx, -1, nil)
	})

	t.Run("Int64Gauge", func(_ *testing.T) {
		g := n.NewInt64Gauge("test_int64gauge", "desc")
		g.Record(ctx, 42, labels)
		g.Record(ctx, 0, nil)
	})

	t.Run("Float64Gauge", func(_ *testing.T) {
		g := n.NewFloat64Gauge("test_float64gauge", "desc")
		g.Record(ctx, 3.14, labels)
		g.Record(ctx, 0, nil)
	})

	t.Run("Histogram", func(_ *testing.T) {
		h := n.NewHistogram("test_histogram", "desc", []float64{1, 5, 10})
		h.Record(ctx, 2.5, labels)
		h.Record(ctx, 0, nil)
	})

	t.Run("HTTPHandler", func(t *testing.T) {
		if n.HTTPHandler() != nil {
			t.Error("noop HTTPHandler should be nil")
		}
	})

	t.Run("Shutdown", func(t *testing.T) {
		if err := n.Shutdown(ctx); err != nil {
			t.Errorf("noop Shutdown should not error: %v", err)
		}
	})
}
|
||||
|
||||
// TestNoopBackendLabelNames verifies that label names passed at creation
// time are accepted without panic for every instrument constructor.
func TestNoopBackendLabelNames(_ *testing.T) {
	n := &observability.NoopBackend{}
	n.NewCounter("c", "d", "label1", "label2")
	n.NewUpDownCounter("u", "d", "l1")
	n.NewInt64Gauge("g1", "d", "l1", "l2", "l3")
	n.NewFloat64Gauge("g2", "d")
	n.NewHistogram("h", "d", []float64{0.1, 1.0}, "l1")
}
|
||||
210
internal/observability/otel/backend.go
Normal file
210
internal/observability/otel/backend.go
Normal file
@@ -0,0 +1,210 @@
|
||||
// Package otel implements the OpenTelemetry metrics backend for Gerbil.
|
||||
//
|
||||
// Metrics are exported via OTLP (gRPC or HTTP) to an external collector.
|
||||
// No Prometheus /metrics endpoint is exposed in this mode.
|
||||
// Future OTel tracing and logging can be added alongside this package
|
||||
// without touching the Prometheus-native path.
|
||||
package otel
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
// Config holds OTel backend configuration.
type Config struct {
	// Protocol is "grpc" (default) or "http".
	Protocol string

	// Endpoint is the OTLP collector address.
	Endpoint string

	// Insecure disables TLS.
	Insecure bool

	// ExportInterval is the period between pushes to the collector.
	ExportInterval time.Duration

	// ServiceName is reported as the OTel service.name resource attribute.
	// Defaults to "gerbil" when empty.
	ServiceName string

	// ServiceVersion is reported alongside ServiceName.
	ServiceVersion string

	// DeploymentEnvironment is an optional resource attribute (e.g. "staging").
	DeploymentEnvironment string
}
|
||||
|
||||
// Backend is the OTel metrics backend.
type Backend struct {
	cfg      Config                     // normalised configuration (defaults applied by New)
	provider *sdkmetric.MeterProvider   // owns the periodic reader / exporter pipeline
	meter    metric.Meter               // meter all instruments are created from
}
|
||||
|
||||
// New creates and initialises an OTel backend.
|
||||
//
|
||||
// cfg.Protocol must be "grpc" (default) or "http".
|
||||
// cfg.Endpoint is the OTLP collector address (e.g. "localhost:4317").
|
||||
// cfg.ExportInterval sets the push period (defaults to 60 s if ≤ 0).
|
||||
// cfg.Insecure disables TLS on the OTLP connection.
|
||||
//
|
||||
// Connection to the collector is established lazily; New only validates cfg
|
||||
// and creates the SDK components. It returns an error only if the OTel resource
|
||||
// or exporter cannot be constructed.
|
||||
func New(cfg Config) (*Backend, error) {
|
||||
if cfg.Protocol == "" {
|
||||
cfg.Protocol = "grpc"
|
||||
}
|
||||
if cfg.ExportInterval <= 0 {
|
||||
cfg.ExportInterval = 60 * time.Second
|
||||
}
|
||||
if cfg.ServiceName == "" {
|
||||
cfg.ServiceName = "gerbil"
|
||||
}
|
||||
|
||||
res, err := newResource(cfg.ServiceName, cfg.ServiceVersion, cfg.DeploymentEnvironment)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel backend: build resource: %w", err)
|
||||
}
|
||||
|
||||
exp, err := newExporter(context.Background(), cfg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otel backend: create exporter: %w", err)
|
||||
}
|
||||
|
||||
reader := sdkmetric.NewPeriodicReader(exp,
|
||||
sdkmetric.WithInterval(cfg.ExportInterval),
|
||||
)
|
||||
|
||||
provider := sdkmetric.NewMeterProvider(
|
||||
sdkmetric.WithResource(res),
|
||||
sdkmetric.WithReader(reader),
|
||||
)
|
||||
|
||||
meter := provider.Meter("github.com/fosrl/gerbil")
|
||||
|
||||
return &Backend{cfg: cfg, provider: provider, meter: meter}, nil
|
||||
}
|
||||
|
||||
// HTTPHandler returns nil – the OTel backend does not expose an HTTP endpoint.
|
||||
func (b *Backend) HTTPHandler() http.Handler {
|
||||
_ = b
|
||||
return nil
|
||||
}
|
||||
|
||||
// Shutdown flushes pending metrics and shuts down the MeterProvider.
// The error from the SDK is returned as-is; with an unreachable collector
// the final flush may fail.
func (b *Backend) Shutdown(ctx context.Context) error {
	return b.provider.Shutdown(ctx)
}
|
||||
|
||||
// NewCounter creates an OTel Int64Counter.
|
||||
func (b *Backend) NewCounter(name, desc string, _ ...string) *Counter {
|
||||
c, err := b.meter.Int64Counter(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create counter %q: %v", name, err))
|
||||
}
|
||||
return &Counter{c: c}
|
||||
}
|
||||
|
||||
// NewUpDownCounter creates an OTel Int64UpDownCounter.
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, _ ...string) *UpDownCounter {
|
||||
c, err := b.meter.Int64UpDownCounter(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create up-down counter %q: %v", name, err))
|
||||
}
|
||||
return &UpDownCounter{c: c}
|
||||
}
|
||||
|
||||
// NewInt64Gauge creates an OTel Int64Gauge.
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, _ ...string) *Int64Gauge {
|
||||
g, err := b.meter.Int64Gauge(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create int64 gauge %q: %v", name, err))
|
||||
}
|
||||
return &Int64Gauge{g: g}
|
||||
}
|
||||
|
||||
// NewFloat64Gauge creates an OTel Float64Gauge.
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, _ ...string) *Float64Gauge {
|
||||
g, err := b.meter.Float64Gauge(name, metric.WithDescription(desc))
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create float64 gauge %q: %v", name, err))
|
||||
}
|
||||
return &Float64Gauge{g: g}
|
||||
}
|
||||
|
||||
// NewHistogram creates an OTel Float64Histogram with explicit bucket boundaries.
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, _ ...string) *Histogram {
|
||||
h, err := b.meter.Float64Histogram(name,
|
||||
metric.WithDescription(desc),
|
||||
metric.WithExplicitBucketBoundaries(buckets...),
|
||||
)
|
||||
if err != nil {
|
||||
panic(fmt.Sprintf("otel: create histogram %q: %v", name, err))
|
||||
}
|
||||
return &Histogram{h: h}
|
||||
}
|
||||
|
||||
// labelsToAttrs converts a Labels map to OTel attribute key-value pairs.
|
||||
func labelsToAttrs(labels map[string]string) []attribute.KeyValue {
|
||||
if len(labels) == 0 {
|
||||
return nil
|
||||
}
|
||||
attrs := make([]attribute.KeyValue, 0, len(labels))
|
||||
for k, v := range labels {
|
||||
attrs = append(attrs, attribute.String(k, v))
|
||||
}
|
||||
return attrs
|
||||
}
|
||||
|
||||
// Counter wraps an OTel Int64Counter.
type Counter struct {
	c metric.Int64Counter
}

// Add increments the counter by value, attaching labels as OTel attributes.
func (c *Counter) Add(ctx context.Context, value int64, labels map[string]string) {
	c.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

// UpDownCounter wraps an OTel Int64UpDownCounter.
type UpDownCounter struct {
	c metric.Int64UpDownCounter
}

// Add adjusts the up-down counter by value (which may be negative).
func (u *UpDownCounter) Add(ctx context.Context, value int64, labels map[string]string) {
	u.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

// Int64Gauge wraps an OTel Int64Gauge.
type Int64Gauge struct {
	g metric.Int64Gauge
}

// Record sets the gauge to value.
func (g *Int64Gauge) Record(ctx context.Context, value int64, labels map[string]string) {
	g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

// Float64Gauge wraps an OTel Float64Gauge.
type Float64Gauge struct {
	g metric.Float64Gauge
}

// Record sets the gauge to value.
func (g *Float64Gauge) Record(ctx context.Context, value float64, labels map[string]string) {
	g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}

// Histogram wraps an OTel Float64Histogram.
type Histogram struct {
	h metric.Float64Histogram
}

// Record observes value in the histogram.
func (h *Histogram) Record(ctx context.Context, value float64, labels map[string]string) {
	h.h.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}
|
||||
141
internal/observability/otel/backend_test.go
Normal file
141
internal/observability/otel/backend_test.go
Normal file
@@ -0,0 +1,141 @@
|
||||
package otel_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
obsotel "github.com/fosrl/gerbil/internal/observability/otel"
|
||||
)
|
||||
|
||||
// Shared fixtures for the otel backend tests.
const (
	defaultGRPCEndpoint = "localhost:4317"
	defaultServiceName  = "gerbil-test"
)

// newInMemoryBackend builds a backend that is safe to use without a running
// collector: the OTLP connection is lazy, so construction must succeed even
// though the endpoint is never reachable in tests.
func newInMemoryBackend(t *testing.T) *obsotel.Backend {
	t.Helper()
	// Use a very short export interval; an in-process collector (noop exporter)
	// is used by pointing to a non-existent endpoint with insecure mode.
	// The backend itself should initialise without error since connection is lazy.
	b, err := obsotel.New(obsotel.Config{
		Protocol:       "grpc",
		Endpoint:       defaultGRPCEndpoint,
		Insecure:       true,
		ExportInterval: 100 * time.Millisecond,
		ServiceName:    defaultServiceName,
		ServiceVersion: "0.0.1",
	})
	if err != nil {
		t.Fatalf("failed to create otel backend: %v", err)
	}
	return b
}
|
||||
|
||||
// TestOtelBackendHTTPHandlerIsNil verifies the OTel backend exposes no
// HTTP metrics endpoint.
func TestOtelBackendHTTPHandlerIsNil(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck
	if b.HTTPHandler() != nil {
		t.Error("OTel backend HTTPHandler should return nil")
	}
}

// TestOtelBackendShutdown verifies Shutdown completes without panicking,
// even when the final flush to an unreachable collector fails.
func TestOtelBackendShutdown(t *testing.T) {
	b := newInMemoryBackend(t)
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	if err := b.Shutdown(ctx); err != nil {
		// Shutdown with unreachable collector may fail to flush; that's acceptable.
		// What matters is that Shutdown does not panic.
		t.Logf("Shutdown returned (expected with no collector): %v", err)
	}
}
|
||||
|
||||
// TestOtelBackendCounter smoke-tests counter creation and observation
// (with and without labels) without a collector.
func TestOtelBackendCounter(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	c := b.NewCounter("gerbil_test_counter_total", "test counter", "result")
	// Should not panic
	c.Add(context.Background(), 1, map[string]string{"result": "ok"})
	c.Add(context.Background(), 5, nil)
}

// TestOtelBackendUpDownCounter smoke-tests positive and negative adjustments.
func TestOtelBackendUpDownCounter(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	u := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state")
	u.Add(context.Background(), 3, map[string]string{"state": "active"})
	u.Add(context.Background(), -1, map[string]string{"state": "active"})
}

// TestOtelBackendInt64Gauge smoke-tests the integer gauge.
func TestOtelBackendInt64Gauge(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	g := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge")
	g.Record(context.Background(), 42, nil)
}

// TestOtelBackendFloat64Gauge smoke-tests the float gauge.
func TestOtelBackendFloat64Gauge(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	g := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge")
	g.Record(context.Background(), 3.14, nil)
}

// TestOtelBackendHistogram smoke-tests histogram creation with explicit
// buckets and a single observation.
func TestOtelBackendHistogram(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	h := b.NewHistogram("gerbil_test_duration_seconds", "test histogram",
		[]float64{0.1, 0.5, 1.0}, "method")
	h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
}
|
||||
|
||||
func TestOtelBackendHTTPProtocol(t *testing.T) {
|
||||
b, err := obsotel.New(obsotel.Config{
|
||||
Protocol: "http",
|
||||
Endpoint: "localhost:4318",
|
||||
Insecure: true,
|
||||
ExportInterval: 100 * time.Millisecond,
|
||||
ServiceName: defaultServiceName,
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create otel http backend: %v", err)
|
||||
}
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
|
||||
if b.HTTPHandler() != nil {
|
||||
t.Error("OTel HTTP backend should not expose a /metrics endpoint")
|
||||
}
|
||||
}
|
||||
|
||||
func TestOtelBackendInvalidProtocol(t *testing.T) {
|
||||
_, err := obsotel.New(obsotel.Config{
|
||||
Protocol: "tcp",
|
||||
Endpoint: defaultGRPCEndpoint,
|
||||
ExportInterval: 10 * time.Second,
|
||||
})
|
||||
if err == nil {
|
||||
t.Error("expected error for invalid protocol")
|
||||
}
|
||||
}
|
||||
|
||||
func TestOtelBackendDeploymentEnvironment(t *testing.T) {
|
||||
b, err := obsotel.New(obsotel.Config{
|
||||
Protocol: "grpc",
|
||||
Endpoint: defaultGRPCEndpoint,
|
||||
Insecure: true,
|
||||
ExportInterval: 100 * time.Millisecond,
|
||||
ServiceName: defaultServiceName,
|
||||
ServiceVersion: "1.2.3",
|
||||
DeploymentEnvironment: "staging",
|
||||
})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
defer b.Shutdown(context.Background()) //nolint:errcheck
|
||||
}
|
||||
50
internal/observability/otel/exporter.go
Normal file
50
internal/observability/otel/exporter.go
Normal file
@@ -0,0 +1,50 @@
|
||||
package otel
|
||||
|
||||
import (
|
||||
"context"
|
||||
"fmt"
|
||||
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
|
||||
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||
)
|
||||
|
||||
// newExporter creates the appropriate OTLP exporter based on cfg.Protocol.
|
||||
func newExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||
switch cfg.Protocol {
|
||||
case "grpc", "":
|
||||
return newGRPCExporter(ctx, cfg)
|
||||
case "http":
|
||||
return newHTTPExporter(ctx, cfg)
|
||||
default:
|
||||
return nil, fmt.Errorf("otel: unknown protocol %q (must be \"grpc\" or \"http\")", cfg.Protocol)
|
||||
}
|
||||
}
|
||||
|
||||
func newGRPCExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||
opts := []otlpmetricgrpc.Option{
|
||||
otlpmetricgrpc.WithEndpoint(cfg.Endpoint),
|
||||
}
|
||||
if cfg.Insecure {
|
||||
opts = append(opts, otlpmetricgrpc.WithInsecure())
|
||||
}
|
||||
exp, err := otlpmetricgrpc.New(ctx, opts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp grpc exporter: %w", err)
|
||||
}
|
||||
return exp, nil
|
||||
}
|
||||
|
||||
func newHTTPExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||
opts := []otlpmetrichttp.Option{
|
||||
otlpmetrichttp.WithEndpoint(cfg.Endpoint),
|
||||
}
|
||||
if cfg.Insecure {
|
||||
opts = append(opts, otlpmetrichttp.WithInsecure())
|
||||
}
|
||||
exp, err := otlpmetrichttp.New(ctx, opts...)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("otlp http exporter: %w", err)
|
||||
}
|
||||
return exp, nil
|
||||
}
|
||||
25
internal/observability/otel/resource.go
Normal file
25
internal/observability/otel/resource.go
Normal file
@@ -0,0 +1,25 @@
|
||||
package otel
|
||||
|
||||
import (
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
|
||||
)
|
||||
|
||||
// newResource builds an OTel resource for the Gerbil service.
|
||||
func newResource(serviceName, serviceVersion, deploymentEnv string) (*resource.Resource, error) {
|
||||
attrs := []attribute.KeyValue{
|
||||
semconv.ServiceName(serviceName),
|
||||
}
|
||||
if serviceVersion != "" {
|
||||
attrs = append(attrs, semconv.ServiceVersion(serviceVersion))
|
||||
}
|
||||
if deploymentEnv != "" {
|
||||
attrs = append(attrs, semconv.DeploymentEnvironmentName(deploymentEnv))
|
||||
}
|
||||
|
||||
return resource.Merge(
|
||||
resource.Default(),
|
||||
resource.NewWithAttributes(semconv.SchemaURL, attrs...),
|
||||
)
|
||||
}
|
||||
185
internal/observability/prometheus/backend.go
Normal file
185
internal/observability/prometheus/backend.go
Normal file
@@ -0,0 +1,185 @@
|
||||
// Package prometheus implements the native Prometheus metrics backend for Gerbil.
|
||||
//
|
||||
// This backend uses the Prometheus Go client directly; it does NOT depend on the
|
||||
// OpenTelemetry SDK. A dedicated Prometheus registry is used so that default
|
||||
// Go/process metrics are not unintentionally included unless the caller opts in.
|
||||
package prometheus
|
||||
|
||||
import (
|
||||
"context"
|
||||
"net/http"
|
||||
|
||||
"github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
)
|
||||
|
||||
// Config holds Prometheus-backend configuration.
type Config struct {
	// Path is the HTTP endpoint path (e.g. "/metrics").
	// New substitutes "/metrics" when this is left empty.
	Path string

	// IncludeGoMetrics controls whether the standard Go runtime and process
	// collectors are registered on the dedicated registry.
	// Defaults to true if not explicitly set (i.e. when nil).
	IncludeGoMetrics *bool
}
|
||||
|
||||
// Backend is the native Prometheus metrics backend.
// Metric instruments are created via the New* family of methods and stored
// in the backend-specific instrument types that implement the observability
// instrument interfaces.
type Backend struct {
	// cfg is the configuration given to New (with Path defaulted).
	cfg Config
	// registry is a dedicated registry so default Go/process metrics are
	// only present when the caller opts in via Config.IncludeGoMetrics.
	registry *prometheus.Registry
	// handler serves the registry's metrics; built once in New.
	handler http.Handler
}
|
||||
|
||||
// New creates and initialises a Prometheus backend.
|
||||
//
|
||||
// cfg.Path sets the HTTP endpoint path (defaults to "/metrics" if empty).
|
||||
// cfg.IncludeGoMetrics controls whether standard Go runtime and process metrics
|
||||
// are included; defaults to true when nil.
|
||||
//
|
||||
// Returns an error if the registry cannot be created.
|
||||
func New(cfg Config) (*Backend, error) {
|
||||
if cfg.Path == "" {
|
||||
cfg.Path = "/metrics"
|
||||
}
|
||||
|
||||
registry := prometheus.NewRegistry()
|
||||
|
||||
// Include Go and process metrics by default.
|
||||
includeGo := cfg.IncludeGoMetrics == nil || *cfg.IncludeGoMetrics
|
||||
if includeGo {
|
||||
registry.MustRegister(
|
||||
collectors.NewGoCollector(),
|
||||
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
|
||||
)
|
||||
}
|
||||
|
||||
handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
|
||||
EnableOpenMetrics: false,
|
||||
})
|
||||
|
||||
return &Backend{cfg: cfg, registry: registry, handler: handler}, nil
|
||||
}
|
||||
|
||||
// HTTPHandler returns the Prometheus /metrics HTTP handler.
// The handler is built once in New and serves only this backend's
// dedicated registry; it is safe to mount on any mux/path.
func (b *Backend) HTTPHandler() http.Handler {
	return b.handler
}
|
||||
|
||||
// Shutdown is a no-op for the Prometheus backend.
|
||||
// The registry does not maintain background goroutines.
|
||||
func (b *Backend) Shutdown(_ context.Context) error {
|
||||
_ = b
|
||||
return nil
|
||||
}
|
||||
|
||||
// NewCounter creates a Prometheus CounterVec registered on the backend's registry.
|
||||
func (b *Backend) NewCounter(name, desc string, labelNames ...string) *Counter {
|
||||
vec := prometheus.NewCounterVec(prometheus.CounterOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Counter{vec: vec}
|
||||
}
|
||||
|
||||
// NewUpDownCounter creates a Prometheus GaugeVec (Prometheus gauges are
|
||||
// bidirectional) registered on the backend's registry.
|
||||
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) *UpDownCounter {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &UpDownCounter{vec: vec}
|
||||
}
|
||||
|
||||
// NewInt64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) *Int64Gauge {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Int64Gauge{vec: vec}
|
||||
}
|
||||
|
||||
// NewFloat64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) *Float64Gauge {
|
||||
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Float64Gauge{vec: vec}
|
||||
}
|
||||
|
||||
// NewHistogram creates a Prometheus HistogramVec registered on the backend's registry.
|
||||
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) *Histogram {
|
||||
vec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
|
||||
Name: name,
|
||||
Help: desc,
|
||||
Buckets: buckets,
|
||||
}, labelNames)
|
||||
b.registry.MustRegister(vec)
|
||||
return &Histogram{vec: vec}
|
||||
}
|
||||
|
||||
// Counter is a native Prometheus counter instrument.
|
||||
type Counter struct {
|
||||
vec *prometheus.CounterVec
|
||||
}
|
||||
|
||||
// Add increments the counter by value for the given labels.
|
||||
//
|
||||
// value must be non-negative. Negative values are ignored.
|
||||
func (c *Counter) Add(_ context.Context, value int64, labels map[string]string) {
|
||||
if value < 0 {
|
||||
return
|
||||
}
|
||||
c.vec.With(prometheus.Labels(labels)).Add(float64(value))
|
||||
}
|
||||
|
||||
// UpDownCounter is a native Prometheus gauge used as a bidirectional counter.
|
||||
type UpDownCounter struct {
|
||||
vec *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// Add adjusts the gauge by value for the given labels.
|
||||
func (u *UpDownCounter) Add(_ context.Context, value int64, labels map[string]string) {
|
||||
u.vec.With(prometheus.Labels(labels)).Add(float64(value))
|
||||
}
|
||||
|
||||
// Int64Gauge is a native Prometheus gauge recording integer snapshot values.
|
||||
type Int64Gauge struct {
|
||||
vec *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// Record sets the gauge to value for the given labels.
|
||||
func (g *Int64Gauge) Record(_ context.Context, value int64, labels map[string]string) {
|
||||
g.vec.With(prometheus.Labels(labels)).Set(float64(value))
|
||||
}
|
||||
|
||||
// Float64Gauge is a native Prometheus gauge recording float snapshot values.
|
||||
type Float64Gauge struct {
|
||||
vec *prometheus.GaugeVec
|
||||
}
|
||||
|
||||
// Record sets the gauge to value for the given labels.
|
||||
func (g *Float64Gauge) Record(_ context.Context, value float64, labels map[string]string) {
|
||||
g.vec.With(prometheus.Labels(labels)).Set(value)
|
||||
}
|
||||
|
||||
// Histogram is a native Prometheus histogram instrument.
|
||||
type Histogram struct {
|
||||
vec *prometheus.HistogramVec
|
||||
}
|
||||
|
||||
// Record observes value for the given labels.
|
||||
func (h *Histogram) Record(_ context.Context, value float64, labels map[string]string) {
|
||||
h.vec.With(prometheus.Labels(labels)).Observe(value)
|
||||
}
|
||||
173
internal/observability/prometheus/backend_test.go
Normal file
173
internal/observability/prometheus/backend_test.go
Normal file
@@ -0,0 +1,173 @@
|
||||
package prometheus_test
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
|
||||
obsprom "github.com/fosrl/gerbil/internal/observability/prometheus"
|
||||
)
|
||||
|
||||
func newTestBackend(t *testing.T) *obsprom.Backend {
|
||||
t.Helper()
|
||||
b, err := obsprom.New(obsprom.Config{Path: "/metrics"})
|
||||
if err != nil {
|
||||
t.Fatalf("failed to create prometheus backend: %v", err)
|
||||
}
|
||||
return b
|
||||
}
|
||||
|
||||
func TestPrometheusBackendHTTPHandler(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
if b.HTTPHandler() == nil {
|
||||
t.Error("HTTPHandler should not be nil")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendShutdown(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
if err := b.Shutdown(context.Background()); err != nil {
|
||||
t.Errorf("Shutdown returned error: %v", err)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("test_counter_total", "A test counter", "result")
|
||||
c.Add(context.Background(), 3, map[string]string{"result": "ok"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `test_counter_total{result="ok"} 3`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendUpDownCounter(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
u := b.NewUpDownCounter("test_gauge_total", "A test up-down counter", "state")
|
||||
u.Add(context.Background(), 5, map[string]string{"state": "active"})
|
||||
u.Add(context.Background(), -2, map[string]string{"state": "active"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `test_gauge_total{state="active"} 3`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendInt64Gauge(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
g := b.NewInt64Gauge("test_int_gauge", "An integer gauge", "ifname")
|
||||
g.Record(context.Background(), 42, map[string]string{"ifname": "wg0"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `test_int_gauge{ifname="wg0"} 42`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendFloat64Gauge(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
g := b.NewFloat64Gauge("test_float_gauge", "A float gauge", "cert")
|
||||
g.Record(context.Background(), 7.5, map[string]string{"cert": "example.com"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `test_float_gauge{cert="example.com"} 7.5`)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendHistogram(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
buckets := []float64{0.1, 0.5, 1.0, 5.0}
|
||||
h := b.NewHistogram("test_duration_seconds", "A test histogram", buckets, "method")
|
||||
h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
if !strings.Contains(body, "test_duration_seconds") {
|
||||
t.Errorf("expected histogram metric in output, body:\n%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendMultipleLabels(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("multi_label_total", "Multi-label counter", "method", "route", "status_code")
|
||||
c.Add(context.Background(), 1, map[string]string{
|
||||
"method": "POST",
|
||||
"route": "/api/peers",
|
||||
"status_code": "200",
|
||||
})
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
if !strings.Contains(body, "multi_label_total") {
|
||||
t.Errorf("expected multi_label_total in output, body:\n%s", body)
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendGoMetrics(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
body := scrapeMetrics(t, b)
|
||||
// Default backend includes Go runtime metrics.
|
||||
if !strings.Contains(body, "go_goroutines") {
|
||||
t.Error("expected go_goroutines in default backend output")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendNoGoMetrics(t *testing.T) {
|
||||
f := false
|
||||
b, err := obsprom.New(obsprom.Config{IncludeGoMetrics: &f})
|
||||
if err != nil {
|
||||
t.Fatalf("unexpected error: %v", err)
|
||||
}
|
||||
body := scrapeMetrics(t, b)
|
||||
if strings.Contains(body, "go_goroutines") {
|
||||
t.Error("expected no go_goroutines when IncludeGoMetrics=false")
|
||||
}
|
||||
}
|
||||
|
||||
func TestPrometheusBackendNilLabels(t *testing.T) {
|
||||
// Adding with nil labels should not panic (treated as empty map).
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("nil_labels_total", "counter with no labels")
|
||||
// nil labels with no label names declared should be safe
|
||||
c.Add(context.Background(), 1, nil)
|
||||
}
|
||||
|
||||
func TestPrometheusBackendConcurrentAdd(t *testing.T) {
|
||||
b := newTestBackend(t)
|
||||
c := b.NewCounter("concurrent_total", "concurrent counter", "worker")
|
||||
|
||||
done := make(chan struct{})
|
||||
for i := 0; i < 10; i++ {
|
||||
go func(_ int) {
|
||||
for j := 0; j < 100; j++ {
|
||||
c.Add(context.Background(), 1, map[string]string{"worker": "w"})
|
||||
}
|
||||
done <- struct{}{}
|
||||
}(i)
|
||||
}
|
||||
for i := 0; i < 10; i++ {
|
||||
<-done
|
||||
}
|
||||
|
||||
body := scrapeMetrics(t, b)
|
||||
assertMetricPresent(t, body, `concurrent_total{worker="w"} 1000`)
|
||||
}
|
||||
|
||||
// --- helpers ---
|
||||
|
||||
func scrapeMetrics(t *testing.T, b *obsprom.Backend) string {
|
||||
t.Helper()
|
||||
req := httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody)
|
||||
rr := httptest.NewRecorder()
|
||||
b.HTTPHandler().ServeHTTP(rr, req)
|
||||
if rr.Code != http.StatusOK {
|
||||
t.Fatalf("metrics handler returned %d", rr.Code)
|
||||
}
|
||||
body, err := io.ReadAll(rr.Body)
|
||||
if err != nil {
|
||||
t.Fatalf("failed to read response body: %v", err)
|
||||
}
|
||||
return string(body)
|
||||
}
|
||||
|
||||
// assertMetricPresent fails the test when expected does not appear verbatim
// in the scraped metrics body.
func assertMetricPresent(t *testing.T, body, expected string) {
	t.Helper()
	if strings.Contains(body, expected) {
		return
	}
	t.Errorf("expected %q in metrics output\nbody:\n%s", expected, body)
}
|
||||
901
proxy/proxy.go
Normal file
901
proxy/proxy.go
Normal file
@@ -0,0 +1,901 @@
|
||||
package proxy
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"crypto/tls"
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"io"
|
||||
"log"
|
||||
"net"
|
||||
"net/http"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"github.com/fosrl/gerbil/internal/metrics"
|
||||
"github.com/fosrl/gerbil/logger"
|
||||
"github.com/patrickmn/go-cache"
|
||||
)
|
||||
|
||||
// RouteRecord represents a routing configuration: which backend host/port a
// given TLS hostname (SNI) should be forwarded to.
type RouteRecord struct {
	// Hostname is the SNI server name this record applies to.
	Hostname string
	// TargetHost is the address traffic for Hostname is proxied to.
	TargetHost string
	// TargetPort is the TCP port on TargetHost.
	TargetPort int
}
|
||||
|
||||
// RouteAPIResponse represents the response from the route API.
type RouteAPIResponse struct {
	// Endpoints lists candidate target endpoints for a hostname.
	// NOTE(review): format (host:port vs URL) is determined by the remote
	// API, not visible here — confirm against the server implementation.
	Endpoints []string `json:"endpoints"`
}
|
||||
|
||||
// ProxyProtocolInfo holds information parsed from an incoming PROXY protocol
// v1 header ("PROXY TCP4|TCP6 srcIP destIP srcPort destPort").
type ProxyProtocolInfo struct {
	Protocol string // TCP4 or TCP6
	SrcIP    string // original client IP as reported by the upstream
	DestIP   string // destination IP as reported by the upstream
	SrcPort  int    // original client port
	DestPort int    // destination port as reported by the upstream
	// OriginalConn is the connection after the PROXY header has been
	// consumed; reads continue with any payload bytes that followed it.
	OriginalConn net.Conn // The original connection after PROXY protocol parsing
}
|
||||
|
||||
// SNIProxy represents the main proxy server. It accepts TLS connections,
// peeks the ClientHello for the SNI, and forwards the stream either to a
// local proxy or to a remotely configured endpoint.
type SNIProxy struct {
	port     int                // TCP port the proxy listens on
	cache    *cache.Cache       // short-lived cache (see NewSNIProxy for TTLs)
	listener net.Listener       // set by Start; closed by Stop
	ctx      context.Context    // cancellation root for background work
	cancel   context.CancelFunc // cancels ctx during Stop
	wg       sync.WaitGroup     // tracks per-connection handler goroutines

	localProxyAddr  string // address of the local fallback proxy
	localProxyPort  int    // port of the local fallback proxy
	remoteConfigURL string // base URL of the remote route API
	publicKey       string // NOTE(review): usage not visible in this chunk — presumably identifies this node to the route API
	proxyProtocol   bool   // Enable PROXY protocol v1

	// New fields for fast local SNI lookup
	localSNIs     map[string]struct{}
	localSNIsLock sync.RWMutex // guards localSNIs

	// Local overrides for domains that should always use local proxy
	localOverrides map[string]struct{}

	// Track active tunnels by SNI
	activeTunnels     map[string]*activeTunnel
	activeTunnelsLock sync.Mutex // guards activeTunnels

	// Trusted upstream proxies that can send PROXY protocol
	trustedUpstreams map[string]struct{}

	// Reusable HTTP client for API requests
	httpClient *http.Client

	// Buffer pool for connection piping (32 KiB *[]byte values)
	bufferPool *sync.Pool
}
|
||||
|
||||
// activeTunnel groups the open connections associated with a single SNI,
// letting the proxy close them together when the tunnel is torn down.
type activeTunnel struct {
	conns []net.Conn // all live connections for this SNI
}
|
||||
|
||||
// readOnlyConn is a wrapper for io.Reader that implements net.Conn
|
||||
type readOnlyConn struct {
|
||||
reader io.Reader
|
||||
}
|
||||
|
||||
func (conn readOnlyConn) Read(p []byte) (int, error) { return conn.reader.Read(p) }
|
||||
func (conn readOnlyConn) Write(p []byte) (int, error) { return 0, io.ErrClosedPipe }
|
||||
func (conn readOnlyConn) Close() error { return nil }
|
||||
func (conn readOnlyConn) LocalAddr() net.Addr { return nil }
|
||||
func (conn readOnlyConn) RemoteAddr() net.Addr { return nil }
|
||||
func (conn readOnlyConn) SetDeadline(t time.Time) error { return nil }
|
||||
func (conn readOnlyConn) SetReadDeadline(t time.Time) error { return nil }
|
||||
func (conn readOnlyConn) SetWriteDeadline(t time.Time) error { return nil }
|
||||
|
||||
// parseProxyProtocolHeader parses a PROXY protocol v1 header from the connection.
//
// Behavior:
//  1. Connections not originating from a trusted upstream are returned
//     untouched (nil info, original conn, nil error).
//  2. For trusted upstreams, a single read (up to 512 bytes, 5s deadline) is
//     attempted. If no CRLF-terminated PROXY line is found, the consumed
//     bytes are stitched back in front of the connection and it is treated
//     as a plain TLS stream.
//  3. A well-formed "PROXY TCP4|TCP6 srcIP destIP srcPort destPort" line is
//     stripped and returned as *ProxyProtocolInfo, together with a wrapped
//     net.Conn whose reads continue at the first payload byte.
//
// NOTE(review): the whole header is assumed to arrive in the first Read; a
// header fragmented across TCP segments would not be recognized — confirm
// this is acceptable for the deployed upstreams.
func (p *SNIProxy) parseProxyProtocolHeader(conn net.Conn) (*ProxyProtocolInfo, net.Conn, error) {
	// Check if the connection comes from a trusted upstream
	remoteHost, _, err := net.SplitHostPort(conn.RemoteAddr().String())
	if err != nil {
		return nil, conn, fmt.Errorf("failed to parse remote address: %w", err)
	}

	// Resolve the remote IP to hostname to check if it's trusted
	// For simplicity, we'll check the IP directly in trusted upstreams
	// In production, you might want to do reverse DNS lookup
	if _, isTrusted := p.trustedUpstreams[remoteHost]; !isTrusted {
		// Not from trusted upstream, return original connection
		return nil, conn, nil
	}

	// Set read timeout for PROXY protocol parsing
	if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
		return nil, conn, fmt.Errorf("failed to set read deadline: %w", err)
	}

	// Read the first line (PROXY protocol header)
	buffer := make([]byte, 512) // PROXY protocol header should be much smaller
	n, err := conn.Read(buffer)
	if err != nil {
		// If we can't read from trusted upstream, treat as regular connection
		logger.Debug("Could not read from trusted upstream %s, treating as regular connection: %v", remoteHost, err)
		// Clear read timeout before returning
		if clearErr := conn.SetReadDeadline(time.Time{}); clearErr != nil {
			logger.Debug("Failed to clear read deadline: %v", clearErr)
		}
		return nil, conn, nil
	}

	// Find the end of the first line (CRLF)
	headerEnd := bytes.Index(buffer[:n], []byte("\r\n"))
	if headerEnd == -1 {
		// No PROXY protocol header found, treat as regular TLS connection
		// Return the connection with the buffered data prepended
		logger.Debug("No PROXY protocol header from trusted upstream %s, treating as regular TLS connection", remoteHost)

		// Clear read timeout
		if err := conn.SetReadDeadline(time.Time{}); err != nil {
			logger.Debug("Failed to clear read deadline: %v", err)
		}

		// Create a reader that includes the buffered data + original connection
		newReader := io.MultiReader(bytes.NewReader(buffer[:n]), conn)
		wrappedConn := &proxyProtocolConn{
			Conn:   conn,
			reader: newReader,
		}
		return nil, wrappedConn, nil
	}

	headerLine := string(buffer[:headerEnd])
	remainingData := buffer[headerEnd+2 : n] // bytes after the CRLF belong to the payload

	// Parse PROXY protocol line: "PROXY TCP4/TCP6 srcIP destIP srcPort destPort"
	parts := strings.Fields(headerLine)
	if len(parts) != 6 || parts[0] != "PROXY" {
		// Check for PROXY UNKNOWN
		if len(parts) == 2 && parts[0] == "PROXY" && parts[1] == "UNKNOWN" {
			// PROXY UNKNOWN - use original connection info
			// NOTE(review): this path returns the raw conn without clearing
			// the 5s read deadline set above and without re-prepending the
			// consumed bytes — both look like bugs; verify against callers.
			return nil, conn, nil
		}
		// Invalid PROXY protocol, but might be regular TLS - treat as such
		logger.Debug("Invalid PROXY protocol from trusted upstream %s, treating as regular TLS connection: %s", remoteHost, headerLine)

		// Clear read timeout
		if err := conn.SetReadDeadline(time.Time{}); err != nil {
			logger.Debug("Failed to clear read deadline: %v", err)
		}

		// Return the connection with all buffered data prepended
		newReader := io.MultiReader(bytes.NewReader(buffer[:n]), conn)
		wrappedConn := &proxyProtocolConn{
			Conn:   conn,
			reader: newReader,
		}
		return nil, wrappedConn, nil
	}

	protocol := parts[1]
	srcIP := parts[2]
	destIP := parts[3]
	srcPort, err := strconv.Atoi(parts[4])
	if err != nil {
		return nil, conn, fmt.Errorf("invalid source port in PROXY header: %s", parts[4])
	}
	destPort, err := strconv.Atoi(parts[5])
	if err != nil {
		return nil, conn, fmt.Errorf("invalid destination port in PROXY header: %s", parts[5])
	}

	// Create a new reader that includes remaining data + original connection
	var newReader io.Reader
	if len(remainingData) > 0 {
		newReader = io.MultiReader(bytes.NewReader(remainingData), conn)
	} else {
		newReader = conn
	}

	// Create a wrapper connection that reads from the combined reader
	wrappedConn := &proxyProtocolConn{
		Conn:   conn,
		reader: newReader,
	}

	proxyInfo := &ProxyProtocolInfo{
		Protocol:     protocol,
		SrcIP:        srcIP,
		DestIP:       destIP,
		SrcPort:      srcPort,
		DestPort:     destPort,
		OriginalConn: wrappedConn,
	}

	// Clear read timeout
	if err := conn.SetReadDeadline(time.Time{}); err != nil {
		return nil, conn, fmt.Errorf("failed to clear read deadline: %w", err)
	}

	return proxyInfo, wrappedConn, nil
}
|
||||
|
||||
// proxyProtocolConn wraps a connection to read from a custom reader — in
// practice an io.MultiReader that replays bytes already consumed while
// parsing the PROXY protocol header, followed by the live connection.
// All other net.Conn methods (Write, Close, deadlines, addresses) are
// delegated to the embedded Conn.
type proxyProtocolConn struct {
	net.Conn
	reader io.Reader
}

// Read drains the replay reader before falling through to the wrapped Conn.
func (c *proxyProtocolConn) Read(b []byte) (int, error) {
	return c.reader.Read(b)
}
|
||||
|
||||
// buildProxyProtocolHeaderFromInfo creates a PROXY protocol v1 header using ProxyProtocolInfo
|
||||
func (p *SNIProxy) buildProxyProtocolHeaderFromInfo(proxyInfo *ProxyProtocolInfo, targetAddr net.Addr) string {
|
||||
targetTCP, ok := targetAddr.(*net.TCPAddr)
|
||||
if !ok {
|
||||
// Fallback for unknown address types
|
||||
return "PROXY UNKNOWN\r\n"
|
||||
}
|
||||
|
||||
// Use the original client information from the PROXY protocol
|
||||
var targetIP string
|
||||
var protocol string
|
||||
|
||||
// Parse source IP to determine protocol family
|
||||
srcIP := net.ParseIP(proxyInfo.SrcIP)
|
||||
if srcIP == nil {
|
||||
return "PROXY UNKNOWN\r\n"
|
||||
}
|
||||
|
||||
if srcIP.To4() != nil {
|
||||
// Source is IPv4, use TCP4 protocol
|
||||
protocol = "TCP4"
|
||||
if targetTCP.IP.To4() != nil {
|
||||
// Target is also IPv4, use as-is
|
||||
targetIP = targetTCP.IP.String()
|
||||
} else {
|
||||
// Target is IPv6, but we need IPv4 for consistent protocol family
|
||||
if targetTCP.IP.IsLoopback() {
|
||||
targetIP = "127.0.0.1"
|
||||
} else {
|
||||
targetIP = "127.0.0.1" // Safe fallback
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Source is IPv6, use TCP6 protocol
|
||||
protocol = "TCP6"
|
||||
if targetTCP.IP.To4() != nil {
|
||||
// Target is IPv4, convert to IPv6 representation
|
||||
targetIP = "::ffff:" + targetTCP.IP.String()
|
||||
} else {
|
||||
// Target is also IPv6, use as-is
|
||||
targetIP = targetTCP.IP.String()
|
||||
}
|
||||
}
|
||||
|
||||
return fmt.Sprintf("PROXY %s %s %s %d %d\r\n",
|
||||
protocol,
|
||||
proxyInfo.SrcIP,
|
||||
targetIP,
|
||||
proxyInfo.SrcPort,
|
||||
targetTCP.Port)
|
||||
}
|
||||
|
||||
// buildProxyProtocolHeader creates a PROXY protocol v1 header
|
||||
func buildProxyProtocolHeader(clientAddr, targetAddr net.Addr) string {
|
||||
clientTCP, ok := clientAddr.(*net.TCPAddr)
|
||||
if !ok {
|
||||
// Fallback for unknown address types
|
||||
return "PROXY UNKNOWN\r\n"
|
||||
}
|
||||
|
||||
targetTCP, ok := targetAddr.(*net.TCPAddr)
|
||||
if !ok {
|
||||
// Fallback for unknown address types
|
||||
return "PROXY UNKNOWN\r\n"
|
||||
}
|
||||
|
||||
// Determine protocol family based on client IP and normalize target IP accordingly
|
||||
var protocol string
|
||||
var targetIP string
|
||||
|
||||
if clientTCP.IP.To4() != nil {
|
||||
// Client is IPv4, use TCP4 protocol
|
||||
protocol = "TCP4"
|
||||
if targetTCP.IP.To4() != nil {
|
||||
// Target is also IPv4, use as-is
|
||||
targetIP = targetTCP.IP.String()
|
||||
} else {
|
||||
// Target is IPv6, but we need IPv4 for consistent protocol family
|
||||
// Use the IPv4 loopback if target is IPv6 loopback, otherwise use 127.0.0.1
|
||||
if targetTCP.IP.IsLoopback() {
|
||||
targetIP = "127.0.0.1"
|
||||
} else {
|
||||
// For non-loopback IPv6 targets, we could try to extract embedded IPv4
|
||||
// or fall back to a sensible IPv4 address based on the target
|
||||
targetIP = "127.0.0.1" // Safe fallback
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// Client is IPv6, use TCP6 protocol
|
||||
protocol = "TCP6"
|
||||
if targetTCP.IP.To4() != nil {
|
||||
// Target is IPv4, convert to IPv6 representation
|
||||
targetIP = "::ffff:" + targetTCP.IP.String()
|
||||
} else {
|
||||
// Target is also IPv6, use as-is
|
||||
targetIP = targetTCP.IP.String()
|
||||
}
|
||||
}
|
||||
|
||||
return fmt.Sprintf("PROXY %s %s %s %d %d\r\n",
|
||||
protocol,
|
||||
clientTCP.IP.String(),
|
||||
targetIP,
|
||||
clientTCP.Port,
|
||||
targetTCP.Port)
|
||||
}
|
||||
|
||||
// NewSNIProxy creates a new SNI proxy instance.
//
// localOverrides lists hostnames that must always be routed to the local
// proxy; trustedUpstreams lists hosts allowed to send PROXY protocol
// headers. Each trusted upstream is additionally resolved via DNS once here
// and its addresses added to the trusted set, so connections can be matched
// by IP.
//
// NOTE(review): net.LookupIP blocks on DNS during construction and the
// result is never refreshed — upstreams whose IPs rotate will silently fall
// out of the trusted set. Resolution failures are deliberately ignored
// (best-effort).
func NewSNIProxy(port int, remoteConfigURL, publicKey, localProxyAddr string, localProxyPort int, localOverrides []string, proxyProtocol bool, trustedUpstreams []string) (*SNIProxy, error) {
	// Cancellation root for the accept loop and all connection handlers.
	ctx, cancel := context.WithCancel(context.Background())

	// Create local overrides map
	overridesMap := make(map[string]struct{})
	for _, domain := range localOverrides {
		if domain != "" {
			overridesMap[domain] = struct{}{}
		}
	}

	// Create trusted upstreams map
	trustedMap := make(map[string]struct{})
	for _, upstream := range trustedUpstreams {
		if upstream != "" {
			// Add both the domain and potentially resolved IPs
			trustedMap[upstream] = struct{}{}

			// Try to resolve the domain to IPs and add them too
			if ips, err := net.LookupIP(upstream); err == nil {
				for _, ip := range ips {
					trustedMap[ip.String()] = struct{}{}
				}
			}
		}
	}

	proxy := &SNIProxy{
		port: port,
		// 3s default TTL, 10min janitor sweep for expired entries.
		cache:            cache.New(3*time.Second, 10*time.Minute),
		ctx:              ctx,
		cancel:           cancel,
		localProxyAddr:   localProxyAddr,
		localProxyPort:   localProxyPort,
		remoteConfigURL:  remoteConfigURL,
		publicKey:        publicKey,
		proxyProtocol:    proxyProtocol,
		localSNIs:        make(map[string]struct{}),
		localOverrides:   overridesMap,
		activeTunnels:    make(map[string]*activeTunnel),
		trustedUpstreams: trustedMap,
		httpClient: &http.Client{
			Timeout: 5 * time.Second,
			Transport: &http.Transport{
				MaxIdleConns:        100,
				MaxIdleConnsPerHost: 10,
				IdleConnTimeout:     90 * time.Second,
			},
		},
		bufferPool: &sync.Pool{
			New: func() interface{} {
				// 32 KiB scratch buffers, pooled as *[]byte to avoid
				// re-boxing the slice header on every Get/Put.
				buf := make([]byte, 32*1024)
				return &buf
			},
		},
	}

	return proxy, nil
}
|
||||
|
||||
// Start begins listening for connections
|
||||
func (p *SNIProxy) Start() error {
|
||||
listener, err := net.Listen("tcp", fmt.Sprintf(":%d", p.port))
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to listen on port %d: %w", p.port, err)
|
||||
}
|
||||
|
||||
p.listener = listener
|
||||
logger.Debug("SNI Proxy listening on port %d", p.port)
|
||||
|
||||
// Accept connections in a goroutine
|
||||
go p.acceptConnections()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// Stop gracefully shuts down the proxy
|
||||
func (p *SNIProxy) Stop() error {
|
||||
log.Println("Stopping SNI Proxy...")
|
||||
|
||||
p.cancel()
|
||||
|
||||
if p.listener != nil {
|
||||
p.listener.Close()
|
||||
}
|
||||
|
||||
// Wait for all goroutines to finish with timeout
|
||||
done := make(chan struct{})
|
||||
go func() {
|
||||
p.wg.Wait()
|
||||
close(done)
|
||||
}()
|
||||
|
||||
select {
|
||||
case <-done:
|
||||
log.Println("All connections closed gracefully")
|
||||
case <-time.After(30 * time.Second):
|
||||
log.Println("Timeout waiting for connections to close")
|
||||
}
|
||||
|
||||
log.Println("SNI Proxy stopped")
|
||||
return nil
|
||||
}
|
||||
|
||||
// acceptConnections handles incoming connections
|
||||
func (p *SNIProxy) acceptConnections() {
|
||||
for {
|
||||
conn, err := p.listener.Accept()
|
||||
if err != nil {
|
||||
select {
|
||||
case <-p.ctx.Done():
|
||||
return
|
||||
default:
|
||||
logger.Debug("Accept error: %v", err)
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
p.wg.Add(1)
|
||||
go p.handleConnection(conn)
|
||||
}
|
||||
}
|
||||
|
||||
// readClientHello reads and parses the TLS ClientHello message.
//
// It leverages crypto/tls for the parsing: a handshake is started over
// a read-only connection wrapper, and GetConfigForClient captures a
// copy of the ClientHello. The handshake itself is expected to fail
// (presumably readOnlyConn cannot write the server response — confirm
// against its definition), so the handshake error is deliberately
// ignored whenever the hello was successfully captured.
func (p *SNIProxy) readClientHello(reader io.Reader) (*tls.ClientHelloInfo, error) {
	var hello *tls.ClientHelloInfo
	err := tls.Server(readOnlyConn{reader: reader}, &tls.Config{
		GetConfigForClient: func(argHello *tls.ClientHelloInfo) (*tls.Config, error) {
			// Copy the hello out of the callback so it survives the
			// (failing) handshake.
			hello = new(tls.ClientHelloInfo)
			*hello = *argHello
			return nil, nil
		},
	}).Handshake()
	// The callback never ran: the ClientHello was malformed or
	// truncated, so surface the handshake error instead.
	if hello == nil {
		return nil, err
	}
	return hello, nil
}
// peekClientHello reads the ClientHello while preserving the data for forwarding
|
||||
func (p *SNIProxy) peekClientHello(reader io.Reader) (*tls.ClientHelloInfo, io.Reader, error) {
|
||||
peekedBytes := new(bytes.Buffer)
|
||||
hello, err := p.readClientHello(io.TeeReader(reader, peekedBytes))
|
||||
if err != nil {
|
||||
return nil, nil, err
|
||||
}
|
||||
return hello, io.MultiReader(peekedBytes, reader), nil
|
||||
}
|
||||
|
||||
// extractSNI extracts the SNI hostname from the TLS ClientHello
|
||||
func (p *SNIProxy) extractSNI(conn net.Conn) (string, io.Reader, error) {
|
||||
clientHello, clientReader, err := p.peekClientHello(conn)
|
||||
if err != nil {
|
||||
return "", nil, fmt.Errorf("failed to peek ClientHello: %w", err)
|
||||
}
|
||||
|
||||
if clientHello.ServerName == "" {
|
||||
return "", clientReader, fmt.Errorf("no SNI hostname found in ClientHello")
|
||||
}
|
||||
|
||||
return clientHello.ServerName, clientReader, nil
|
||||
}
|
||||
|
||||
// handleConnection processes a single client connection end-to-end:
// optional PROXY-protocol parsing from trusted upstreams, SNI
// extraction, route lookup, dialing the target, optional PROXY header
// emission toward the target, tunnel bookkeeping, and finally the
// bidirectional byte pipe. Any failure along the way logs and returns,
// which closes the client via the deferred Close.
func (p *SNIProxy) handleConnection(clientConn net.Conn) {
	defer p.wg.Done()
	defer clientConn.Close()

	metrics.RecordSNIConnection("accepted")

	logger.Debug("Accepted connection from %s", clientConn.RemoteAddr())

	// Check for PROXY protocol from trusted upstream.
	// actualClientConn may be a wrapper returned by the parser; all
	// subsequent reads go through it so buffered bytes are not lost.
	var proxyInfo *ProxyProtocolInfo
	var actualClientConn net.Conn = clientConn

	if len(p.trustedUpstreams) > 0 {
		var err error
		proxyInfo, actualClientConn, err = p.parseProxyProtocolHeader(clientConn)
		if err != nil {
			metrics.RecordSNIProxyProtocolParseError()
			logger.Debug("Failed to parse PROXY protocol: %v", err)
			return
		}
		if proxyInfo != nil {
			metrics.RecordSNITrustedProxyEvent("proxy_protocol_parsed")
			logger.Debug("Received PROXY protocol from trusted upstream: %s:%d -> %s:%d",
				proxyInfo.SrcIP, proxyInfo.SrcPort, proxyInfo.DestIP, proxyInfo.DestPort)
		} else {
			// No PROXY protocol detected, but connection is from trusted upstream
			// This is fine - treat as regular connection
			logger.Debug("No PROXY protocol detected from trusted upstream, treating as regular connection")
		}
	}

	// Set read timeout for SNI extraction so a client that never sends
	// a ClientHello cannot hold the goroutine forever.
	if err := actualClientConn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
		logger.Debug("Failed to set read deadline: %v", err)
		return
	}

	// Extract SNI hostname (clientReader replays the peeked bytes).
	clientHelloStart := time.Now()
	hostname, clientReader, err := p.extractSNI(actualClientConn)
	if err != nil {
		logger.Debug("SNI extraction failed: %v", err)
		return
	}
	metrics.RecordProxyTLSHandshake(hostname, time.Since(clientHelloStart).Seconds())

	// Defensive check: extractSNI already errors on an empty
	// ServerName, so this branch should be unreachable.
	if hostname == "" {
		log.Println("No SNI hostname found")
		return
	}

	logger.Debug("SNI hostname detected: %s", hostname)

	// Remove read timeout for normal operation (long-lived tunnel).
	if err := actualClientConn.SetReadDeadline(time.Time{}); err != nil {
		logger.Debug("Failed to clear read deadline: %v", err)
		return
	}

	// Get routing information - use original client address if available from PROXY protocol
	// so sticky endpoint selection keys on the real client, not the LB.
	var clientAddrStr string
	if proxyInfo != nil {
		clientAddrStr = fmt.Sprintf("%s:%d", proxyInfo.SrcIP, proxyInfo.SrcPort)
	} else {
		clientAddrStr = clientConn.RemoteAddr().String()
	}

	route, err := p.getRoute(hostname, clientAddrStr)
	if err != nil {
		logger.Debug("Failed to get route for %s: %v", hostname, err)
		return
	}

	// getRoute returns (nil, nil) for a cached/confirmed "no route".
	if route == nil {
		logger.Debug("No route found for hostname: %s", hostname)
		return
	}

	logger.Debug("Routing %s to %s:%d", hostname, route.TargetHost, route.TargetPort)

	// Connect to target server
	targetConn, err := net.DialTimeout("tcp",
		fmt.Sprintf("%s:%d", route.TargetHost, route.TargetPort),
		10*time.Second)
	if err != nil {
		logger.Debug("Failed to connect to target %s:%d: %v",
			route.TargetHost, route.TargetPort, err)
		return
	}
	defer targetConn.Close()

	logger.Debug("Connected to target: %s:%d", route.TargetHost, route.TargetPort)
	metrics.RecordActiveProxyConnection(hostname, 1)
	defer metrics.RecordActiveProxyConnection(hostname, -1)

	// Send PROXY protocol header if enabled, before any TLS bytes, so
	// the target sees the original client address.
	if p.proxyProtocol {
		var proxyHeader string
		if proxyInfo != nil {
			// Use original client info from PROXY protocol
			proxyHeader = p.buildProxyProtocolHeaderFromInfo(proxyInfo, targetConn.LocalAddr())
		} else {
			// Use direct client connection info
			proxyHeader = buildProxyProtocolHeader(clientConn.RemoteAddr(), targetConn.LocalAddr())
		}
		logger.Debug("Sending PROXY protocol header: %s", strings.TrimSpace(proxyHeader))

		if _, err := targetConn.Write([]byte(proxyHeader)); err != nil {
			logger.Debug("Failed to send PROXY protocol header: %v", err)
			return
		}
	}

	// Track this tunnel by SNI so UpdateLocalSNIs can force-close
	// connections whose SNI is no longer served here.
	p.activeTunnelsLock.Lock()
	tunnel, ok := p.activeTunnels[hostname]
	if !ok {
		tunnel = &activeTunnel{}
		p.activeTunnels[hostname] = tunnel
	}
	tunnel.conns = append(tunnel.conns, actualClientConn)
	p.activeTunnelsLock.Unlock()

	defer func() {
		// Remove this conn from active tunnels; drop the whole entry
		// when it was the last connection for this SNI.
		p.activeTunnelsLock.Lock()
		if tunnel, ok := p.activeTunnels[hostname]; ok {
			newConns := make([]net.Conn, 0, len(tunnel.conns))
			for _, c := range tunnel.conns {
				if c != actualClientConn {
					newConns = append(newConns, c)
				}
			}
			if len(newConns) == 0 {
				delete(p.activeTunnels, hostname)
			} else {
				tunnel.conns = newConns
			}
		}
		p.activeTunnelsLock.Unlock()
	}()

	// Start bidirectional data transfer; blocks until either side closes.
	p.pipe(hostname, actualClientConn, targetConn, clientReader)
}
// getRoute retrieves routing information for a hostname.
//
// Lookup order: local overrides, the locally-served SNI set, the route
// cache, then the remote config API. A nil route with a nil error means
// "no route exists"; negative API results are cached for one minute,
// positive ones for the cache's default expiration.
func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
	// Check local overrides first.
	// NOTE(review): read without a lock — presumably localOverrides is
	// immutable after construction; confirm no writer exists elsewhere.
	if _, isOverride := p.localOverrides[hostname]; isOverride {
		logger.Debug("Local override matched for hostname: %s", hostname)
		metrics.RecordProxyRouteLookup("local_override", hostname)
		return &RouteRecord{
			Hostname:   hostname,
			TargetHost: p.localProxyAddr,
			TargetPort: p.localProxyPort,
		}, nil
	}

	// Fast path: check if hostname is in localSNIs (guarded by RWMutex
	// because UpdateLocalSNIs replaces the map concurrently).
	p.localSNIsLock.RLock()
	_, isLocal := p.localSNIs[hostname]
	p.localSNIsLock.RUnlock()
	if isLocal {
		metrics.RecordProxyRouteLookup("local", hostname)
		return &RouteRecord{
			Hostname:   hostname,
			TargetHost: p.localProxyAddr,
			TargetPort: p.localProxyPort,
		}, nil
	}

	// Check cache first. A stored nil is a cached negative result.
	if cached, found := p.cache.Get(hostname); found {
		if cached == nil {
			metrics.RecordProxyRouteLookup("cached_not_found", hostname)
			return nil, nil // Cached negative result
		}
		logger.Debug("Cache hit for hostname: %s", hostname)
		metrics.RecordProxyRouteLookup("cache_hit", hostname)
		return cached.(*RouteRecord), nil
	}

	logger.Debug("Cache miss for hostname: %s, querying API", hostname)
	metrics.RecordProxyRouteLookup("cache_miss", hostname)

	// Query API with timeout (bounded by the proxy's own context so
	// shutdown cancels in-flight lookups).
	ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second)
	defer cancel()

	// Construct API URL (without hostname in path)
	apiURL := fmt.Sprintf("%s/gerbil/get-resolved-hostname", p.remoteConfigURL)

	// Create request body with hostname and public key
	requestBody := map[string]string{
		"hostname":  hostname,
		"publicKey": p.publicKey,
	}

	jsonBody, err := json.Marshal(requestBody)
	if err != nil {
		return nil, fmt.Errorf("failed to marshal request body: %w", err)
	}

	// Create HTTP request
	req, err := http.NewRequestWithContext(ctx, "POST", apiURL, bytes.NewBuffer(jsonBody))
	if err != nil {
		return nil, fmt.Errorf("failed to create request: %w", err)
	}
	req.Header.Set("Content-Type", "application/json")

	// Make HTTP request using reusable client
	apiStart := time.Now()
	resp, err := p.httpClient.Do(req)
	if err != nil {
		metrics.RecordSNIRouteAPIRequest("error")
		return nil, fmt.Errorf("API request failed: %w", err)
	}
	defer resp.Body.Close()
	metrics.RecordSNIRouteAPILatency(time.Since(apiStart).Seconds())

	if resp.StatusCode == http.StatusNotFound {
		metrics.RecordSNIRouteAPIRequest("not_found")
		// Cache negative result for shorter time (1 minute)
		p.cache.Set(hostname, nil, 1*time.Minute)
		return nil, nil
	}

	if resp.StatusCode != http.StatusOK {
		metrics.RecordSNIRouteAPIRequest("error")
		return nil, fmt.Errorf("API returned status %d", resp.StatusCode)
	}
	metrics.RecordSNIRouteAPIRequest("success")

	// Parse response
	var apiResponse RouteAPIResponse
	if err := json.NewDecoder(resp.Body).Decode(&apiResponse); err != nil {
		return nil, fmt.Errorf("failed to decode API response: %w", err)
	}

	endpoints := apiResponse.Endpoints

	// Default target configuration
	targetHost := p.localProxyAddr
	targetPort := p.localProxyPort

	// If no endpoints returned, use local node
	if len(endpoints) == 0 {
		logger.Debug("No endpoints returned for hostname: %s, using local node", hostname)
	} else {
		// Select endpoint using consistent hashing for stickiness.
		// NOTE(review): the port is hard-coded for remote endpoints —
		// confirm remote nodes always terminate on 443.
		selectedEndpoint := p.selectStickyEndpoint(clientAddr, endpoints)
		targetHost = selectedEndpoint
		targetPort = 443 // Default HTTPS port
		logger.Debug("Selected endpoint %s for hostname %s from client %s", selectedEndpoint, hostname, clientAddr)
	}

	route := &RouteRecord{
		Hostname:   hostname,
		TargetHost: targetHost,
		TargetPort: targetPort,
	}

	// Cache the result
	p.cache.Set(hostname, route, cache.DefaultExpiration)
	logger.Debug("Cached route for hostname: %s", hostname)

	return route, nil
}
// selectStickyEndpoint selects an endpoint using consistent hashing to ensure
|
||||
// the same client always routes to the same endpoint for load balancing
|
||||
func (p *SNIProxy) selectStickyEndpoint(clientAddr string, endpoints []string) string {
|
||||
if len(endpoints) == 0 {
|
||||
return p.localProxyAddr
|
||||
}
|
||||
if len(endpoints) == 1 {
|
||||
return endpoints[0]
|
||||
}
|
||||
|
||||
// Use FNV hash for consistent selection based on client address
|
||||
hash := fnv.New32a()
|
||||
hash.Write([]byte(clientAddr))
|
||||
index := hash.Sum32() % uint32(len(endpoints))
|
||||
|
||||
return endpoints[index]
|
||||
}
|
||||
|
||||
// pipe handles bidirectional data transfer between connections
|
||||
func (p *SNIProxy) pipe(hostname string, clientConn, targetConn net.Conn, clientReader io.Reader) {
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(2)
|
||||
|
||||
// closeOnce ensures we only close connections once
|
||||
var closeOnce sync.Once
|
||||
closeConns := func() {
|
||||
closeOnce.Do(func() {
|
||||
// Close both connections to unblock any pending reads
|
||||
clientConn.Close()
|
||||
targetConn.Close()
|
||||
})
|
||||
}
|
||||
|
||||
// Copy data from client to target (using the buffered reader)
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer closeConns()
|
||||
|
||||
// Get buffer from pool and return when done
|
||||
bufPtr := p.bufferPool.Get().(*[]byte)
|
||||
defer func() {
|
||||
// Clear buffer before returning to pool to prevent data leakage
|
||||
clear(*bufPtr)
|
||||
p.bufferPool.Put(bufPtr)
|
||||
}()
|
||||
|
||||
bytesCopied, err := io.CopyBuffer(targetConn, clientReader, *bufPtr)
|
||||
metrics.RecordProxyBytesTransmitted(hostname, "client_to_target", bytesCopied)
|
||||
if err != nil && err != io.EOF {
|
||||
logger.Debug("Copy client->target error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
// Copy data from target to client
|
||||
go func() {
|
||||
defer wg.Done()
|
||||
defer closeConns()
|
||||
|
||||
// Get buffer from pool and return when done
|
||||
bufPtr := p.bufferPool.Get().(*[]byte)
|
||||
defer func() {
|
||||
// Clear buffer before returning to pool to prevent data leakage
|
||||
clear(*bufPtr)
|
||||
p.bufferPool.Put(bufPtr)
|
||||
}()
|
||||
|
||||
bytesCopied, err := io.CopyBuffer(clientConn, targetConn, *bufPtr)
|
||||
metrics.RecordProxyBytesTransmitted(hostname, "target_to_client", bytesCopied)
|
||||
if err != nil && err != io.EOF {
|
||||
logger.Debug("Copy target->client error: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
wg.Wait()
|
||||
}
|
||||
|
||||
// GetCacheStats returns cache statistics
|
||||
func (p *SNIProxy) GetCacheStats() (int, int) {
|
||||
return p.cache.ItemCount(), len(p.cache.Items())
|
||||
}
|
||||
|
||||
// ClearCache clears all cached entries
|
||||
func (p *SNIProxy) ClearCache() {
|
||||
p.cache.Flush()
|
||||
log.Println("Cache cleared")
|
||||
}
|
||||
|
||||
// UpdateLocalSNIs updates the local SNIs and invalidates cache for changed domains
|
||||
func (p *SNIProxy) UpdateLocalSNIs(fullDomains []string) {
|
||||
newSNIs := make(map[string]struct{})
|
||||
for _, domain := range fullDomains {
|
||||
newSNIs[domain] = struct{}{}
|
||||
// Invalidate any cached route for this domain
|
||||
p.cache.Delete(domain)
|
||||
}
|
||||
|
||||
// Update localSNIs
|
||||
p.localSNIsLock.Lock()
|
||||
removed := make([]string, 0)
|
||||
for sni := range p.localSNIs {
|
||||
if _, stillLocal := newSNIs[sni]; !stillLocal {
|
||||
removed = append(removed, sni)
|
||||
}
|
||||
}
|
||||
p.localSNIs = newSNIs
|
||||
p.localSNIsLock.Unlock()
|
||||
|
||||
logger.Debug("Updated local SNIs, added %d, removed %d", len(newSNIs), len(removed))
|
||||
|
||||
// Terminate tunnels for removed SNIs
|
||||
if len(removed) > 0 {
|
||||
p.activeTunnelsLock.Lock()
|
||||
for _, sni := range removed {
|
||||
if tunnels, ok := p.activeTunnels[sni]; ok {
|
||||
for _, conn := range tunnels.conns {
|
||||
conn.Close()
|
||||
}
|
||||
delete(p.activeTunnels, sni)
|
||||
logger.Debug("Closed tunnels for SNI target change: %s", sni)
|
||||
}
|
||||
}
|
||||
p.activeTunnelsLock.Unlock()
|
||||
}
|
||||
}
|
||||
119
proxy/proxy_test.go
Normal file
119
proxy/proxy_test.go
Normal file
@@ -0,0 +1,119 @@
|
||||
package proxy
|
||||
|
||||
import (
|
||||
"net"
|
||||
"testing"
|
||||
)
|
||||
|
||||
func TestBuildProxyProtocolHeader(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
clientAddr string
|
||||
targetAddr string
|
||||
expected string
|
||||
}{
|
||||
{
|
||||
name: "IPv4 client and target",
|
||||
clientAddr: "192.168.1.100:12345",
|
||||
targetAddr: "10.0.0.1:443",
|
||||
expected: "PROXY TCP4 192.168.1.100 10.0.0.1 12345 443\r\n",
|
||||
},
|
||||
{
|
||||
name: "IPv6 client and target",
|
||||
clientAddr: "[2001:db8::1]:12345",
|
||||
targetAddr: "[2001:db8::2]:443",
|
||||
expected: "PROXY TCP6 2001:db8::1 2001:db8::2 12345 443\r\n",
|
||||
},
|
||||
{
|
||||
name: "IPv4 client with IPv6 loopback target",
|
||||
clientAddr: "192.168.1.100:12345",
|
||||
targetAddr: "[::1]:443",
|
||||
expected: "PROXY TCP4 192.168.1.100 127.0.0.1 12345 443\r\n",
|
||||
},
|
||||
{
|
||||
name: "IPv4 client with IPv6 target",
|
||||
clientAddr: "192.168.1.100:12345",
|
||||
targetAddr: "[2001:db8::2]:443",
|
||||
expected: "PROXY TCP4 192.168.1.100 127.0.0.1 12345 443\r\n",
|
||||
},
|
||||
{
|
||||
name: "IPv6 client with IPv4 target",
|
||||
clientAddr: "[2001:db8::1]:12345",
|
||||
targetAddr: "10.0.0.1:443",
|
||||
expected: "PROXY TCP6 2001:db8::1 ::ffff:10.0.0.1 12345 443\r\n",
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
clientTCP, err := net.ResolveTCPAddr("tcp", tt.clientAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to resolve client address: %v", err)
|
||||
}
|
||||
|
||||
targetTCP, err := net.ResolveTCPAddr("tcp", tt.targetAddr)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to resolve target address: %v", err)
|
||||
}
|
||||
|
||||
result := buildProxyProtocolHeader(clientTCP, targetTCP)
|
||||
if result != tt.expected {
|
||||
t.Errorf("Expected %q, got %q", tt.expected, result)
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildProxyProtocolHeaderUnknownType(t *testing.T) {
|
||||
// Test with non-TCP address type
|
||||
clientAddr := &net.UDPAddr{IP: net.ParseIP("192.168.1.100"), Port: 12345}
|
||||
targetAddr := &net.UDPAddr{IP: net.ParseIP("10.0.0.1"), Port: 443}
|
||||
|
||||
result := buildProxyProtocolHeader(clientAddr, targetAddr)
|
||||
expected := "PROXY UNKNOWN\r\n"
|
||||
|
||||
if result != expected {
|
||||
t.Errorf("Expected %q, got %q", expected, result)
|
||||
}
|
||||
}
|
||||
|
||||
func TestBuildProxyProtocolHeaderFromInfo(t *testing.T) {
|
||||
proxy, err := NewSNIProxy(8443, "", "", "127.0.0.1", 443, nil, true, nil)
|
||||
if err != nil {
|
||||
t.Fatalf("Failed to create SNI proxy: %v", err)
|
||||
}
|
||||
|
||||
// Test IPv4 case
|
||||
proxyInfo := &ProxyProtocolInfo{
|
||||
Protocol: "TCP4",
|
||||
SrcIP: "10.0.0.1",
|
||||
DestIP: "192.168.1.100",
|
||||
SrcPort: 12345,
|
||||
DestPort: 443,
|
||||
}
|
||||
|
||||
targetAddr, _ := net.ResolveTCPAddr("tcp", "127.0.0.1:8080")
|
||||
header := proxy.buildProxyProtocolHeaderFromInfo(proxyInfo, targetAddr)
|
||||
|
||||
expected := "PROXY TCP4 10.0.0.1 127.0.0.1 12345 8080\r\n"
|
||||
if header != expected {
|
||||
t.Errorf("Expected header '%s', got '%s'", expected, header)
|
||||
}
|
||||
|
||||
// Test IPv6 case
|
||||
proxyInfo = &ProxyProtocolInfo{
|
||||
Protocol: "TCP6",
|
||||
SrcIP: "2001:db8::1",
|
||||
DestIP: "2001:db8::2",
|
||||
SrcPort: 12345,
|
||||
DestPort: 443,
|
||||
}
|
||||
|
||||
targetAddr, _ = net.ResolveTCPAddr("tcp6", "[::1]:8080")
|
||||
header = proxy.buildProxyProtocolHeaderFromInfo(proxyInfo, targetAddr)
|
||||
|
||||
expected = "PROXY TCP6 2001:db8::1 ::1 12345 8080\r\n"
|
||||
if header != expected {
|
||||
t.Errorf("Expected header '%s', got '%s'", expected, header)
|
||||
}
|
||||
}
|
||||
1251
relay/relay.go
Normal file
1251
relay/relay.go
Normal file
File diff suppressed because it is too large
Load Diff
Reference in New Issue
Block a user