mirror of
https://github.com/fosrl/gerbil.git
synced 2026-04-25 10:07:37 -05:00
Compare commits
130 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b642df3e1e | ||
|
|
6c91e99497 | ||
|
|
58415dee7e | ||
|
|
c3ed355127 | ||
|
|
c6e1881e6a | ||
|
|
eedd813e2f | ||
|
|
3cf2ccdc54 | ||
|
|
726b6b171c | ||
|
|
037618acbc | ||
|
|
1a6bc81ddd | ||
|
|
a3dbdef7cc | ||
|
|
f07c83fde4 | ||
|
|
652d9c5c68 | ||
|
|
e47a57cb4f | ||
|
|
4357ddf64b | ||
|
|
f322b4c921 | ||
|
|
56f72d6643 | ||
|
|
367e5bfa08 | ||
|
|
aeb8b7c56f | ||
|
|
f5c77d7df8 | ||
|
|
a37aadddb5 | ||
|
|
80747bf98b | ||
|
|
69418a439c | ||
|
|
d065897c4d | ||
|
|
b57574cc4b | ||
|
|
a3862260c9 | ||
|
|
40da38708c | ||
|
|
3af64d8bd3 | ||
|
|
fcead8cc15 | ||
|
|
20dad7bb8e | ||
|
|
a955aa6169 | ||
|
|
c7d9c72f29 | ||
|
|
abc744c647 | ||
|
|
b118fef265 | ||
|
|
7985f97eb6 | ||
|
|
986a2c6bb6 | ||
|
|
58674ec025 | ||
|
|
5dbe3dbb84 | ||
|
|
32d7af44ca | ||
|
|
b9261b8fea | ||
|
|
fdc398eb9c | ||
|
|
c3e73d0189 | ||
|
|
df2fbdf160 | ||
|
|
cb4ac8199d | ||
|
|
dd4b86b3e5 | ||
|
|
bad290aa4e | ||
|
|
8c27d5e3bf | ||
|
|
7e7a37d49c | ||
|
|
d44aa97f32 | ||
|
|
b57ad74589 | ||
|
|
82256a3f6f | ||
|
|
9e140a94db | ||
|
|
d0c9ea5a57 | ||
|
|
c88810ef24 | ||
|
|
fcd290272f | ||
|
|
463a4eea79 | ||
|
|
4576a2e8a7 | ||
|
|
69c13adcdb | ||
|
|
3886c1a8c1 | ||
|
|
06eb4d4310 | ||
|
|
247c47b27f | ||
|
|
060038c29b | ||
|
|
5414d21dcd | ||
|
|
364fa020aa | ||
|
|
b96ee16fbf | ||
|
|
467d69aa7c | ||
|
|
7c7762ebc5 | ||
|
|
526f9c8b4e | ||
|
|
905983cf61 | ||
|
|
a0879114e2 | ||
|
|
0d54a07973 | ||
|
|
4cb2fde961 | ||
|
|
9602599565 | ||
|
|
11f858b341 | ||
|
|
29b2cb33a2 | ||
|
|
34290ffe09 | ||
|
|
1013d0591e | ||
|
|
2f6d62ab45 | ||
|
|
8d6ba79408 | ||
|
|
208b434cb7 | ||
|
|
39ce0ac407 | ||
|
|
72bee56412 | ||
|
|
b32da3a714 | ||
|
|
971452e5d3 | ||
|
|
bba4345b0f | ||
|
|
b2392fb250 | ||
|
|
697f4131e7 | ||
|
|
e282715251 | ||
|
|
709df6db3e | ||
|
|
cf2b436470 | ||
|
|
2a29021572 | ||
|
|
a3f9a89079 | ||
|
|
ee27bf3153 | ||
|
|
a90f681957 | ||
|
|
3afc82ef9a | ||
|
|
d3a16f4c59 | ||
|
|
2a1911a66f | ||
|
|
08341b2385 | ||
|
|
6cde07d479 | ||
|
|
06b1e84f99 | ||
|
|
2b7e93ec92 | ||
|
|
ca23ae7a30 | ||
|
|
661fd86305 | ||
|
|
594a499b95 | ||
|
|
44aed84827 | ||
|
|
bf038eb4a2 | ||
|
|
6da3129b4e | ||
|
|
ac0f9b6a82 | ||
|
|
16aef10cca | ||
|
|
19031ebdfd | ||
|
|
0eebbc51d5 | ||
|
|
d321a8ba7e | ||
|
|
3ea86222ca | ||
|
|
c3ebe930d9 | ||
|
|
f2b96f2a38 | ||
|
|
9038239bbe | ||
|
|
3e64eb9c4f | ||
|
|
92992b8c14 | ||
|
|
4ee9d77532 | ||
|
|
bd7a5bd4b0 | ||
|
|
1cd49f8ee3 | ||
|
|
7a919d867b | ||
|
|
ce50c627a7 | ||
|
|
691d5f0271 | ||
|
|
56151089e3 | ||
|
|
af7c1caf98 | ||
|
|
dd208ab67c | ||
|
|
8189d41a45 | ||
|
|
ea3477c8ce | ||
|
|
b03f8911a5 |
1
.github/CODEOWNERS
vendored
Normal file
1
.github/CODEOWNERS
vendored
Normal file
@@ -0,0 +1 @@
|
|||||||
|
* @oschwartz10612 @miloschwartz
|
||||||
47
.github/DISCUSSION_TEMPLATE/feature-requests.yml
vendored
Normal file
47
.github/DISCUSSION_TEMPLATE/feature-requests.yml
vendored
Normal file
@@ -0,0 +1,47 @@
|
|||||||
|
body:
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: Summary
|
||||||
|
description: A clear and concise summary of the requested feature.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: Motivation
|
||||||
|
description: |
|
||||||
|
Why is this feature important?
|
||||||
|
Explain the problem this feature would solve or what use case it would enable.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: Proposed Solution
|
||||||
|
description: |
|
||||||
|
How would you like to see this feature implemented?
|
||||||
|
Provide as much detail as possible about the desired behavior, configuration, or changes.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: Alternatives Considered
|
||||||
|
description: Describe any alternative solutions or workarounds you've thought about.
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: Additional Context
|
||||||
|
description: Add any other context, mockups, or screenshots about the feature request here.
|
||||||
|
validations:
|
||||||
|
required: false
|
||||||
|
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
Before submitting, please:
|
||||||
|
- Check if there is an existing issue for this feature.
|
||||||
|
- Clearly explain the benefit and use case.
|
||||||
|
- Be as specific as possible to help contributors evaluate and implement.
|
||||||
51
.github/ISSUE_TEMPLATE/1.bug_report.yml
vendored
Normal file
51
.github/ISSUE_TEMPLATE/1.bug_report.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
name: Bug Report
|
||||||
|
description: Create a bug report
|
||||||
|
labels: []
|
||||||
|
body:
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: Describe the Bug
|
||||||
|
description: A clear and concise description of what the bug is.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: Environment
|
||||||
|
description: Please fill out the relevant details below for your environment.
|
||||||
|
value: |
|
||||||
|
- OS Type & Version: (e.g., Ubuntu 22.04)
|
||||||
|
- Pangolin Version:
|
||||||
|
- Gerbil Version:
|
||||||
|
- Traefik Version:
|
||||||
|
- Newt Version:
|
||||||
|
- Olm Version: (if applicable)
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: To Reproduce
|
||||||
|
description: |
|
||||||
|
Steps to reproduce the behavior, please provide a clear description of how to reproduce the issue, based on the linked minimal reproduction. Screenshots can be provided in the issue body below.
|
||||||
|
|
||||||
|
If using code blocks, make sure syntax highlighting is correct and double-check that the rendered preview is not broken.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: textarea
|
||||||
|
attributes:
|
||||||
|
label: Expected Behavior
|
||||||
|
description: A clear and concise description of what you expected to happen.
|
||||||
|
validations:
|
||||||
|
required: true
|
||||||
|
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
Before posting the issue go through the steps you've written down to make sure the steps provided are detailed and clear.
|
||||||
|
|
||||||
|
- type: markdown
|
||||||
|
attributes:
|
||||||
|
value: |
|
||||||
|
Contributors should be able to follow the steps provided in order to reproduce the bug.
|
||||||
8
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
8
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
blank_issues_enabled: false
|
||||||
|
contact_links:
|
||||||
|
- name: Need help or have questions?
|
||||||
|
url: https://github.com/orgs/fosrl/discussions
|
||||||
|
about: Ask questions, get help, and discuss with other community members
|
||||||
|
- name: Request a Feature
|
||||||
|
url: https://github.com/orgs/fosrl/discussions/new?category=feature-requests
|
||||||
|
about: Feature requests should be opened as discussions so others can upvote and comment
|
||||||
179
.github/workflows/cicd.yml
vendored
179
.github/workflows/cicd.yml
vendored
@@ -1,52 +1,161 @@
|
|||||||
name: CI/CD Pipeline
|
name: CI/CD Pipeline
|
||||||
|
|
||||||
|
# CI/CD workflow for building, publishing, mirroring, signing container images and building release binaries.
|
||||||
|
# Actions are pinned to specific SHAs to reduce supply-chain risk. This workflow triggers on tag push events.
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write # for GHCR push
|
||||||
|
id-token: write # for Cosign Keyless (OIDC) Signing
|
||||||
|
|
||||||
|
# Required secrets:
|
||||||
|
# - DOCKER_HUB_USERNAME / DOCKER_HUB_ACCESS_TOKEN: push to Docker Hub
|
||||||
|
# - GITHUB_TOKEN: used for GHCR login and OIDC keyless signing
|
||||||
|
# - COSIGN_PRIVATE_KEY / COSIGN_PASSWORD / COSIGN_PUBLIC_KEY: for key-based signing
|
||||||
|
|
||||||
on:
|
on:
|
||||||
push:
|
push:
|
||||||
tags:
|
tags:
|
||||||
- "*"
|
- "[0-9]+.[0-9]+.[0-9]+"
|
||||||
|
- "[0-9]+.[0-9]+.[0-9]+.rc.[0-9]+"
|
||||||
|
|
||||||
|
concurrency:
|
||||||
|
group: ${{ github.ref }}
|
||||||
|
cancel-in-progress: true
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
release:
|
release:
|
||||||
name: Build and Release
|
name: Build and Release
|
||||||
runs-on: ubuntu-latest
|
runs-on: amd64-runner
|
||||||
|
# Job-level timeout to avoid runaway or stuck runs
|
||||||
|
timeout-minutes: 120
|
||||||
|
env:
|
||||||
|
# Target images
|
||||||
|
DOCKERHUB_IMAGE: docker.io/fosrl/${{ github.event.repository.name }}
|
||||||
|
GHCR_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout code
|
- name: Checkout code
|
||||||
uses: actions/checkout@v4
|
uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||||
|
|
||||||
- name: Set up QEMU
|
- name: Set up QEMU
|
||||||
uses: docker/setup-qemu-action@v3
|
uses: docker/setup-qemu-action@c7c53464625b32c7a7e944ae62b3e17d2b600130 # v3.7.0
|
||||||
|
|
||||||
- name: Set up Docker Buildx
|
- name: Set up Docker Buildx
|
||||||
uses: docker/setup-buildx-action@v3
|
uses: docker/setup-buildx-action@8d2750c68a42422c14e847fe6c8ac0403b4cbd6f # v3.12.0
|
||||||
|
|
||||||
- name: Log in to Docker Hub
|
- name: Log in to Docker Hub
|
||||||
uses: docker/login-action@v3
|
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
|
||||||
with:
|
with:
|
||||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
registry: docker.io
|
||||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||||
|
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||||
|
|
||||||
- name: Extract tag name
|
- name: Extract tag name
|
||||||
id: get-tag
|
id: get-tag
|
||||||
run: echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
run: echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
||||||
|
shell: bash
|
||||||
|
|
||||||
- name: Install Go
|
- name: Install Go
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
|
||||||
with:
|
with:
|
||||||
go-version: 1.25
|
go-version: 1.25
|
||||||
|
|
||||||
- name: Build and push Docker images
|
- name: Update version in main.go
|
||||||
run: |
|
run: |
|
||||||
TAG=${{ env.TAG }}
|
TAG=${{ env.TAG }}
|
||||||
make docker-build-release tag=$TAG
|
if [ -f main.go ]; then
|
||||||
|
sed -i 's/version_replaceme/'"$TAG"'/' main.go
|
||||||
|
echo "Updated main.go with version $TAG"
|
||||||
|
else
|
||||||
|
echo "main.go not found"
|
||||||
|
fi
|
||||||
|
shell: bash
|
||||||
|
|
||||||
- name: Build binaries
|
- name: Build and push Docker images (Docker Hub)
|
||||||
run: |
|
run: |
|
||||||
make go-build-release
|
TAG=${{ env.TAG }}
|
||||||
|
make docker-build-release tag=$TAG
|
||||||
|
echo "Built & pushed to: ${{ env.DOCKERHUB_IMAGE }}:${TAG}"
|
||||||
|
shell: bash
|
||||||
|
|
||||||
- name: Upload artifacts from /bin
|
- name: Login in to GHCR
|
||||||
uses: actions/upload-artifact@v4
|
uses: docker/login-action@b45d80f862d83dbcd57f89517bcf500b2ab88fb2 # v4.0.0
|
||||||
with:
|
with:
|
||||||
name: binaries
|
registry: ghcr.io
|
||||||
path: bin/
|
username: ${{ github.actor }}
|
||||||
|
password: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
|
||||||
|
- name: Install skopeo + jq
|
||||||
|
# skopeo: copy/inspect images between registries
|
||||||
|
# jq: JSON parsing tool used to extract digest values
|
||||||
|
run: |
|
||||||
|
sudo apt-get update -y
|
||||||
|
sudo apt-get install -y skopeo jq
|
||||||
|
skopeo --version
|
||||||
|
shell: bash
|
||||||
|
|
||||||
|
- name: Copy tag from Docker Hub to GHCR
|
||||||
|
# Mirror the already-built image (all architectures) to GHCR so we can sign it
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
TAG=${{ env.TAG }}
|
||||||
|
echo "Copying ${{ env.DOCKERHUB_IMAGE }}:${TAG} -> ${{ env.GHCR_IMAGE }}:${TAG}"
|
||||||
|
skopeo copy --all --retry-times 3 \
|
||||||
|
docker://$DOCKERHUB_IMAGE:$TAG \
|
||||||
|
docker://$GHCR_IMAGE:$TAG
|
||||||
|
shell: bash
|
||||||
|
|
||||||
|
- name: Install cosign
|
||||||
|
# cosign is used to sign and verify container images (key and keyless)
|
||||||
|
uses: sigstore/cosign-installer@ba7bc0a3fef59531c69a25acd34668d6d3fe6f22 # v4.1.0
|
||||||
|
|
||||||
|
- name: Dual-sign and verify (GHCR & Docker Hub)
|
||||||
|
# Sign each image by digest using keyless (OIDC) and key-based signing,
|
||||||
|
# then verify both the public key signature and the keyless OIDC signature.
|
||||||
|
env:
|
||||||
|
TAG: ${{ env.TAG }}
|
||||||
|
COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
|
||||||
|
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
|
||||||
|
COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
|
||||||
|
COSIGN_YES: "true"
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
|
||||||
|
issuer="https://token.actions.githubusercontent.com"
|
||||||
|
id_regex="^https://github.com/${{ github.repository }}/.+" # accept this repo (all workflows/refs)
|
||||||
|
|
||||||
|
for IMAGE in "${GHCR_IMAGE}" "${DOCKERHUB_IMAGE}"; do
|
||||||
|
echo "Processing ${IMAGE}:${TAG}"
|
||||||
|
|
||||||
|
DIGEST="$(skopeo inspect --retry-times 3 docker://${IMAGE}:${TAG} | jq -r '.Digest')"
|
||||||
|
REF="${IMAGE}@${DIGEST}"
|
||||||
|
echo "Resolved digest: ${REF}"
|
||||||
|
|
||||||
|
echo "==> cosign sign (keyless) --recursive ${REF}"
|
||||||
|
cosign sign --recursive "${REF}"
|
||||||
|
|
||||||
|
echo "==> cosign sign (key) --recursive ${REF}"
|
||||||
|
cosign sign --key env://COSIGN_PRIVATE_KEY --recursive "${REF}"
|
||||||
|
|
||||||
|
echo "==> cosign verify (public key) ${REF}"
|
||||||
|
cosign verify --key env://COSIGN_PUBLIC_KEY "${REF}" -o text
|
||||||
|
|
||||||
|
echo "==> cosign verify (keyless policy) ${REF}"
|
||||||
|
cosign verify \
|
||||||
|
--certificate-oidc-issuer "${issuer}" \
|
||||||
|
--certificate-identity-regexp "${id_regex}" \
|
||||||
|
"${REF}" -o text
|
||||||
|
done
|
||||||
|
shell: bash
|
||||||
|
|
||||||
|
- name: Build binaries
|
||||||
|
run: |
|
||||||
|
make go-build-release
|
||||||
|
shell: bash
|
||||||
|
|
||||||
|
- name: Upload artifacts from /bin
|
||||||
|
uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0
|
||||||
|
with:
|
||||||
|
name: binaries
|
||||||
|
path: bin/
|
||||||
|
|||||||
132
.github/workflows/mirror.yaml
vendored
Normal file
132
.github/workflows/mirror.yaml
vendored
Normal file
@@ -0,0 +1,132 @@
|
|||||||
|
name: Mirror & Sign (Docker Hub to GHCR)
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch: {}
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
packages: write
|
||||||
|
id-token: write # for keyless OIDC
|
||||||
|
|
||||||
|
env:
|
||||||
|
SOURCE_IMAGE: docker.io/fosrl/gerbil
|
||||||
|
DEST_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
mirror-and-dual-sign:
|
||||||
|
runs-on: amd64-runner
|
||||||
|
steps:
|
||||||
|
- name: Install skopeo + jq
|
||||||
|
run: |
|
||||||
|
sudo apt-get update -y
|
||||||
|
sudo apt-get install -y skopeo jq
|
||||||
|
skopeo --version
|
||||||
|
|
||||||
|
- name: Install cosign
|
||||||
|
uses: sigstore/cosign-installer@ba7bc0a3fef59531c69a25acd34668d6d3fe6f22 # v4.1.0
|
||||||
|
|
||||||
|
- name: Input check
|
||||||
|
run: |
|
||||||
|
test -n "${SOURCE_IMAGE}" || (echo "SOURCE_IMAGE is empty" && exit 1)
|
||||||
|
echo "Source : ${SOURCE_IMAGE}"
|
||||||
|
echo "Target : ${DEST_IMAGE}"
|
||||||
|
|
||||||
|
# Auth for skopeo (containers-auth)
|
||||||
|
- name: Skopeo login to GHCR
|
||||||
|
run: |
|
||||||
|
skopeo login ghcr.io -u "${{ github.actor }}" -p "${{ secrets.GITHUB_TOKEN }}"
|
||||||
|
|
||||||
|
# Auth for cosign (docker-config)
|
||||||
|
- name: Docker login to GHCR (for cosign)
|
||||||
|
run: |
|
||||||
|
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
|
||||||
|
|
||||||
|
- name: List source tags
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
skopeo list-tags --retry-times 3 docker://"${SOURCE_IMAGE}" \
|
||||||
|
| jq -r '.Tags[]' | sort -u > src-tags.txt
|
||||||
|
echo "Found source tags: $(wc -l < src-tags.txt)"
|
||||||
|
head -n 20 src-tags.txt || true
|
||||||
|
|
||||||
|
- name: List destination tags (skip existing)
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
if skopeo list-tags --retry-times 3 docker://"${DEST_IMAGE}" >/tmp/dst.json 2>/dev/null; then
|
||||||
|
jq -r '.Tags[]' /tmp/dst.json | sort -u > dst-tags.txt
|
||||||
|
else
|
||||||
|
: > dst-tags.txt
|
||||||
|
fi
|
||||||
|
echo "Existing destination tags: $(wc -l < dst-tags.txt)"
|
||||||
|
|
||||||
|
- name: Mirror, dual-sign, and verify
|
||||||
|
env:
|
||||||
|
# keyless
|
||||||
|
COSIGN_YES: "true"
|
||||||
|
# key-based
|
||||||
|
COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
|
||||||
|
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
|
||||||
|
# verify
|
||||||
|
COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
|
||||||
|
run: |
|
||||||
|
set -euo pipefail
|
||||||
|
copied=0; skipped=0; v_ok=0; errs=0
|
||||||
|
|
||||||
|
issuer="https://token.actions.githubusercontent.com"
|
||||||
|
id_regex="^https://github.com/${{ github.repository }}/.+"
|
||||||
|
|
||||||
|
while read -r tag; do
|
||||||
|
[ -z "$tag" ] && continue
|
||||||
|
|
||||||
|
if grep -Fxq "$tag" dst-tags.txt; then
|
||||||
|
echo "::notice ::Skip (exists) ${DEST_IMAGE}:${tag}"
|
||||||
|
skipped=$((skipped+1))
|
||||||
|
continue
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> Copy ${SOURCE_IMAGE}:${tag} → ${DEST_IMAGE}:${tag}"
|
||||||
|
if ! skopeo copy --all --retry-times 3 \
|
||||||
|
docker://"${SOURCE_IMAGE}:${tag}" docker://"${DEST_IMAGE}:${tag}"; then
|
||||||
|
echo "::warning title=Copy failed::${SOURCE_IMAGE}:${tag}"
|
||||||
|
errs=$((errs+1)); continue
|
||||||
|
fi
|
||||||
|
copied=$((copied+1))
|
||||||
|
|
||||||
|
digest="$(skopeo inspect --retry-times 3 docker://"${DEST_IMAGE}:${tag}" | jq -r '.Digest')"
|
||||||
|
ref="${DEST_IMAGE}@${digest}"
|
||||||
|
|
||||||
|
echo "==> cosign sign (keyless) --recursive ${ref}"
|
||||||
|
if ! cosign sign --recursive "${ref}"; then
|
||||||
|
echo "::warning title=Keyless sign failed::${ref}"
|
||||||
|
errs=$((errs+1))
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> cosign sign (key) --recursive ${ref}"
|
||||||
|
if ! cosign sign --key env://COSIGN_PRIVATE_KEY --recursive "${ref}"; then
|
||||||
|
echo "::warning title=Key sign failed::${ref}"
|
||||||
|
errs=$((errs+1))
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> cosign verify (public key) ${ref}"
|
||||||
|
if ! cosign verify --key env://COSIGN_PUBLIC_KEY "${ref}" -o text; then
|
||||||
|
echo "::warning title=Verify(pubkey) failed::${ref}"
|
||||||
|
errs=$((errs+1))
|
||||||
|
fi
|
||||||
|
|
||||||
|
echo "==> cosign verify (keyless policy) ${ref}"
|
||||||
|
if ! cosign verify \
|
||||||
|
--certificate-oidc-issuer "${issuer}" \
|
||||||
|
--certificate-identity-regexp "${id_regex}" \
|
||||||
|
"${ref}" -o text; then
|
||||||
|
echo "::warning title=Verify(keyless) failed::${ref}"
|
||||||
|
errs=$((errs+1))
|
||||||
|
else
|
||||||
|
v_ok=$((v_ok+1))
|
||||||
|
fi
|
||||||
|
done < src-tags.txt
|
||||||
|
|
||||||
|
echo "---- Summary ----"
|
||||||
|
echo "Copied : $copied"
|
||||||
|
echo "Skipped : $skipped"
|
||||||
|
echo "Verified OK : $v_ok"
|
||||||
|
echo "Errors : $errs"
|
||||||
11
.github/workflows/test.yml
vendored
11
.github/workflows/test.yml
vendored
@@ -1,5 +1,8 @@
|
|||||||
name: Run Tests
|
name: Run Tests
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: read
|
||||||
|
|
||||||
on:
|
on:
|
||||||
pull_request:
|
pull_request:
|
||||||
branches:
|
branches:
|
||||||
@@ -8,15 +11,15 @@ on:
|
|||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test:
|
test:
|
||||||
runs-on: ubuntu-latest
|
runs-on: amd64-runner
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2
|
||||||
|
|
||||||
- name: Set up Go
|
- name: Set up Go
|
||||||
uses: actions/setup-go@v5
|
uses: actions/setup-go@4b73464bb391d4059bd26b0524d20df3927bd417 # v6.3.0
|
||||||
with:
|
with:
|
||||||
go-version: '1.25'
|
go-version: 1.26
|
||||||
|
|
||||||
- name: Build go
|
- name: Build go
|
||||||
run: go build
|
run: go build
|
||||||
|
|||||||
@@ -4,11 +4,7 @@ Contributions are welcome!
|
|||||||
|
|
||||||
Please see the contribution and local development guide on the docs page before getting started:
|
Please see the contribution and local development guide on the docs page before getting started:
|
||||||
|
|
||||||
https://docs.fossorial.io/development
|
https://docs.pangolin.net/development/contributing
|
||||||
|
|
||||||
For ideas about what features to work on and our future plans, please see the roadmap:
|
|
||||||
|
|
||||||
https://docs.fossorial.io/roadmap
|
|
||||||
|
|
||||||
### Licensing Considerations
|
### Licensing Considerations
|
||||||
|
|
||||||
|
|||||||
11
Dockerfile
11
Dockerfile
@@ -1,4 +1,4 @@
|
|||||||
FROM golang:1.25-alpine AS builder
|
FROM golang:1.26-alpine AS builder
|
||||||
|
|
||||||
# Set the working directory inside the container
|
# Set the working directory inside the container
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
@@ -16,18 +16,13 @@ COPY . .
|
|||||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /gerbil
|
RUN CGO_ENABLED=0 GOOS=linux go build -o /gerbil
|
||||||
|
|
||||||
# Start a new stage from scratch
|
# Start a new stage from scratch
|
||||||
FROM ubuntu:24.04 AS runner
|
FROM alpine:3.23 AS runner
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y iptables iproute2 && rm -rf /var/lib/apt/lists/*
|
RUN apk add --no-cache iptables iproute2
|
||||||
|
|
||||||
# Copy the pre-built binary file from the previous stage and the entrypoint script
|
|
||||||
COPY --from=builder /gerbil /usr/local/bin/
|
COPY --from=builder /gerbil /usr/local/bin/
|
||||||
COPY entrypoint.sh /
|
COPY entrypoint.sh /
|
||||||
|
|
||||||
RUN chmod +x /entrypoint.sh
|
RUN chmod +x /entrypoint.sh
|
||||||
|
|
||||||
# Copy the entrypoint script
|
|
||||||
ENTRYPOINT ["/entrypoint.sh"]
|
ENTRYPOINT ["/entrypoint.sh"]
|
||||||
|
|
||||||
# Command to run the executable
|
|
||||||
CMD ["gerbil"]
|
CMD ["gerbil"]
|
||||||
36
README.md
36
README.md
@@ -6,7 +6,7 @@ Gerbil is a simple [WireGuard](https://www.wireguard.com/) interface management
|
|||||||
|
|
||||||
Gerbil works with Pangolin, Newt, and Olm as part of the larger system. See documentation below:
|
Gerbil works with Pangolin, Newt, and Olm as part of the larger system. See documentation below:
|
||||||
|
|
||||||
- [Full Documentation](https://docs.fossorial.io)
|
- [Full Documentation](https://docs.pangolin.net)
|
||||||
|
|
||||||
## Key Functions
|
## Key Functions
|
||||||
|
|
||||||
@@ -20,7 +20,7 @@ Gerbil will create the peers defined in the config on the WireGuard interface. T
|
|||||||
|
|
||||||
### Report Bandwidth
|
### Report Bandwidth
|
||||||
|
|
||||||
Bytes transmitted in and out of each peer are collected every 10 seconds, and incremental usage is reported via the "reportBandwidthTo" endpoint. This can be used to track data usage of each peer on the remote server.
|
Bytes transmitted in and out of each peer are collected every 10 seconds, and incremental usage is reported via the api endpoint. This can be used to track data usage of each peer on the remote server.
|
||||||
|
|
||||||
### Handle client relaying
|
### Handle client relaying
|
||||||
|
|
||||||
@@ -40,18 +40,35 @@ The PROXY protocol allows downstream proxies to know the real client IP address
|
|||||||
|
|
||||||
In single node (self hosted) Pangolin deployments this can be bypassed by using port 443:443 to route to Traefik instead of the SNI proxy at 8443.
|
In single node (self hosted) Pangolin deployments this can be bypassed by using port 443:443 to route to Traefik instead of the SNI proxy at 8443.
|
||||||
|
|
||||||
|
### Observability with OpenTelemetry
|
||||||
|
|
||||||
|
Gerbil includes comprehensive OpenTelemetry metrics instrumentation for monitoring and observability. Metrics can be exported via:
|
||||||
|
|
||||||
|
- **Prometheus**: Pull-based metrics at the `/metrics` endpoint (enabled by default)
|
||||||
|
- **OTLP**: Push-based metrics to any OpenTelemetry-compatible collector
|
||||||
|
|
||||||
|
Key metrics include:
|
||||||
|
|
||||||
|
- WireGuard interface and peer status
|
||||||
|
- Bandwidth usage per peer
|
||||||
|
- Active relay sessions and proxy connections
|
||||||
|
- Handshake success/failure rates
|
||||||
|
- Route lookup cache hit/miss ratios
|
||||||
|
- Go runtime metrics (GC, goroutines, memory)
|
||||||
|
|
||||||
|
See [docs/observability.md](docs/observability.md) for complete documentation, metrics reference, and examples.
|
||||||
|
|
||||||
## CLI Args
|
## CLI Args
|
||||||
|
|
||||||
|
Important:
|
||||||
- `reachableAt`: How should the remote server reach Gerbil's API?
|
- `reachableAt`: How should the remote server reach Gerbil's API?
|
||||||
- `generateAndSaveKeyTo`: Where to save the generated WireGuard private key to persist across restarts.
|
- `generateAndSaveKeyTo`: Where to save the generated WireGuard private key to persist across restarts.
|
||||||
- `remoteConfig` (optional): Remote config location to HTTP get the JSON based config from. See `example_config.json`
|
- `remoteConfig`: Remote config location to HTTP get the JSON based config from.
|
||||||
- `config` (optional): Local JSON file path to load config. Used if remote config is not supplied. See `example_config.json`
|
|
||||||
|
|
||||||
Note: You must use either `config` or `remoteConfig` to configure WireGuard.
|
|
||||||
|
|
||||||
|
Others:
|
||||||
- `reportBandwidthTo` (optional): **DEPRECATED** - Use `remoteConfig` instead. Remote HTTP endpoint to send peer bandwidth data
|
- `reportBandwidthTo` (optional): **DEPRECATED** - Use `remoteConfig` instead. Remote HTTP endpoint to send peer bandwidth data
|
||||||
- `interface` (optional): Name of the WireGuard interface created by Gerbil. Default: `wg0`
|
- `interface` (optional): Name of the WireGuard interface created by Gerbil. Default: `wg0`
|
||||||
- `listen` (optional): Port to listen on for HTTP server. Default: `:3003`
|
- `listen` (optional): Port to listen on for HTTP server. Default: `:3004`
|
||||||
- `log-level` (optional): The log level to use (DEBUG, INFO, WARN, ERROR, FATAL). Default: `INFO`
|
- `log-level` (optional): The log level to use (DEBUG, INFO, WARN, ERROR, FATAL). Default: `INFO`
|
||||||
- `mtu` (optional): MTU of the WireGuard interface. Default: `1280`
|
- `mtu` (optional): MTU of the WireGuard interface. Default: `1280`
|
||||||
- `notify` (optional): URL to notify on peer changes
|
- `notify` (optional): URL to notify on peer changes
|
||||||
@@ -66,7 +83,6 @@ Note: You must use either `config` or `remoteConfig` to configure WireGuard.
|
|||||||
All CLI arguments can also be provided via environment variables:
|
All CLI arguments can also be provided via environment variables:
|
||||||
|
|
||||||
- `INTERFACE`: Name of the WireGuard interface
|
- `INTERFACE`: Name of the WireGuard interface
|
||||||
- `CONFIG`: Path to local configuration file
|
|
||||||
- `REMOTE_CONFIG`: URL of the remote config server
|
- `REMOTE_CONFIG`: URL of the remote config server
|
||||||
- `LISTEN`: Address to listen on for HTTP server
|
- `LISTEN`: Address to listen on for HTTP server
|
||||||
- `GENERATE_AND_SAVE_KEY_TO`: Path to save generated private key
|
- `GENERATE_AND_SAVE_KEY_TO`: Path to save generated private key
|
||||||
@@ -84,7 +100,7 @@ Example:
|
|||||||
|
|
||||||
```bash
|
```bash
|
||||||
./gerbil \
|
./gerbil \
|
||||||
--reachableAt=http://gerbil:3003 \
|
--reachableAt=http://gerbil:3004 \
|
||||||
--generateAndSaveKeyTo=/var/config/key \
|
--generateAndSaveKeyTo=/var/config/key \
|
||||||
--remoteConfig=http://pangolin:3001/api/v1/
|
--remoteConfig=http://pangolin:3001/api/v1/
|
||||||
```
|
```
|
||||||
@@ -96,7 +112,7 @@ services:
|
|||||||
container_name: gerbil
|
container_name: gerbil
|
||||||
restart: unless-stopped
|
restart: unless-stopped
|
||||||
command:
|
command:
|
||||||
- --reachableAt=http://gerbil:3003
|
- --reachableAt=http://gerbil:3004
|
||||||
- --generateAndSaveKeyTo=/var/config/key
|
- --generateAndSaveKeyTo=/var/config/key
|
||||||
- --remoteConfig=http://pangolin:3001/api/v1/
|
- --remoteConfig=http://pangolin:3001/api/v1/
|
||||||
volumes:
|
volumes:
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
If you discover a security vulnerability, please follow the steps below to responsibly disclose it to us:
|
If you discover a security vulnerability, please follow the steps below to responsibly disclose it to us:
|
||||||
|
|
||||||
1. **Do not create a public GitHub issue or discussion post.** This could put the security of other users at risk.
|
1. **Do not create a public GitHub issue or discussion post.** This could put the security of other users at risk.
|
||||||
2. Send a detailed report to [security@fossorial.io](mailto:security@fossorial.io) or send a **private** message to a maintainer on [Discord](https://discord.gg/HCJR8Xhme4). Include:
|
2. Send a detailed report to [security@pangolin.net](mailto:security@pangolin.net) or send a **private** message to a maintainer on [Discord](https://discord.gg/HCJR8Xhme4). Include:
|
||||||
|
|
||||||
- Description and location of the vulnerability.
|
- Description and location of the vulnerability.
|
||||||
- Potential impact of the vulnerability.
|
- Potential impact of the vulnerability.
|
||||||
|
|||||||
@@ -1,23 +0,0 @@
|
|||||||
{
|
|
||||||
"privateKey": "kBGTgk7c+zncEEoSnMl+jsLjVh5ZVoL/HwBSQem+d1M=",
|
|
||||||
"listenPort": 51820,
|
|
||||||
"ipAddress": "10.0.0.1/24",
|
|
||||||
"peers": [
|
|
||||||
{
|
|
||||||
"publicKey": "5UzzoeveFVSzuqK3nTMS5bA1jIMs1fQffVQzJ8MXUQM=",
|
|
||||||
"allowedIps": ["10.0.0.0/28"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"publicKey": "kYrZpuO2NsrFoBh1GMNgkhd1i9Rgtu1rAjbJ7qsfngU=",
|
|
||||||
"allowedIps": ["10.0.0.16/28"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"publicKey": "1YfPUVr9ZF4zehkbI2BQhCxaRLz+Vtwa4vJwH+mpK0A=",
|
|
||||||
"allowedIps": ["10.0.0.32/28"]
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"publicKey": "2/U4oyZ+sai336Dal/yExCphL8AxyqvIxMk4qsUy4iI=",
|
|
||||||
"allowedIps": ["10.0.0.48/28"]
|
|
||||||
}
|
|
||||||
]
|
|
||||||
}
|
|
||||||
269
docs/observability.md
Normal file
269
docs/observability.md
Normal file
@@ -0,0 +1,269 @@
|
|||||||
|
<!-- markdownlint-disable MD036 MD060 -->
|
||||||
|
# Gerbil Observability Architecture
|
||||||
|
|
||||||
|
This document describes the metrics subsystem for Gerbil, explains the design
|
||||||
|
decisions, and shows how to configure each backend.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Architecture Overview
|
||||||
|
|
||||||
|
Gerbil's metrics subsystem uses a **pluggable backend** design:
|
||||||
|
|
||||||
|
```text
|
||||||
|
main.go ─── internal/metrics ─── internal/observability ─── backend
|
||||||
|
(facade) (interface) Prometheus
|
||||||
|
OR OTel/OTLP
|
||||||
|
OR Noop (disabled)
|
||||||
|
```
|
||||||
|
|
||||||
|
Application code (main, relay, proxy) calls only the `metrics.Record*`
|
||||||
|
functions in `internal/metrics`. That package delegates to whichever backend
|
||||||
|
was selected at startup via `internal/observability.Backend`.
|
||||||
|
|
||||||
|
### Why Prometheus-native and OTel are mutually exclusive
|
||||||
|
|
||||||
|
**Exactly one** metrics backend may be active at runtime:
|
||||||
|
|
||||||
|
| Mode | What happens |
|
||||||
|
|------|-------------|
|
||||||
|
| `prometheus` | Native Prometheus client registers metrics on a dedicated registry and exposes `/metrics`. No OTel SDK is initialised. |
|
||||||
|
| `otel` | OTel SDK pushes metrics via OTLP/gRPC or OTLP/HTTP to an external collector. No `/metrics` endpoint is exposed. |
|
||||||
|
| `none` | A safe noop backend is used. All `Record*` calls are discarded. |
|
||||||
|
|
||||||
|
Running both simultaneously would mean every metric is recorded twice through
|
||||||
|
two different code paths, with differing semantics (pull vs. push, different
|
||||||
|
naming rules, different cardinality handling). The design enforces a single
|
||||||
|
source of truth.
|
||||||
|
|
||||||
|
### Future OTel tracing and logging
|
||||||
|
|
||||||
|
The `internal/observability/otel/` package is designed so that tracing and
|
||||||
|
logging support can be added **beside** the existing metrics code without
|
||||||
|
touching the Prometheus-native path:
|
||||||
|
|
||||||
|
```bash
|
||||||
|
internal/observability/otel/
|
||||||
|
backend.go ← metrics
|
||||||
|
exporter.go ← OTLP exporter creation
|
||||||
|
resource.go ← OTel resource
|
||||||
|
trace.go ← future: TracerProvider setup
|
||||||
|
log.go ← future: LoggerProvider setup
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Configuration
|
||||||
|
|
||||||
|
### Config precedence
|
||||||
|
|
||||||
|
1. CLI flags (highest priority)
|
||||||
|
2. Environment variables
|
||||||
|
3. Defaults
|
||||||
|
|
||||||
|
### Config struct
|
||||||
|
|
||||||
|
```go
|
||||||
|
type MetricsConfig struct {
|
||||||
|
Enabled bool
|
||||||
|
Backend string // "prometheus" | "otel" | "none"
|
||||||
|
Prometheus PrometheusConfig
|
||||||
|
OTel OTelConfig
|
||||||
|
ServiceName string
|
||||||
|
ServiceVersion string
|
||||||
|
DeploymentEnvironment string
|
||||||
|
}
|
||||||
|
|
||||||
|
type PrometheusConfig struct {
|
||||||
|
Path string // default: "/metrics"
|
||||||
|
}
|
||||||
|
|
||||||
|
type OTelConfig struct {
|
||||||
|
Protocol string // "grpc" (default) or "http"
|
||||||
|
Endpoint string // default: "localhost:4317"
|
||||||
|
Insecure bool // default: true
|
||||||
|
ExportInterval time.Duration // default: 60s
|
||||||
|
}
|
||||||
|
```
|
||||||
|
|
||||||
|
### Environment variables
|
||||||
|
|
||||||
|
| Variable | Default | Description |
|
||||||
|
|----------|---------|-------------|
|
||||||
|
| `METRICS_ENABLED` | `true` | Enable/disable metrics |
|
||||||
|
| `METRICS_BACKEND` | `prometheus` | Backend: `prometheus`, `otel`, or `none` |
|
||||||
|
| `METRICS_PATH` | `/metrics` | HTTP path for Prometheus endpoint |
|
||||||
|
| `OTEL_METRICS_PROTOCOL` | `grpc` | OTLP transport: `grpc` or `http` |
|
||||||
|
| `OTEL_METRICS_ENDPOINT` | `localhost:4317` | OTLP collector address |
|
||||||
|
| `OTEL_METRICS_INSECURE` | `true` | Disable TLS for OTLP |
|
||||||
|
| `OTEL_METRICS_EXPORT_INTERVAL` | `60s` | Push interval (e.g. `10s`, `1m`) |
|
||||||
|
| `DEPLOYMENT_ENVIRONMENT` | _(unset)_ | OTel deployment.environment attribute |
|
||||||
|
|
||||||
|
### CLI flags
|
||||||
|
|
||||||
|
```bash
|
||||||
|
--metrics-enabled bool (default: true)
|
||||||
|
--metrics-backend string (default: prometheus)
|
||||||
|
--metrics-path string (default: /metrics)
|
||||||
|
--otel-metrics-protocol string (default: grpc)
|
||||||
|
--otel-metrics-endpoint string (default: localhost:4317)
|
||||||
|
--otel-metrics-insecure bool (default: true)
|
||||||
|
--otel-metrics-export-interval duration (default: 1m0s)
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## When to choose each backend
|
||||||
|
|
||||||
|
| Criterion | Prometheus | OTel/OTLP |
|
||||||
|
|-----------|-----------|-----------|
|
||||||
|
| Existing Prometheus/Grafana stack | ✅ | |
|
||||||
|
| Pull-based scraping | ✅ | |
|
||||||
|
| No external collector required | ✅ | |
|
||||||
|
| Vendor-neutral telemetry | | ✅ |
|
||||||
|
| Push-based export | | ✅ |
|
||||||
|
| Grafana Cloud / managed OTLP | | ✅ |
|
||||||
|
| Future traces + logs via same pipeline | | ✅ |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Enabling Prometheus-native mode
|
||||||
|
|
||||||
|
### Environment variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
METRICS_ENABLED=true
|
||||||
|
METRICS_BACKEND=prometheus
|
||||||
|
METRICS_PATH=/metrics
|
||||||
|
```
|
||||||
|
|
||||||
|
### CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./gerbil --metrics-enabled --metrics-backend=prometheus --metrics-path=/metrics \
|
||||||
|
--config=/etc/gerbil/config.json
|
||||||
|
```
|
||||||
|
|
||||||
|
The metrics config is supplied separately via env/flags; it is not embedded
|
||||||
|
in the WireGuard config file.
|
||||||
|
|
||||||
|
The Prometheus `/metrics` endpoint is registered only when
|
||||||
|
`--metrics-backend=prometheus`. All gerbil_* metrics plus Go runtime metrics
|
||||||
|
are available.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Enabling OTel mode
|
||||||
|
|
||||||
|
### Environment variables
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export METRICS_ENABLED=true
|
||||||
|
export METRICS_BACKEND=otel
|
||||||
|
export OTEL_METRICS_PROTOCOL=grpc
|
||||||
|
export OTEL_METRICS_ENDPOINT=otel-collector:4317
|
||||||
|
export OTEL_METRICS_INSECURE=true
|
||||||
|
export OTEL_METRICS_EXPORT_INTERVAL=10s
|
||||||
|
export DEPLOYMENT_ENVIRONMENT=production
|
||||||
|
```
|
||||||
|
|
||||||
|
### CLI
|
||||||
|
|
||||||
|
```bash
|
||||||
|
./gerbil --metrics-enabled \
|
||||||
|
--metrics-backend=otel \
|
||||||
|
--otel-metrics-protocol=grpc \
|
||||||
|
--otel-metrics-endpoint=otel-collector:4317 \
|
||||||
|
--otel-metrics-insecure \
|
||||||
|
--otel-metrics-export-interval=10s \
|
||||||
|
--config=/etc/gerbil/config.json
|
||||||
|
```
|
||||||
|
|
||||||
|
### HTTP mode (OTLP/HTTP)
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export OTEL_METRICS_PROTOCOL=http
|
||||||
|
export OTEL_METRICS_ENDPOINT=otel-collector:4318
|
||||||
|
```
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Disabling metrics
|
||||||
|
|
||||||
|
```bash
|
||||||
|
export METRICS_ENABLED=false
|
||||||
|
# or
|
||||||
|
./gerbil --metrics-enabled=false
|
||||||
|
# or
|
||||||
|
./gerbil --metrics-backend=none
|
||||||
|
```
|
||||||
|
|
||||||
|
When disabled, all `Record*` calls are directed to a safe noop backend that
|
||||||
|
discards observations without allocating or locking.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Metric catalog
|
||||||
|
|
||||||
|
All metrics use the prefix `gerbil_<component>_<name>`.
|
||||||
|
|
||||||
|
### WireGuard metrics
|
||||||
|
|
||||||
|
| Metric | Type | Labels | Description |
|
||||||
|
|--------|------|--------|-------------|
|
||||||
|
| `gerbil_wg_interface_up` | Gauge | `ifname`, `instance` | 1=up, 0=down |
|
||||||
|
| `gerbil_wg_peers_total` | UpDownCounter | `ifname` | Configured peers |
|
||||||
|
| `gerbil_wg_peer_connected` | Gauge | `ifname`, `peer` | 1=connected, 0=disconnected |
|
||||||
|
| `gerbil_wg_bytes_received_total` | Counter | `ifname`, `peer` | Bytes received |
|
||||||
|
| `gerbil_wg_bytes_transmitted_total` | Counter | `ifname`, `peer` | Bytes transmitted |
|
||||||
|
| `gerbil_wg_handshakes_total` | Counter | `ifname`, `peer`, `result` | Handshake attempts |
|
||||||
|
| `gerbil_wg_handshake_latency_seconds` | Histogram | `ifname`, `peer` | Handshake duration |
|
||||||
|
| `gerbil_wg_peer_rtt_seconds` | Histogram | `ifname`, `peer` | Peer round-trip time |
|
||||||
|
|
||||||
|
### Relay metrics
|
||||||
|
|
||||||
|
| Metric | Type | Labels |
|
||||||
|
|--------|------|--------|
|
||||||
|
| `gerbil_proxy_mapping_active` | UpDownCounter | `ifname` |
|
||||||
|
| `gerbil_session_active` | UpDownCounter | `ifname` |
|
||||||
|
| `gerbil_active_sessions` | UpDownCounter | `ifname` |
|
||||||
|
| `gerbil_udp_packets_total` | Counter | `ifname`, `type`, `direction` |
|
||||||
|
| `gerbil_hole_punch_events_total` | Counter | `ifname`, `result` |
|
||||||
|
|
||||||
|
### SNI proxy metrics
|
||||||
|
|
||||||
|
| Metric | Type | Labels |
|
||||||
|
|--------|------|--------|
|
||||||
|
| `gerbil_sni_connections_total` | Counter | `result` |
|
||||||
|
| `gerbil_sni_active_connections` | UpDownCounter | _(none)_ |
|
||||||
|
| `gerbil_sni_route_cache_hits_total` | Counter | `result` |
|
||||||
|
| `gerbil_sni_route_api_requests_total` | Counter | `result` |
|
||||||
|
| `gerbil_proxy_route_lookups_total` | Counter | `result`, `hostname` |
|
||||||
|
|
||||||
|
### HTTP metrics
|
||||||
|
|
||||||
|
| Metric | Type | Labels |
|
||||||
|
|--------|------|--------|
|
||||||
|
| `gerbil_http_requests_total` | Counter | `endpoint`, `method`, `status_code` |
|
||||||
|
| `gerbil_http_request_duration_seconds` | Histogram | `endpoint`, `method` |
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
## Using Docker Compose
|
||||||
|
|
||||||
|
The `docker-compose.metrics.yml` provides a complete observability stack.
|
||||||
|
|
||||||
|
**Prometheus mode:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
METRICS_BACKEND=prometheus docker-compose -f docker-compose.metrics.yml up -d
|
||||||
|
# Scrape at http://localhost:3003/metrics
|
||||||
|
# Grafana at http://localhost:3000 (admin/admin)
|
||||||
|
```
|
||||||
|
|
||||||
|
**OTel mode:**
|
||||||
|
|
||||||
|
```bash
|
||||||
|
METRICS_BACKEND=otel OTEL_METRICS_ENDPOINT=otel-collector:4317 \
|
||||||
|
docker-compose -f docker-compose.metrics.yml up -d
|
||||||
|
```
|
||||||
46
examples/otel-collector-config.yaml
Normal file
46
examples/otel-collector-config.yaml
Normal file
@@ -0,0 +1,46 @@
|
|||||||
|
receivers:
|
||||||
|
otlp:
|
||||||
|
protocols:
|
||||||
|
grpc:
|
||||||
|
endpoint: 0.0.0.0:4317
|
||||||
|
http:
|
||||||
|
endpoint: 0.0.0.0:4318
|
||||||
|
|
||||||
|
processors:
|
||||||
|
batch:
|
||||||
|
timeout: 10s
|
||||||
|
send_batch_size: 1024
|
||||||
|
|
||||||
|
# Add resource attributes
|
||||||
|
resource:
|
||||||
|
attributes:
|
||||||
|
- key: service.environment
|
||||||
|
value: "development"
|
||||||
|
action: insert
|
||||||
|
|
||||||
|
exporters:
|
||||||
|
# Prometheus exporter for scraping
|
||||||
|
prometheus:
|
||||||
|
endpoint: "0.0.0.0:8889"
|
||||||
|
namespace: "gerbil"
|
||||||
|
send_timestamps: true
|
||||||
|
metric_expiration: 5m
|
||||||
|
resource_to_telemetry_conversion:
|
||||||
|
enabled: true
|
||||||
|
|
||||||
|
# Prometheus remote write (optional)
|
||||||
|
prometheusremotewrite:
|
||||||
|
endpoint: "http://prometheus:9090/api/v1/write"
|
||||||
|
tls:
|
||||||
|
insecure: true
|
||||||
|
|
||||||
|
# Debug exporter for debugging
|
||||||
|
debug:
|
||||||
|
verbosity: normal
|
||||||
|
|
||||||
|
service:
|
||||||
|
pipelines:
|
||||||
|
metrics:
|
||||||
|
receivers: [otlp]
|
||||||
|
processors: [batch, resource]
|
||||||
|
exporters: [prometheus, prometheusremotewrite, debug]
|
||||||
24
examples/prometheus.yml
Normal file
24
examples/prometheus.yml
Normal file
@@ -0,0 +1,24 @@
|
|||||||
|
global:
|
||||||
|
scrape_interval: 15s
|
||||||
|
evaluation_interval: 15s
|
||||||
|
external_labels:
|
||||||
|
cluster: 'gerbil-dev'
|
||||||
|
|
||||||
|
scrape_configs:
|
||||||
|
# Scrape Gerbil's /metrics endpoint directly
|
||||||
|
- job_name: 'gerbil'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['gerbil:3003']
|
||||||
|
labels:
|
||||||
|
service: 'gerbil'
|
||||||
|
environment: 'development'
|
||||||
|
|
||||||
|
# Scrape OpenTelemetry Collector metrics
|
||||||
|
- job_name: 'otel-collector'
|
||||||
|
static_configs:
|
||||||
|
- targets: ['otel-collector:8888']
|
||||||
|
labels:
|
||||||
|
service: 'otel-collector'
|
||||||
|
- targets: ['otel-collector:8889']
|
||||||
|
labels:
|
||||||
|
service: 'otel-collector-prometheus-exporter'
|
||||||
41
go.mod
41
go.mod
@@ -1,23 +1,50 @@
|
|||||||
module github.com/fosrl/gerbil
|
module github.com/fosrl/gerbil
|
||||||
|
|
||||||
go 1.25
|
go 1.26.0
|
||||||
|
|
||||||
require (
|
require (
|
||||||
|
github.com/patrickmn/go-cache v2.1.0+incompatible
|
||||||
|
github.com/prometheus/client_golang v1.20.5
|
||||||
github.com/vishvananda/netlink v1.3.1
|
github.com/vishvananda/netlink v1.3.1
|
||||||
golang.org/x/crypto v0.36.0
|
go.opentelemetry.io/otel v1.42.0
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0
|
||||||
|
go.opentelemetry.io/otel/metric v1.42.0
|
||||||
|
go.opentelemetry.io/otel/sdk v1.42.0
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.42.0
|
||||||
|
golang.org/x/crypto v0.49.0
|
||||||
|
golang.org/x/sync v0.20.0
|
||||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6
|
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6
|
||||||
)
|
)
|
||||||
|
|
||||||
require (
|
require (
|
||||||
github.com/google/go-cmp v0.5.9 // indirect
|
github.com/beorn7/perks v1.0.1 // indirect
|
||||||
|
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||||
|
github.com/go-logr/logr v1.4.3 // indirect
|
||||||
|
github.com/go-logr/stdr v1.2.2 // indirect
|
||||||
|
github.com/google/go-cmp v0.7.0 // indirect
|
||||||
|
github.com/google/uuid v1.6.0 // indirect
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 // indirect
|
||||||
github.com/josharian/native v1.1.0 // indirect
|
github.com/josharian/native v1.1.0 // indirect
|
||||||
|
github.com/klauspost/compress v1.17.9 // indirect
|
||||||
github.com/mdlayher/genetlink v1.3.2 // indirect
|
github.com/mdlayher/genetlink v1.3.2 // indirect
|
||||||
github.com/mdlayher/netlink v1.7.2 // indirect
|
github.com/mdlayher/netlink v1.7.2 // indirect
|
||||||
github.com/mdlayher/socket v0.4.1 // indirect
|
github.com/mdlayher/socket v0.4.1 // indirect
|
||||||
github.com/patrickmn/go-cache v2.1.0+incompatible // indirect
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||||
|
github.com/prometheus/client_model v0.6.1 // indirect
|
||||||
|
github.com/prometheus/common v0.61.0 // indirect
|
||||||
|
github.com/prometheus/procfs v0.15.1 // indirect
|
||||||
github.com/vishvananda/netns v0.0.5 // indirect
|
github.com/vishvananda/netns v0.0.5 // indirect
|
||||||
golang.org/x/net v0.38.0 // indirect
|
go.opentelemetry.io/auto/sdk v1.2.1 // indirect
|
||||||
golang.org/x/sync v0.1.0 // indirect
|
go.opentelemetry.io/otel/trace v1.42.0 // indirect
|
||||||
golang.org/x/sys v0.31.0 // indirect
|
go.opentelemetry.io/proto/otlp v1.9.0 // indirect
|
||||||
|
golang.org/x/net v0.51.0 // indirect
|
||||||
|
golang.org/x/sys v0.42.0 // indirect
|
||||||
|
golang.org/x/text v0.35.0 // indirect
|
||||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b // indirect
|
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b // indirect
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 // indirect
|
||||||
|
google.golang.org/grpc v1.79.3 // indirect
|
||||||
|
google.golang.org/protobuf v1.36.11 // indirect
|
||||||
)
|
)
|
||||||
|
|||||||
89
go.sum
89
go.sum
@@ -1,7 +1,30 @@
|
|||||||
github.com/google/go-cmp v0.5.9 h1:O2Tfq5qg4qc4AmwVlvv0oLiVAGB7enBSJ2x2DqQFi38=
|
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||||
github.com/google/go-cmp v0.5.9/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY=
|
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||||
|
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
|
||||||
|
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||||
|
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||||
|
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||||
|
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||||
|
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||||
|
github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||||
|
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||||
|
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||||
|
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||||
|
github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek=
|
||||||
|
github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps=
|
||||||
|
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||||
|
github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU=
|
||||||
|
github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||||
|
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0 h1:HWRh5R2+9EifMyIHV7ZV+MIZqgz+PMpZ14Jynv3O2Zs=
|
||||||
|
github.com/grpc-ecosystem/grpc-gateway/v2 v2.28.0/go.mod h1:JfhWUomR1baixubs02l85lZYYOm7LV6om4ceouMv45c=
|
||||||
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
|
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
|
||||||
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
|
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
|
||||||
|
github.com/klauspost/compress v1.17.9 h1:6KIumPrER1LHsvBVuDa0r5xaG0Es51mhhB9BQB2qeMA=
|
||||||
|
github.com/klauspost/compress v1.17.9/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw=
|
||||||
|
github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc=
|
||||||
|
github.com/kylelemons/godebug v1.1.0/go.mod h1:9/0rRGxNHcop5bhtWyNeEfOS8JIWk580+fNqagV/RAw=
|
||||||
github.com/mdlayher/genetlink v1.3.2 h1:KdrNKe+CTu+IbZnm/GVUMXSqBBLqcGpRDa0xkQy56gw=
|
github.com/mdlayher/genetlink v1.3.2 h1:KdrNKe+CTu+IbZnm/GVUMXSqBBLqcGpRDa0xkQy56gw=
|
||||||
github.com/mdlayher/genetlink v1.3.2/go.mod h1:tcC3pkCrPUGIKKsCsp0B3AdaaKuHtaxoJRz3cc+528o=
|
github.com/mdlayher/genetlink v1.3.2/go.mod h1:tcC3pkCrPUGIKKsCsp0B3AdaaKuHtaxoJRz3cc+528o=
|
||||||
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
|
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
|
||||||
@@ -10,23 +33,69 @@ github.com/mdlayher/socket v0.4.1 h1:eM9y2/jlbs1M615oshPQOHZzj6R6wMT7bX5NPiQvn2U
|
|||||||
github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
|
github.com/mdlayher/socket v0.4.1/go.mod h1:cAqeGjoufqdxWkD7DkpyS+wcefOtmu5OQ8KuoJGIReA=
|
||||||
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721 h1:RlZweED6sbSArvlE924+mUcZuXKLBHA35U7LN621Bws=
|
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721 h1:RlZweED6sbSArvlE924+mUcZuXKLBHA35U7LN621Bws=
|
||||||
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721/go.mod h1:Ickgr2WtCLZ2MDGd4Gr0geeCH5HybhRJbonOgQpvSxc=
|
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721/go.mod h1:Ickgr2WtCLZ2MDGd4Gr0geeCH5HybhRJbonOgQpvSxc=
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||||
|
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||||
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
|
github.com/patrickmn/go-cache v2.1.0+incompatible h1:HRMgzkcYKYpi3C8ajMPV8OFXaaRUnok+kx1WdO15EQc=
|
||||||
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
|
github.com/patrickmn/go-cache v2.1.0+incompatible/go.mod h1:3Qf8kWWT7OJRJbdiICTKqZju1ZixQ/KpMGzzAfe6+WQ=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||||
|
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||||
|
github.com/prometheus/client_golang v1.20.5 h1:cxppBPuYhUnsO6yo/aoRol4L7q7UFfdm+bR9r+8l63Y=
|
||||||
|
github.com/prometheus/client_golang v1.20.5/go.mod h1:PIEt8X02hGcP8JWbeHyeZ53Y/jReSnHgO035n//V5WE=
|
||||||
|
github.com/prometheus/client_model v0.6.1 h1:ZKSh/rekM+n3CeS952MLRAdFwIKqeY8b62p8ais2e9E=
|
||||||
|
github.com/prometheus/client_model v0.6.1/go.mod h1:OrxVMOVHjw3lKMa8+x6HeMGkHMQyHDk9E3jmP2AmGiY=
|
||||||
|
github.com/prometheus/common v0.61.0 h1:3gv/GThfX0cV2lpO7gkTUwZru38mxevy90Bj8YFSRQQ=
|
||||||
|
github.com/prometheus/common v0.61.0/go.mod h1:zr29OCN/2BsJRaFwG8QOBr41D6kkchKbpeNH7pAjb/s=
|
||||||
|
github.com/prometheus/procfs v0.15.1 h1:YagwOFzUgYfKKHX6Dr+sHT7km/hxC76UB0learggepc=
|
||||||
|
github.com/prometheus/procfs v0.15.1/go.mod h1:fB45yRUv8NstnjriLhBQLuOUt+WW4BsoGhij/e3PBqk=
|
||||||
|
github.com/stretchr/testify v1.11.1 h1:7s2iGBzp5EwR7/aIZr8ao5+dra3wiQyKjjFuvgVKu7U=
|
||||||
|
github.com/stretchr/testify v1.11.1/go.mod h1:wZwfW3scLgRK+23gO65QZefKpKQRnfz6sD981Nm4B6U=
|
||||||
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
|
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
|
||||||
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
|
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
|
||||||
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
|
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
|
||||||
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||||
golang.org/x/crypto v0.36.0 h1:AnAEvhDddvBdpY+uR+MyHmuZzzNqXSe/GvuDeob5L34=
|
go.opentelemetry.io/auto/sdk v1.2.1 h1:jXsnJ4Lmnqd11kwkBV2LgLoFMZKizbCi5fNZ/ipaZ64=
|
||||||
golang.org/x/crypto v0.36.0/go.mod h1:Y4J0ReaxCR1IMaabaSMugxJES1EpwhBHhv2bDHklZvc=
|
go.opentelemetry.io/auto/sdk v1.2.1/go.mod h1:KRTj+aOaElaLi+wW1kO/DZRXwkF4C5xPbEe3ZiIhN7Y=
|
||||||
golang.org/x/net v0.38.0 h1:vRMAPTMaeGqVhG5QyLJHqNDwecKTomGeqbnfZyKlBI8=
|
go.opentelemetry.io/otel v1.42.0 h1:lSQGzTgVR3+sgJDAU/7/ZMjN9Z+vUip7leaqBKy4sho=
|
||||||
golang.org/x/net v0.38.0/go.mod h1:ivrbrMbzFq5J41QOQh0siUuly180yBYtLp+CKbEaFx8=
|
go.opentelemetry.io/otel v1.42.0/go.mod h1:lJNsdRMxCUIWuMlVJWzecSMuNjE7dOYyWlqOXWkdqCc=
|
||||||
golang.org/x/sync v0.1.0 h1:wsuoTGHzEhffawBOhz5CYhcrV4IdKZbEyZjBMuTp12o=
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0 h1:MdKucPl/HbzckWWEisiNqMPhRrAOQX8r4jTuGr636gk=
|
||||||
golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.42.0/go.mod h1:RolT8tWtfHcjajEH5wFIZ4Dgh5jpPdFXYV9pTAk/qjc=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0 h1:H7O6RlGOMTizyl3R08Kn5pdM06bnH8oscSj7o11tmLA=
|
||||||
|
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp v1.42.0/go.mod h1:mBFWu/WOVDkWWsR7Tx7h6EpQB8wsv7P0Yrh0Pb7othc=
|
||||||
|
go.opentelemetry.io/otel/metric v1.42.0 h1:2jXG+3oZLNXEPfNmnpxKDeZsFI5o4J+nz6xUlaFdF/4=
|
||||||
|
go.opentelemetry.io/otel/metric v1.42.0/go.mod h1:RlUN/7vTU7Ao/diDkEpQpnz3/92J9ko05BIwxYa2SSI=
|
||||||
|
go.opentelemetry.io/otel/sdk v1.42.0 h1:LyC8+jqk6UJwdrI/8VydAq/hvkFKNHZVIWuslJXYsDo=
|
||||||
|
go.opentelemetry.io/otel/sdk v1.42.0/go.mod h1:rGHCAxd9DAph0joO4W6OPwxjNTYWghRWmkHuGbayMts=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.42.0 h1:D/1QR46Clz6ajyZ3G8SgNlTJKBdGp84q9RKCAZ3YGuA=
|
||||||
|
go.opentelemetry.io/otel/sdk/metric v1.42.0/go.mod h1:Ua6AAlDKdZ7tdvaQKfSmnFTdHx37+J4ba8MwVCYM5hc=
|
||||||
|
go.opentelemetry.io/otel/trace v1.42.0 h1:OUCgIPt+mzOnaUTpOQcBiM/PLQ/Op7oq6g4LenLmOYY=
|
||||||
|
go.opentelemetry.io/otel/trace v1.42.0/go.mod h1:f3K9S+IFqnumBkKhRJMeaZeNk9epyhnCmQh/EysQCdc=
|
||||||
|
go.opentelemetry.io/proto/otlp v1.9.0 h1:l706jCMITVouPOqEnii2fIAuO3IVGBRPV5ICjceRb/A=
|
||||||
|
go.opentelemetry.io/proto/otlp v1.9.0/go.mod h1:xE+Cx5E/eEHw+ISFkwPLwCZefwVjY+pqKg1qcK03+/4=
|
||||||
|
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
|
||||||
|
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
|
||||||
|
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
|
||||||
|
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
|
||||||
|
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
|
||||||
|
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
|
||||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||||
golang.org/x/sys v0.31.0 h1:ioabZlmFYtWhL+TRYpcnNlLwhyxaM9kWTDEmfnprqik=
|
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
|
||||||
golang.org/x/sys v0.31.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
|
||||||
|
golang.org/x/text v0.35.0 h1:JOVx6vVDFokkpaq1AEptVzLTpDe9KGpj5tR4/X+ybL8=
|
||||||
|
golang.org/x/text v0.35.0/go.mod h1:khi/HExzZJ2pGnjenulevKNX1W67CUy0AsXcNubPGCA=
|
||||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b h1:J1CaxgLerRR5lgx3wnr6L04cJFbWoceSK9JWBdglINo=
|
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b h1:J1CaxgLerRR5lgx3wnr6L04cJFbWoceSK9JWBdglINo=
|
||||||
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b/go.mod h1:tqur9LnfstdR9ep2LaJT4lFUl0EjlHtge+gAjmsHUG4=
|
golang.zx2c4.com/wireguard v0.0.0-20230325221338-052af4a8072b/go.mod h1:tqur9LnfstdR9ep2LaJT4lFUl0EjlHtge+gAjmsHUG4=
|
||||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6 h1:CawjfCvYQH2OU3/TnxLx97WDSUDRABfT18pCOYwc2GE=
|
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6 h1:CawjfCvYQH2OU3/TnxLx97WDSUDRABfT18pCOYwc2GE=
|
||||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6/go.mod h1:3rxYc4HtVcSG9gVaTs2GEBdehh+sYPOwKtyUWEOTb80=
|
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20230429144221-925a1e7659e6/go.mod h1:3rxYc4HtVcSG9gVaTs2GEBdehh+sYPOwKtyUWEOTb80=
|
||||||
|
gonum.org/v1/gonum v0.16.0 h1:5+ul4Swaf3ESvrOnidPp4GZbzf0mxVQpDCYUQE7OJfk=
|
||||||
|
gonum.org/v1/gonum v0.16.0/go.mod h1:fef3am4MQ93R2HHpKnLk4/Tbh/s0+wqD5nfa6Pnwy4E=
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57 h1:JLQynH/LBHfCTSbDWl+py8C+Rg/k1OVH3xfcaiANuF0=
|
||||||
|
google.golang.org/genproto/googleapis/api v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:kSJwQxqmFXeo79zOmbrALdflXQeAYcUbgS7PbpMknCY=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57 h1:mWPCjDEyshlQYzBpMNHaEof6UX1PmHcaUODUywQ0uac=
|
||||||
|
google.golang.org/genproto/googleapis/rpc v0.0.0-20260209200024-4cfbd4190f57/go.mod h1:j9x/tPzZkyxcgEFkiKEEGxfvyumM01BEtsW8xzOahRQ=
|
||||||
|
google.golang.org/grpc v1.79.3 h1:sybAEdRIEtvcD68Gx7dmnwjZKlyfuc61Dyo9pGXXkKE=
|
||||||
|
google.golang.org/grpc v1.79.3/go.mod h1:KmT0Kjez+0dde/v2j9vzwoAScgEPx/Bw1CYChhHLrHQ=
|
||||||
|
google.golang.org/protobuf v1.36.11 h1:fV6ZwhNocDyBLK0dj+fg8ektcVegBBuEolpbTQyBNVE=
|
||||||
|
google.golang.org/protobuf v1.36.11/go.mod h1:HTf+CrKn2C3g5S8VImy6tdcUvCska2kB7j23XfzDpco=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||||
|
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||||
|
|||||||
506
internal/metrics/metrics.go
Normal file
506
internal/metrics/metrics.go
Normal file
@@ -0,0 +1,506 @@
|
|||||||
|
// Package metrics provides the application-level metrics facade for Gerbil.
|
||||||
|
//
|
||||||
|
// Application code (main, relay, proxy) uses only the Record* functions in this
|
||||||
|
// package. The actual recording is delegated to the backend selected in
|
||||||
|
// internal/observability. Neither Prometheus nor OTel packages are imported here.
|
||||||
|
package metrics
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/fosrl/gerbil/internal/observability"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Config is the metrics configuration type. It is an alias for
|
||||||
|
// observability.MetricsConfig so callers do not need to import observability.
|
||||||
|
type Config = observability.MetricsConfig
|
||||||
|
|
||||||
|
// PrometheusConfig is re-exported for convenience.
|
||||||
|
type PrometheusConfig = observability.PrometheusConfig
|
||||||
|
|
||||||
|
// OTelConfig is re-exported for convenience.
|
||||||
|
type OTelConfig = observability.OTelConfig
|
||||||
|
|
||||||
|
// Package-level backend handle and the full instrument set. All instruments
// are created exactly once by createInstruments after Initialize selects a
// backend; the exported Record* helpers below are thin wrappers around them.
// Note: Record* calls made before Initialize operate on nil instruments.
var (
	backend observability.Backend

	// Interface and peer metrics
	wgInterfaceUp      observability.Int64Gauge
	wgPeersTotal       observability.UpDownCounter
	wgPeerConnected    observability.Int64Gauge
	wgHandshakesTotal  observability.Counter
	wgHandshakeLatency observability.Histogram
	wgPeerRTT          observability.Histogram
	wgBytesReceived    observability.Counter
	wgBytesTransmitted observability.Counter
	allowedIPsCount    observability.UpDownCounter
	keyRotationTotal   observability.Counter

	// System and proxy metrics
	netlinkEventsTotal     observability.Counter
	netlinkErrorsTotal     observability.Counter
	syncDuration           observability.Histogram
	workqueueDepth         observability.UpDownCounter
	kernelModuleLoads      observability.Counter
	firewallRulesApplied   observability.Counter
	activeSessions         observability.UpDownCounter
	activeProxyConnections observability.UpDownCounter
	proxyRouteLookups      observability.Counter
	proxyTLSHandshake      observability.Histogram
	proxyBytesTransmitted  observability.Counter

	// UDP Relay / Proxy Metrics
	udpPacketsTotal            observability.Counter
	udpPacketSizeBytes         observability.Histogram
	holePunchEventsTotal       observability.Counter
	proxyMappingActive         observability.UpDownCounter
	sessionActive              observability.UpDownCounter
	sessionRebuiltTotal        observability.Counter
	commPatternActive          observability.UpDownCounter
	proxyCleanupRemovedTotal   observability.Counter
	proxyConnectionErrorsTotal observability.Counter
	proxyInitialMappingsTotal  observability.Int64Gauge
	proxyMappingUpdatesTotal   observability.Counter
	proxyIdleCleanupDuration   observability.Histogram

	// SNI Proxy Metrics
	sniConnectionsTotal              observability.Counter
	sniConnectionDuration            observability.Histogram
	sniActiveConnections             observability.UpDownCounter
	sniRouteCacheHitsTotal           observability.Counter
	sniRouteAPIRequestsTotal         observability.Counter
	sniRouteAPILatency               observability.Histogram
	sniLocalOverrideTotal            observability.Counter
	sniTrustedProxyEventsTotal       observability.Counter
	sniProxyProtocolParseErrorsTotal observability.Counter
	sniDataBytesTotal                observability.Counter
	sniTunnelTerminationsTotal       observability.Counter

	// HTTP API & Peer Management Metrics
	httpRequestsTotal               observability.Counter
	httpRequestDuration             observability.Histogram
	peerOperationsTotal             observability.Counter
	proxyMappingUpdateRequestsTotal observability.Counter
	destinationsUpdateRequestsTotal observability.Counter

	// Remote Configuration, Reporting & Housekeeping
	remoteConfigFetchesTotal observability.Counter
	bandwidthReportsTotal    observability.Counter
	peerBandwidthBytesTotal  observability.Counter
	memorySpikeTotal         observability.Counter
	heapProfilesWrittenTotal observability.Counter

	// Operational metrics
	configReloadsTotal    observability.Counter
	restartTotal          observability.Counter
	authFailuresTotal     observability.Counter
	aclDeniedTotal        observability.Counter
	certificateExpiryDays observability.Float64Gauge
)
|
||||||
|
|
||||||
|
// DefaultConfig returns a default metrics configuration.
|
||||||
|
func DefaultConfig() Config {
|
||||||
|
return observability.DefaultMetricsConfig()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initialize sets up the metrics system using the selected backend.
|
||||||
|
// It returns the /metrics HTTP handler (non-nil only for Prometheus backend).
|
||||||
|
func Initialize(cfg Config) (http.Handler, error) {
|
||||||
|
b, err := observability.New(cfg)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
backend = b
|
||||||
|
|
||||||
|
if err := createInstruments(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
return backend.HTTPHandler(), nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shutdown gracefully shuts down the metrics backend.
|
||||||
|
func Shutdown(ctx context.Context) error {
|
||||||
|
if backend != nil {
|
||||||
|
return backend.Shutdown(ctx)
|
||||||
|
}
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// createInstruments registers every metric instrument on the active backend.
// It must only be called after Initialize has assigned the package-level
// backend. Metric names and label sets defined here are the stable scrape
// contract; changing them is a breaking change for dashboards and alerts.
func createInstruments() error {
	// Shared histogram bucket layouts: short-operation latencies (seconds),
	// payload sizes (bytes), and long-lived SNI connection lifetimes (seconds).
	durationBuckets := []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}
	sizeBuckets := []float64{512, 1024, 4096, 16384, 65536, 262144, 1048576}
	sniDurationBuckets := []float64{0.1, 0.5, 1, 2.5, 5, 10, 30, 60, 120}

	b := backend

	// Interface and peer metrics.
	wgInterfaceUp = b.NewInt64Gauge("gerbil_wg_interface_up",
		"Operational state of a WireGuard interface (1=up, 0=down)", "ifname", "instance")
	wgPeersTotal = b.NewUpDownCounter("gerbil_wg_peers_total",
		"Total number of configured peers per interface", "ifname")
	wgPeerConnected = b.NewInt64Gauge("gerbil_wg_peer_connected",
		"Whether a specific peer is connected (1=connected, 0=disconnected)", "ifname", "peer")
	allowedIPsCount = b.NewUpDownCounter("gerbil_allowed_ips_count",
		"Number of allowed IPs configured per peer", "ifname", "peer")
	keyRotationTotal = b.NewCounter("gerbil_key_rotation_total",
		"Key rotation events", "ifname", "reason")
	wgHandshakesTotal = b.NewCounter("gerbil_wg_handshakes_total",
		"Count of handshake attempts with their result status", "ifname", "peer", "result")
	wgHandshakeLatency = b.NewHistogram("gerbil_wg_handshake_latency_seconds",
		"Distribution of handshake latencies in seconds", durationBuckets, "ifname", "peer")
	wgPeerRTT = b.NewHistogram("gerbil_wg_peer_rtt_seconds",
		"Observed round-trip time to a peer in seconds", durationBuckets, "ifname", "peer")
	wgBytesReceived = b.NewCounter("gerbil_wg_bytes_received_total",
		"Number of bytes received from a peer", "ifname", "peer")
	wgBytesTransmitted = b.NewCounter("gerbil_wg_bytes_transmitted_total",
		"Number of bytes transmitted to a peer", "ifname", "peer")

	// System and proxy metrics.
	netlinkEventsTotal = b.NewCounter("gerbil_netlink_events_total",
		"Number of netlink events processed", "event_type")
	netlinkErrorsTotal = b.NewCounter("gerbil_netlink_errors_total",
		"Count of netlink or kernel errors", "component", "error_type")
	syncDuration = b.NewHistogram("gerbil_sync_duration_seconds",
		"Duration of reconciliation/sync loops in seconds", durationBuckets, "component")
	workqueueDepth = b.NewUpDownCounter("gerbil_workqueue_depth",
		"Current length of internal work queues", "queue")
	kernelModuleLoads = b.NewCounter("gerbil_kernel_module_loads_total",
		"Count of kernel module load attempts", "result")
	firewallRulesApplied = b.NewCounter("gerbil_firewall_rules_applied_total",
		"IPTables/NFT rules applied", "result", "chain")
	activeSessions = b.NewUpDownCounter("gerbil_active_sessions",
		"Number of active UDP relay sessions", "ifname")
	activeProxyConnections = b.NewUpDownCounter("gerbil_active_proxy_connections",
		"Active SNI proxy connections")
	proxyRouteLookups = b.NewCounter("gerbil_proxy_route_lookups_total",
		"Number of route lookups", "result")
	proxyTLSHandshake = b.NewHistogram("gerbil_proxy_tls_handshake_seconds",
		"TLS handshake duration for SNI proxy in seconds", durationBuckets)
	proxyBytesTransmitted = b.NewCounter("gerbil_proxy_bytes_transmitted_total",
		"Bytes sent/received by the SNI proxy", "direction")

	// Operational metrics.
	configReloadsTotal = b.NewCounter("gerbil_config_reloads_total",
		"Number of configuration reloads", "result")
	restartTotal = b.NewCounter("gerbil_restart_total",
		"Process restart count")
	authFailuresTotal = b.NewCounter("gerbil_auth_failures_total",
		"Count of authentication or peer validation failures", "peer", "reason")
	aclDeniedTotal = b.NewCounter("gerbil_acl_denied_total",
		"Access control denied events", "ifname", "peer", "policy")
	certificateExpiryDays = b.NewFloat64Gauge("gerbil_certificate_expiry_days",
		"Days until certificate expiry", "cert_name", "ifname")

	// UDP relay / proxy metrics.
	udpPacketsTotal = b.NewCounter("gerbil_udp_packets_total",
		"Count of UDP packets processed by relay workers", "ifname", "type", "direction")
	udpPacketSizeBytes = b.NewHistogram("gerbil_udp_packet_size_bytes",
		"Size distribution of packets forwarded through relay", sizeBuckets, "ifname", "type")
	holePunchEventsTotal = b.NewCounter("gerbil_hole_punch_events_total",
		"Count of hole punch messages processed", "ifname", "result")
	proxyMappingActive = b.NewUpDownCounter("gerbil_proxy_mapping_active",
		"Number of active proxy mappings", "ifname")
	sessionActive = b.NewUpDownCounter("gerbil_session_active",
		"Number of active WireGuard sessions", "ifname")
	sessionRebuiltTotal = b.NewCounter("gerbil_session_rebuilt_total",
		"Count of sessions rebuilt from communication patterns", "ifname")
	commPatternActive = b.NewUpDownCounter("gerbil_comm_pattern_active",
		"Number of active communication patterns", "ifname")
	proxyCleanupRemovedTotal = b.NewCounter("gerbil_proxy_cleanup_removed_total",
		"Count of items removed during cleanup routines", "ifname", "component")
	proxyConnectionErrorsTotal = b.NewCounter("gerbil_proxy_connection_errors_total",
		"Count of connection errors in proxy operations", "ifname", "error_type")
	proxyInitialMappingsTotal = b.NewInt64Gauge("gerbil_proxy_initial_mappings",
		"Number of initial proxy mappings loaded", "ifname")
	proxyMappingUpdatesTotal = b.NewCounter("gerbil_proxy_mapping_updates_total",
		"Count of proxy mapping updates", "ifname")
	proxyIdleCleanupDuration = b.NewHistogram("gerbil_proxy_idle_cleanup_duration_seconds",
		"Duration of cleanup cycles", durationBuckets, "ifname", "component")

	// SNI proxy metrics.
	sniConnectionsTotal = b.NewCounter("gerbil_sni_connections_total",
		"Count of connections processed by SNI proxy", "result")
	sniConnectionDuration = b.NewHistogram("gerbil_sni_connection_duration_seconds",
		"Lifetime distribution of proxied TLS connections", sniDurationBuckets)
	sniActiveConnections = b.NewUpDownCounter("gerbil_sni_active_connections",
		"Number of active SNI tunnels")
	sniRouteCacheHitsTotal = b.NewCounter("gerbil_sni_route_cache_hits_total",
		"Count of route cache hits and misses", "result")
	sniRouteAPIRequestsTotal = b.NewCounter("gerbil_sni_route_api_requests_total",
		"Count of route API requests", "result")
	sniRouteAPILatency = b.NewHistogram("gerbil_sni_route_api_latency_seconds",
		"Distribution of route API call latencies", durationBuckets)
	sniLocalOverrideTotal = b.NewCounter("gerbil_sni_local_override_total",
		"Count of routes using local overrides", "hit")
	sniTrustedProxyEventsTotal = b.NewCounter("gerbil_sni_trusted_proxy_events_total",
		"Count of PROXY protocol events", "event")
	sniProxyProtocolParseErrorsTotal = b.NewCounter("gerbil_sni_proxy_protocol_parse_errors_total",
		"Count of PROXY protocol parse failures")
	sniDataBytesTotal = b.NewCounter("gerbil_sni_data_bytes_total",
		"Count of bytes proxied through SNI tunnels", "direction")
	sniTunnelTerminationsTotal = b.NewCounter("gerbil_sni_tunnel_terminations_total",
		"Count of tunnel terminations by reason", "reason")

	// HTTP API & peer management metrics.
	httpRequestsTotal = b.NewCounter("gerbil_http_requests_total",
		"Count of HTTP requests to management API", "endpoint", "method", "status_code")
	httpRequestDuration = b.NewHistogram("gerbil_http_request_duration_seconds",
		"Distribution of HTTP request handling time", durationBuckets, "endpoint", "method")
	peerOperationsTotal = b.NewCounter("gerbil_peer_operations_total",
		"Count of peer lifecycle operations", "operation", "result")
	proxyMappingUpdateRequestsTotal = b.NewCounter("gerbil_proxy_mapping_update_requests_total",
		"Count of proxy mapping update API calls", "result")
	destinationsUpdateRequestsTotal = b.NewCounter("gerbil_destinations_update_requests_total",
		"Count of destinations update API calls", "result")

	// Remote configuration, reporting and housekeeping.
	remoteConfigFetchesTotal = b.NewCounter("gerbil_remote_config_fetches_total",
		"Count of remote configuration fetch attempts", "result")
	bandwidthReportsTotal = b.NewCounter("gerbil_bandwidth_reports_total",
		"Count of bandwidth report transmissions", "result")
	peerBandwidthBytesTotal = b.NewCounter("gerbil_peer_bandwidth_bytes_total",
		"Bytes per peer tracked by bandwidth calculation", "peer", "direction")
	memorySpikeTotal = b.NewCounter("gerbil_memory_spike_total",
		"Count of memory spikes detected", "severity")
	heapProfilesWrittenTotal = b.NewCounter("gerbil_heap_profiles_written_total",
		"Count of heap profile files generated")

	return nil
}
|
||||||
|
|
||||||
|
func RecordInterfaceUp(ifname, instance string, up bool) {
|
||||||
|
value := int64(0)
|
||||||
|
if up {
|
||||||
|
value = 1
|
||||||
|
}
|
||||||
|
wgInterfaceUp.Record(context.Background(), value, observability.Labels{"ifname": ifname, "instance": instance})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordPeersTotal(ifname string, delta int64) {
|
||||||
|
wgPeersTotal.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordPeerConnected(ifname, peer string, connected bool) {
|
||||||
|
value := int64(0)
|
||||||
|
if connected {
|
||||||
|
value = 1
|
||||||
|
}
|
||||||
|
wgPeerConnected.Record(context.Background(), value, observability.Labels{"ifname": ifname, "peer": peer})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordHandshake(ifname, peer, result string) {
|
||||||
|
wgHandshakesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordHandshakeLatency(ifname, peer string, seconds float64) {
|
||||||
|
wgHandshakeLatency.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordPeerRTT(ifname, peer string, seconds float64) {
|
||||||
|
wgPeerRTT.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "peer": peer})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordBytesReceived(ifname, peer string, bytes int64) {
|
||||||
|
wgBytesReceived.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordBytesTransmitted(ifname, peer string, bytes int64) {
|
||||||
|
wgBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"ifname": ifname, "peer": peer})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordAllowedIPsCount(ifname, peer string, delta int64) {
|
||||||
|
allowedIPsCount.Add(context.Background(), delta, observability.Labels{"ifname": ifname, "peer": peer})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordKeyRotation(ifname, reason string) {
|
||||||
|
keyRotationTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "reason": reason})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordNetlinkEvent(eventType string) {
|
||||||
|
netlinkEventsTotal.Add(context.Background(), 1, observability.Labels{"event_type": eventType})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordNetlinkError(component, errorType string) {
|
||||||
|
netlinkErrorsTotal.Add(context.Background(), 1, observability.Labels{"component": component, "error_type": errorType})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSyncDuration(component string, seconds float64) {
|
||||||
|
syncDuration.Record(context.Background(), seconds, observability.Labels{"component": component})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordWorkqueueDepth(queue string, delta int64) {
|
||||||
|
workqueueDepth.Add(context.Background(), delta, observability.Labels{"queue": queue})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordKernelModuleLoad(result string) {
|
||||||
|
kernelModuleLoads.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordFirewallRuleApplied(result, chain string) {
|
||||||
|
firewallRulesApplied.Add(context.Background(), 1, observability.Labels{"result": result, "chain": chain})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordActiveSession(ifname string, delta int64) {
|
||||||
|
activeSessions.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordActiveProxyConnection(hostname string, delta int64) {
|
||||||
|
_ = hostname
|
||||||
|
activeProxyConnections.Add(context.Background(), delta, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyRouteLookup(result, hostname string) {
|
||||||
|
_ = hostname
|
||||||
|
proxyRouteLookups.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyTLSHandshake(hostname string, seconds float64) {
|
||||||
|
_ = hostname
|
||||||
|
proxyTLSHandshake.Record(context.Background(), seconds, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyBytesTransmitted(hostname, direction string, bytes int64) {
|
||||||
|
_ = hostname
|
||||||
|
proxyBytesTransmitted.Add(context.Background(), bytes, observability.Labels{"direction": direction})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordConfigReload(result string) {
|
||||||
|
configReloadsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordRestart() {
|
||||||
|
restartTotal.Add(context.Background(), 1, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordAuthFailure(peer, reason string) {
|
||||||
|
authFailuresTotal.Add(context.Background(), 1, observability.Labels{"peer": peer, "reason": reason})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordACLDenied(ifname, peer, policy string) {
|
||||||
|
aclDeniedTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "peer": peer, "policy": policy})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordCertificateExpiry(certName, ifname string, days float64) {
|
||||||
|
certificateExpiryDays.Record(context.Background(), days, observability.Labels{"cert_name": certName, "ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordUDPPacket(ifname, packetType, direction string) {
|
||||||
|
udpPacketsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "type": packetType, "direction": direction})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordUDPPacketSize(ifname, packetType string, bytes float64) {
|
||||||
|
udpPacketSizeBytes.Record(context.Background(), bytes, observability.Labels{"ifname": ifname, "type": packetType})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordHolePunchEvent(ifname, result string) {
|
||||||
|
holePunchEventsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyMapping(ifname string, delta int64) {
|
||||||
|
proxyMappingActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSession(ifname string, delta int64) {
|
||||||
|
sessionActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSessionRebuilt(ifname string) {
|
||||||
|
sessionRebuiltTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordCommPattern(ifname string, delta int64) {
|
||||||
|
commPatternActive.Add(context.Background(), delta, observability.Labels{"ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyCleanupRemoved(ifname, component string, count int64) {
|
||||||
|
proxyCleanupRemovedTotal.Add(context.Background(), count, observability.Labels{"ifname": ifname, "component": component})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyConnectionError(ifname, errorType string) {
|
||||||
|
proxyConnectionErrorsTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname, "error_type": errorType})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyInitialMappings(ifname string, count int64) {
|
||||||
|
proxyInitialMappingsTotal.Record(context.Background(), count, observability.Labels{"ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyMappingUpdate(ifname string) {
|
||||||
|
proxyMappingUpdatesTotal.Add(context.Background(), 1, observability.Labels{"ifname": ifname})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyIdleCleanupDuration(ifname, component string, seconds float64) {
|
||||||
|
proxyIdleCleanupDuration.Record(context.Background(), seconds, observability.Labels{"ifname": ifname, "component": component})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNIConnection(result string) {
|
||||||
|
sniConnectionsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNIConnectionDuration(seconds float64) {
|
||||||
|
sniConnectionDuration.Record(context.Background(), seconds, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNIActiveConnection(delta int64) {
|
||||||
|
sniActiveConnections.Add(context.Background(), delta, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNIRouteCacheHit(result string) {
|
||||||
|
sniRouteCacheHitsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNIRouteAPIRequest(result string) {
|
||||||
|
sniRouteAPIRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNIRouteAPILatency(seconds float64) {
|
||||||
|
sniRouteAPILatency.Record(context.Background(), seconds, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNILocalOverride(hit string) {
|
||||||
|
sniLocalOverrideTotal.Add(context.Background(), 1, observability.Labels{"hit": hit})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNITrustedProxyEvent(event string) {
|
||||||
|
sniTrustedProxyEventsTotal.Add(context.Background(), 1, observability.Labels{"event": event})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNIProxyProtocolParseError() {
|
||||||
|
sniProxyProtocolParseErrorsTotal.Add(context.Background(), 1, nil)
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNIDataBytes(direction string, bytes int64) {
|
||||||
|
sniDataBytesTotal.Add(context.Background(), bytes, observability.Labels{"direction": direction})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordSNITunnelTermination(reason string) {
|
||||||
|
sniTunnelTerminationsTotal.Add(context.Background(), 1, observability.Labels{"reason": reason})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordHTTPRequest(endpoint, method, statusCode string) {
|
||||||
|
httpRequestsTotal.Add(context.Background(), 1, observability.Labels{"endpoint": endpoint, "method": method, "status_code": statusCode})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordHTTPRequestDuration(endpoint, method string, seconds float64) {
|
||||||
|
httpRequestDuration.Record(context.Background(), seconds, observability.Labels{"endpoint": endpoint, "method": method})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordPeerOperation(operation, result string) {
|
||||||
|
peerOperationsTotal.Add(context.Background(), 1, observability.Labels{"operation": operation, "result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordProxyMappingUpdateRequest(result string) {
|
||||||
|
proxyMappingUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordDestinationsUpdateRequest(result string) {
|
||||||
|
destinationsUpdateRequestsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordRemoteConfigFetch(result string) {
|
||||||
|
remoteConfigFetchesTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordBandwidthReport(result string) {
|
||||||
|
bandwidthReportsTotal.Add(context.Background(), 1, observability.Labels{"result": result})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordPeerBandwidthBytes(peer, direction string, bytes int64) {
|
||||||
|
peerBandwidthBytesTotal.Add(context.Background(), bytes, observability.Labels{"peer": peer, "direction": direction})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordMemorySpike(severity string) {
|
||||||
|
memorySpikeTotal.Add(context.Background(), 1, observability.Labels{"severity": severity})
|
||||||
|
}
|
||||||
|
|
||||||
|
func RecordHeapProfileWritten() {
|
||||||
|
heapProfilesWrittenTotal.Add(context.Background(), 1, nil)
|
||||||
|
}
|
||||||
258
internal/metrics/metrics_test.go
Normal file
258
internal/metrics/metrics_test.go
Normal file
@@ -0,0 +1,258 @@
|
|||||||
|
package metrics_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/fosrl/gerbil/internal/metrics"
|
||||||
|
"github.com/fosrl/gerbil/internal/observability"
|
||||||
|
)
|
||||||
|
|
||||||
|
const exampleHostname = "example.com"
|
||||||
|
|
||||||
|
func initPrometheus(t *testing.T) http.Handler {
|
||||||
|
t.Helper()
|
||||||
|
cfg := metrics.DefaultConfig()
|
||||||
|
cfg.Enabled = true
|
||||||
|
cfg.Backend = "prometheus"
|
||||||
|
cfg.Prometheus.Path = "/metrics"
|
||||||
|
|
||||||
|
h, err := metrics.Initialize(cfg)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Initialize failed: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() {
|
||||||
|
metrics.Shutdown(context.Background()) //nolint:errcheck
|
||||||
|
})
|
||||||
|
return h
|
||||||
|
}
|
||||||
|
|
||||||
|
func initNoop(t *testing.T) {
|
||||||
|
t.Helper()
|
||||||
|
cfg := metrics.DefaultConfig()
|
||||||
|
cfg.Enabled = false
|
||||||
|
_, err := metrics.Initialize(cfg)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Initialize noop failed: %v", err)
|
||||||
|
}
|
||||||
|
t.Cleanup(func() {
|
||||||
|
metrics.Shutdown(context.Background()) //nolint:errcheck
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
func scrape(t *testing.T, h http.Handler) string {
|
||||||
|
t.Helper()
|
||||||
|
req := httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody)
|
||||||
|
rr := httptest.NewRecorder()
|
||||||
|
h.ServeHTTP(rr, req)
|
||||||
|
if rr.Code != http.StatusOK {
|
||||||
|
t.Fatalf("scrape returned %d", rr.Code)
|
||||||
|
}
|
||||||
|
b, _ := io.ReadAll(rr.Body)
|
||||||
|
return string(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
// assertContains fails the test when substr does not occur in body.
func assertContains(t *testing.T, body, substr string) {
	t.Helper()
	if strings.Contains(body, substr) {
		return
	}
	t.Errorf("expected %q in output\nbody:\n%s", substr, body)
}
|
||||||
|
|
||||||
|
// --- Tests ---
|
||||||
|
|
||||||
|
func TestInitializePrometheus(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
if h == nil {
|
||||||
|
t.Error("expected non-nil HTTP handler for prometheus backend")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInitializeNoop(t *testing.T) {
|
||||||
|
initNoop(t)
|
||||||
|
// All Record* functions must not panic when noop backend is active.
|
||||||
|
metrics.RecordRestart()
|
||||||
|
metrics.RecordHTTPRequest("/test", "GET", "200")
|
||||||
|
metrics.RecordSNIConnection("accepted")
|
||||||
|
metrics.RecordPeersTotal("wg0", 1)
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestDefaultConfig(t *testing.T) {
|
||||||
|
cfg := metrics.DefaultConfig()
|
||||||
|
if cfg.Backend != "prometheus" {
|
||||||
|
t.Errorf("expected prometheus default backend, got %q", cfg.Backend)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestShutdownNoInit(t *testing.T) {
|
||||||
|
// Shutdown without Initialize should not panic or error.
|
||||||
|
if err := metrics.Shutdown(context.Background()); err != nil {
|
||||||
|
t.Errorf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordHTTPRequest(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
metrics.RecordHTTPRequest("/peers", "POST", "201")
|
||||||
|
body := scrape(t, h)
|
||||||
|
assertContains(t, body, "gerbil_http_requests_total")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordHTTPRequestDuration(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
metrics.RecordHTTPRequestDuration("/peers", "POST", 0.05)
|
||||||
|
body := scrape(t, h)
|
||||||
|
assertContains(t, body, "gerbil_http_request_duration_seconds")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordInterfaceUp(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
metrics.RecordInterfaceUp("wg0", "host1", true)
|
||||||
|
metrics.RecordInterfaceUp("wg0", "host1", false)
|
||||||
|
body := scrape(t, h)
|
||||||
|
assertContains(t, body, "gerbil_wg_interface_up")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordPeersTotal(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
metrics.RecordPeersTotal("wg0", 3)
|
||||||
|
body := scrape(t, h)
|
||||||
|
assertContains(t, body, "gerbil_wg_peers_total")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordBytesReceivedTransmitted(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
metrics.RecordBytesReceived("wg0", "peer1", 1024)
|
||||||
|
metrics.RecordBytesTransmitted("wg0", "peer1", 512)
|
||||||
|
body := scrape(t, h)
|
||||||
|
assertContains(t, body, "gerbil_wg_bytes_received_total")
|
||||||
|
assertContains(t, body, "gerbil_wg_bytes_transmitted_total")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordSNI(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
metrics.RecordSNIConnection("accepted")
|
||||||
|
metrics.RecordSNIActiveConnection(1)
|
||||||
|
metrics.RecordSNIConnectionDuration(1.5)
|
||||||
|
metrics.RecordSNIRouteCacheHit("hit")
|
||||||
|
metrics.RecordSNIRouteAPIRequest("success")
|
||||||
|
metrics.RecordSNIRouteAPILatency(0.01)
|
||||||
|
metrics.RecordSNILocalOverride("yes")
|
||||||
|
metrics.RecordSNITrustedProxyEvent("proxy_protocol_parsed")
|
||||||
|
metrics.RecordSNIProxyProtocolParseError()
|
||||||
|
metrics.RecordSNIDataBytes("client_to_target", 2048)
|
||||||
|
metrics.RecordSNITunnelTermination("eof")
|
||||||
|
body := scrape(t, h)
|
||||||
|
assertContains(t, body, "gerbil_sni_connections_total")
|
||||||
|
assertContains(t, body, "gerbil_sni_active_connections")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordRelay(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
metrics.RecordUDPPacket("relay", "data", "in")
|
||||||
|
metrics.RecordUDPPacketSize("relay", "data", 256)
|
||||||
|
metrics.RecordHolePunchEvent("relay", "success")
|
||||||
|
metrics.RecordProxyMapping("relay", 1)
|
||||||
|
metrics.RecordSession("relay", 1)
|
||||||
|
metrics.RecordSessionRebuilt("relay")
|
||||||
|
metrics.RecordCommPattern("relay", 1)
|
||||||
|
metrics.RecordProxyCleanupRemoved("relay", "session", 2)
|
||||||
|
metrics.RecordProxyConnectionError("relay", "dial_udp")
|
||||||
|
metrics.RecordProxyInitialMappings("relay", 5)
|
||||||
|
metrics.RecordProxyMappingUpdate("relay")
|
||||||
|
metrics.RecordProxyIdleCleanupDuration("relay", "conn", 0.1)
|
||||||
|
body := scrape(t, h)
|
||||||
|
assertContains(t, body, "gerbil_udp_packets_total")
|
||||||
|
assertContains(t, body, "gerbil_proxy_mapping_active")
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestRecordWireGuard(t *testing.T) {
|
||||||
|
h := initPrometheus(t)
|
||||||
|
metrics.RecordHandshake("wg0", "peer1", "success")
|
||||||
|
metrics.RecordHandshakeLatency("wg0", "peer1", 0.02)
|
||||||
|
metrics.RecordPeerRTT("wg0", "peer1", 0.005)
|
||||||
|
metrics.RecordPeerConnected("wg0", "peer1", true)
|
||||||
|
metrics.RecordAllowedIPsCount("wg0", "peer1", 2)
|
||||||
|
metrics.RecordKeyRotation("wg0", "scheduled")
|
||||||
|
body := scrape(t, h)
|
||||||
|
assertContains(t, body, "gerbil_wg_handshakes_total")
|
||||||
|
assertContains(t, body, "gerbil_wg_peer_connected")
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestRecordHousekeeping exercises the housekeeping Record* helpers
// (remote config fetch, bandwidth reporting, memory diagnostics) and
// verifies representative families appear in a Prometheus scrape.
func TestRecordHousekeeping(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordRemoteConfigFetch("success")
	metrics.RecordBandwidthReport("success")
	metrics.RecordPeerBandwidthBytes("peer1", "rx", 512)
	metrics.RecordMemorySpike("warning")
	metrics.RecordHeapProfileWritten()
	body := scrape(t, h)
	assertContains(t, body, "gerbil_remote_config_fetches_total")
	assertContains(t, body, "gerbil_memory_spike_total")
}
|
||||||
|
|
||||||
|
// TestRecordOperational exercises the operational Record* helpers
// (config reloads, restarts, auth/ACL denials, certificate expiry) and
// verifies representative families appear in a Prometheus scrape.
func TestRecordOperational(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordConfigReload("success")
	metrics.RecordRestart()
	metrics.RecordAuthFailure("peer1", "bad_key")
	metrics.RecordACLDenied("wg0", "peer1", "default-deny")
	// 90.0 is days-until-expiry for the certificate gauge.
	metrics.RecordCertificateExpiry(exampleHostname, "wg0", 90.0)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_config_reloads_total")
	assertContains(t, body, "gerbil_restart_total")
}
|
||||||
|
|
||||||
|
// TestRecordNetlink exercises the netlink/kernel-facing Record* helpers
// plus the proxy/session gauges, and verifies representative families
// appear in a Prometheus scrape.
func TestRecordNetlink(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordNetlinkEvent("link_up")
	metrics.RecordNetlinkError("wg", "timeout")
	metrics.RecordSyncDuration("config", 0.1)
	metrics.RecordWorkqueueDepth("main", 3)
	metrics.RecordKernelModuleLoad("success")
	metrics.RecordFirewallRuleApplied("success", "INPUT")
	metrics.RecordActiveSession("wg0", 1)
	metrics.RecordActiveProxyConnection(exampleHostname, 1)
	metrics.RecordProxyRouteLookup("hit", exampleHostname)
	metrics.RecordProxyTLSHandshake(exampleHostname, 0.05)
	metrics.RecordProxyBytesTransmitted(exampleHostname, "tx", 1024)
	body := scrape(t, h)
	assertContains(t, body, "gerbil_netlink_events_total")
	assertContains(t, body, "gerbil_active_sessions")
}
|
||||||
|
|
||||||
|
// TestRecordPeerOperation exercises the peer-CRUD and update-request
// Record* helpers and verifies the peer-operations counter family is
// present in a Prometheus scrape.
func TestRecordPeerOperation(t *testing.T) {
	h := initPrometheus(t)
	metrics.RecordPeerOperation("add", "success")
	metrics.RecordProxyMappingUpdateRequest("success")
	metrics.RecordDestinationsUpdateRequest("success")
	body := scrape(t, h)
	assertContains(t, body, "gerbil_peer_operations_total")
}
|
||||||
|
|
||||||
|
func TestInitializeInvalidBackend(t *testing.T) {
|
||||||
|
cfg := observability.MetricsConfig{Enabled: true, Backend: "invalid"}
|
||||||
|
_, err := metrics.Initialize(cfg)
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for invalid backend")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestInitializeBackendNone(t *testing.T) {
|
||||||
|
cfg := metrics.DefaultConfig()
|
||||||
|
cfg.Backend = "none"
|
||||||
|
h, err := metrics.Initialize(cfg)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("unexpected error: %v", err)
|
||||||
|
}
|
||||||
|
if h != nil {
|
||||||
|
t.Error("none backend should return nil handler")
|
||||||
|
}
|
||||||
|
// All Record* calls should be noop
|
||||||
|
metrics.RecordRestart()
|
||||||
|
metrics.Shutdown(context.Background()) //nolint:errcheck
|
||||||
|
}
|
||||||
119
internal/observability/config.go
Normal file
119
internal/observability/config.go
Normal file
@@ -0,0 +1,119 @@
|
|||||||
|
// Package observability provides a backend-neutral metrics abstraction for Gerbil.
|
||||||
|
//
|
||||||
|
// Exactly one metrics backend may be enabled at runtime:
|
||||||
|
// - "prometheus" – native Prometheus client; exposes /metrics (no OTel SDK required)
|
||||||
|
// - "otel" – OpenTelemetry metrics pushed via OTLP (gRPC or HTTP)
|
||||||
|
// - "none" – metrics disabled; a safe noop implementation is used
|
||||||
|
//
|
||||||
|
// Future OTel tracing and logging can be added to this package alongside the
|
||||||
|
// existing otel sub-package without touching the Prometheus-native path.
|
||||||
|
package observability
|
||||||
|
|
||||||
|
import (
|
||||||
|
"fmt"
|
||||||
|
"time"
|
||||||
|
)
|
||||||
|
|
||||||
|
// MetricsConfig is the top-level metrics configuration. It is consumed by
// New, which selects exactly one backend based on Enabled and Backend;
// Validate rejects inconsistent combinations before a backend is built.
type MetricsConfig struct {
	// Enabled controls whether any metrics backend is started.
	// When false the noop backend is used regardless of Backend.
	Enabled bool

	// Backend selects the active backend: "prometheus", "otel", or "none".
	// An empty string is resolved to "none" (see effectiveBackend).
	Backend string

	// Prometheus holds settings used only by the Prometheus-native backend.
	Prometheus PrometheusConfig

	// OTel holds settings used only by the OTel backend.
	OTel OTelConfig

	// ServiceName is propagated to OTel resource attributes.
	ServiceName string

	// ServiceVersion is propagated to OTel resource attributes.
	ServiceVersion string

	// DeploymentEnvironment is an optional OTel resource attribute.
	DeploymentEnvironment string
}
|
||||||
|
|
||||||
|
// PrometheusConfig holds Prometheus-native backend settings.
type PrometheusConfig struct {
	// Path is the HTTP path to expose the /metrics endpoint.
	// Defaults to "/metrics" (see DefaultMetricsConfig).
	Path string
}

// OTelConfig holds OpenTelemetry backend settings.
// All fields are validated by MetricsConfig.Validate when Backend is "otel".
type OTelConfig struct {
	// Protocol is the OTLP transport: "grpc" (default) or "http".
	Protocol string

	// Endpoint is the OTLP collector address (e.g. "localhost:4317").
	// Must be non-empty when the OTel backend is selected.
	Endpoint string

	// Insecure disables TLS for the OTLP connection.
	Insecure bool

	// ExportInterval is how often metrics are pushed to the collector.
	// Defaults to 60 s; must be positive when the OTel backend is selected.
	ExportInterval time.Duration
}
|
||||||
|
|
||||||
|
// DefaultMetricsConfig returns a MetricsConfig with sensible defaults.
|
||||||
|
func DefaultMetricsConfig() MetricsConfig {
|
||||||
|
return MetricsConfig{
|
||||||
|
Enabled: true,
|
||||||
|
Backend: "prometheus",
|
||||||
|
Prometheus: PrometheusConfig{
|
||||||
|
Path: "/metrics",
|
||||||
|
},
|
||||||
|
OTel: OTelConfig{
|
||||||
|
Protocol: "grpc",
|
||||||
|
Endpoint: "localhost:4317",
|
||||||
|
Insecure: true,
|
||||||
|
ExportInterval: 60 * time.Second,
|
||||||
|
},
|
||||||
|
ServiceName: "gerbil",
|
||||||
|
ServiceVersion: "1.0.0",
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Validate checks the configuration for logical errors.
|
||||||
|
func (c *MetricsConfig) Validate() error {
|
||||||
|
if !c.Enabled {
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
switch c.Backend {
|
||||||
|
case "prometheus", "none", "":
|
||||||
|
// valid
|
||||||
|
case "otel":
|
||||||
|
if c.OTel.Endpoint == "" {
|
||||||
|
return fmt.Errorf("metrics: backend=otel requires a non-empty OTel endpoint")
|
||||||
|
}
|
||||||
|
if c.OTel.Protocol != "grpc" && c.OTel.Protocol != "http" {
|
||||||
|
return fmt.Errorf("metrics: otel protocol must be \"grpc\" or \"http\", got %q", c.OTel.Protocol)
|
||||||
|
}
|
||||||
|
if c.OTel.ExportInterval <= 0 {
|
||||||
|
return fmt.Errorf("metrics: otel export interval must be positive")
|
||||||
|
}
|
||||||
|
default:
|
||||||
|
return fmt.Errorf("metrics: unknown backend %q (must be \"prometheus\", \"otel\", or \"none\")", c.Backend)
|
||||||
|
}
|
||||||
|
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// effectiveBackend resolves the backend string, treating "" and "none" as noop.
|
||||||
|
func (c *MetricsConfig) effectiveBackend() string {
|
||||||
|
if !c.Enabled {
|
||||||
|
return "none"
|
||||||
|
}
|
||||||
|
if c.Backend == "" {
|
||||||
|
return "none"
|
||||||
|
}
|
||||||
|
return c.Backend
|
||||||
|
}
|
||||||
152
internal/observability/metrics.go
Normal file
152
internal/observability/metrics.go
Normal file
@@ -0,0 +1,152 @@
|
|||||||
|
package observability
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
obsotel "github.com/fosrl/gerbil/internal/observability/otel"
|
||||||
|
obsprom "github.com/fosrl/gerbil/internal/observability/prometheus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Labels is a set of key-value pairs attached to a metric observation.
// Use only stable, bounded-cardinality label values; unbounded values
// (IPs, request IDs) would blow up time-series cardinality.
type Labels = map[string]string
|
||||||
|
|
||||||
|
// Counter is a monotonically increasing instrument.
type Counter interface {
	// Add increments the counter by value for the given label set.
	Add(ctx context.Context, value int64, labels Labels)
}

// UpDownCounter is a bidirectional integer instrument (can go up or down).
type UpDownCounter interface {
	// Add adjusts the counter by value, which may be negative.
	Add(ctx context.Context, value int64, labels Labels)
}

// Int64Gauge records a snapshot integer value.
type Int64Gauge interface {
	// Record stores the latest observed value for the given label set.
	Record(ctx context.Context, value int64, labels Labels)
}

// Float64Gauge records a snapshot float value.
type Float64Gauge interface {
	// Record stores the latest observed value for the given label set.
	Record(ctx context.Context, value float64, labels Labels)
}

// Histogram records a distribution of values.
type Histogram interface {
	// Record adds value to the distribution for the given label set.
	Record(ctx context.Context, value float64, labels Labels)
}

// Backend is the single interface that each metrics implementation must satisfy.
// Application code must not import backend-specific packages (prometheus, otel);
// it programs only against this interface and the instrument interfaces above.
type Backend interface {
	// NewCounter creates a counter metric.
	// labelNames declares the set of label keys that will be passed at observation time.
	NewCounter(name, desc string, labelNames ...string) Counter

	// NewUpDownCounter creates an up-down counter metric.
	NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter

	// NewInt64Gauge creates an integer gauge metric.
	NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge

	// NewFloat64Gauge creates a float gauge metric.
	NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge

	// NewHistogram creates a histogram metric.
	// buckets are the explicit upper-bound bucket boundaries.
	NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram

	// HTTPHandler returns the /metrics HTTP handler.
	// Implementations that do not expose an HTTP endpoint return nil.
	HTTPHandler() http.Handler

	// Shutdown performs a graceful flush / shutdown of the backend.
	Shutdown(ctx context.Context) error
}
|
||||||
|
|
||||||
|
// New creates the backend selected by cfg and returns it.
|
||||||
|
// Exactly one backend is created; the selection is mutually exclusive.
|
||||||
|
func New(cfg MetricsConfig) (Backend, error) {
|
||||||
|
if err := cfg.Validate(); err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
switch cfg.effectiveBackend() {
|
||||||
|
case "prometheus":
|
||||||
|
b, err := obsprom.New(obsprom.Config{
|
||||||
|
Path: cfg.Prometheus.Path,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &promAdapter{b: b}, nil
|
||||||
|
case "otel":
|
||||||
|
b, err := obsotel.New(obsotel.Config{
|
||||||
|
Protocol: cfg.OTel.Protocol,
|
||||||
|
Endpoint: cfg.OTel.Endpoint,
|
||||||
|
Insecure: cfg.OTel.Insecure,
|
||||||
|
ExportInterval: cfg.OTel.ExportInterval,
|
||||||
|
ServiceName: cfg.ServiceName,
|
||||||
|
ServiceVersion: cfg.ServiceVersion,
|
||||||
|
DeploymentEnvironment: cfg.DeploymentEnvironment,
|
||||||
|
})
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
return &otelAdapter{b: b}, nil
|
||||||
|
case "none":
|
||||||
|
return &NoopBackend{}, nil
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("observability: unknown backend %q", cfg.effectiveBackend())
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// promAdapter wraps obsprom.Backend to implement the observability.Backend interface.
|
||||||
|
// The concrete instrument types from the prometheus sub-package satisfy the instrument
|
||||||
|
// interfaces via Go's structural (duck) typing without importing this package.
|
||||||
|
type promAdapter struct {
|
||||||
|
b *obsprom.Backend
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *promAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
|
||||||
|
return a.b.NewCounter(name, desc, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *promAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
|
||||||
|
return a.b.NewUpDownCounter(name, desc, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *promAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
|
||||||
|
return a.b.NewInt64Gauge(name, desc, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *promAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
|
||||||
|
return a.b.NewFloat64Gauge(name, desc, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *promAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
|
||||||
|
return a.b.NewHistogram(name, desc, buckets, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *promAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() }
|
||||||
|
func (a *promAdapter) Shutdown(ctx context.Context) error { return a.b.Shutdown(ctx) }
|
||||||
|
|
||||||
|
// otelAdapter wraps obsotel.Backend to implement the observability.Backend interface.
|
||||||
|
type otelAdapter struct {
|
||||||
|
b *obsotel.Backend
|
||||||
|
}
|
||||||
|
|
||||||
|
func (a *otelAdapter) NewCounter(name, desc string, labelNames ...string) Counter {
|
||||||
|
return a.b.NewCounter(name, desc, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *otelAdapter) NewUpDownCounter(name, desc string, labelNames ...string) UpDownCounter {
|
||||||
|
return a.b.NewUpDownCounter(name, desc, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *otelAdapter) NewInt64Gauge(name, desc string, labelNames ...string) Int64Gauge {
|
||||||
|
return a.b.NewInt64Gauge(name, desc, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *otelAdapter) NewFloat64Gauge(name, desc string, labelNames ...string) Float64Gauge {
|
||||||
|
return a.b.NewFloat64Gauge(name, desc, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *otelAdapter) NewHistogram(name, desc string, buckets []float64, labelNames ...string) Histogram {
|
||||||
|
return a.b.NewHistogram(name, desc, buckets, labelNames...)
|
||||||
|
}
|
||||||
|
func (a *otelAdapter) HTTPHandler() http.Handler { return a.b.HTTPHandler() }
|
||||||
|
func (a *otelAdapter) Shutdown(ctx context.Context) error { return a.b.Shutdown(ctx) }
|
||||||
198
internal/observability/metrics_test.go
Normal file
198
internal/observability/metrics_test.go
Normal file
@@ -0,0 +1,198 @@
|
|||||||
|
package observability_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"github.com/fosrl/gerbil/internal/observability"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Shared fixtures for the observability package tests.
const (
	defaultMetricsPath = "/metrics"        // Prometheus scrape path used across tests
	otelGRPCEndpoint   = "localhost:4317"  // conventional local OTLP/gRPC collector address
	errUnexpectedFmt   = "unexpected error: %v"
)
|
||||||
|
|
||||||
|
// TestDefaultMetricsConfig pins the documented defaults of
// DefaultMetricsConfig so an accidental change to them fails loudly.
func TestDefaultMetricsConfig(t *testing.T) {
	cfg := observability.DefaultMetricsConfig()
	if !cfg.Enabled {
		t.Error("default config should have Enabled=true")
	}
	if cfg.Backend != "prometheus" {
		t.Errorf("default backend should be prometheus, got %q", cfg.Backend)
	}
	if cfg.Prometheus.Path != defaultMetricsPath {
		t.Errorf("default prometheus path should be %s, got %q", defaultMetricsPath, cfg.Prometheus.Path)
	}
	if cfg.OTel.Protocol != "grpc" {
		t.Errorf("default otel protocol should be grpc, got %q", cfg.OTel.Protocol)
	}
	if cfg.OTel.ExportInterval != 60*time.Second {
		t.Errorf("default otel export interval should be 60s, got %v", cfg.OTel.ExportInterval)
	}
}
|
||||||
|
// TestValidateValidConfigs is a table-driven test covering every
// configuration shape that Validate must accept: disabled configs, the
// noop spellings ("none" and empty), plain prometheus, and fully
// specified otel configs over both transports.
func TestValidateValidConfigs(t *testing.T) {
	tests := []struct {
		name string
		cfg  observability.MetricsConfig
	}{
		{name: "disabled", cfg: observability.MetricsConfig{Enabled: false}},
		{name: "backend none", cfg: observability.MetricsConfig{Enabled: true, Backend: "none"}},
		{name: "backend empty", cfg: observability.MetricsConfig{Enabled: true, Backend: ""}},
		{name: "prometheus", cfg: observability.MetricsConfig{Enabled: true, Backend: "prometheus"}},
		{
			name: "otel grpc",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
			},
		},
		{
			name: "otel http",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "http", Endpoint: "localhost:4318", ExportInterval: 30 * time.Second},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if err := tt.cfg.Validate(); err != nil {
				t.Errorf("unexpected validation error: %v", err)
			}
		})
	}
}
|
||||||
|
|
||||||
|
// TestValidateInvalidConfigs is a table-driven test covering every
// configuration shape that Validate must reject: an unknown backend name
// and each individually broken otel field (missing endpoint, unsupported
// protocol, non-positive export interval).
func TestValidateInvalidConfigs(t *testing.T) {
	tests := []struct {
		name string
		cfg  observability.MetricsConfig
	}{
		{name: "unknown backend", cfg: observability.MetricsConfig{Enabled: true, Backend: "datadog"}},
		{
			name: "otel missing endpoint",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: "", ExportInterval: 10 * time.Second},
			},
		},
		{
			name: "otel invalid protocol",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "tcp", Endpoint: otelGRPCEndpoint, ExportInterval: 10 * time.Second},
			},
		},
		{
			name: "otel zero interval",
			cfg: observability.MetricsConfig{
				Enabled: true, Backend: "otel",
				OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, ExportInterval: 0},
			},
		},
	}
	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			if err := tt.cfg.Validate(); err == nil {
				t.Error("expected validation error but got nil")
			}
		})
	}
}
|
||||||
|
|
||||||
|
func TestNewNoopBackend(t *testing.T) {
|
||||||
|
b, err := observability.New(observability.MetricsConfig{Enabled: false})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf(errUnexpectedFmt, err)
|
||||||
|
}
|
||||||
|
if b.HTTPHandler() != nil {
|
||||||
|
t.Error("noop backend HTTPHandler should return nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewNoneBackend(t *testing.T) {
|
||||||
|
b, err := observability.New(observability.MetricsConfig{Enabled: true, Backend: "none"})
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf(errUnexpectedFmt, err)
|
||||||
|
}
|
||||||
|
if b.HTTPHandler() != nil {
|
||||||
|
t.Error("none backend HTTPHandler should return nil")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewPrometheusBackend(t *testing.T) {
|
||||||
|
cfg := observability.MetricsConfig{
|
||||||
|
Enabled: true, Backend: "prometheus",
|
||||||
|
Prometheus: observability.PrometheusConfig{Path: defaultMetricsPath},
|
||||||
|
}
|
||||||
|
b, err := observability.New(cfg)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf(errUnexpectedFmt, err)
|
||||||
|
}
|
||||||
|
if b.HTTPHandler() == nil {
|
||||||
|
t.Error("prometheus backend HTTPHandler should not be nil")
|
||||||
|
}
|
||||||
|
if err := b.Shutdown(context.Background()); err != nil {
|
||||||
|
t.Errorf("prometheus shutdown error: %v", err)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func TestNewInvalidBackend(t *testing.T) {
|
||||||
|
_, err := observability.New(observability.MetricsConfig{Enabled: true, Backend: "invalid"})
|
||||||
|
if err == nil {
|
||||||
|
t.Error("expected error for invalid backend")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TestPrometheusAdapterAllInstruments exercises every instrument
// constructor exposed by the prometheus adapter (counter, up-down counter,
// both gauges, histogram), then checks the HTTP handler exists and the
// backend shuts down cleanly.
func TestPrometheusAdapterAllInstruments(t *testing.T) {
	b, err := observability.New(observability.MetricsConfig{
		Enabled: true, Backend: "prometheus",
		Prometheus: observability.PrometheusConfig{Path: defaultMetricsPath},
	})
	if err != nil {
		t.Fatalf("failed to create backend: %v", err)
	}
	ctx := context.Background()
	labels := observability.Labels{"k": "v"}

	// One observation per instrument kind; names are unique so the
	// registry accepts them all in a single registration pass.
	b.NewCounter("prom_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
	b.NewUpDownCounter("prom_adapter_updown", "desc", "k").Add(ctx, 2, labels)
	b.NewInt64Gauge("prom_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
	b.NewFloat64Gauge("prom_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
	b.NewHistogram("prom_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)

	if b.HTTPHandler() == nil {
		t.Error("prometheus adapter HTTPHandler should not be nil")
	}
	if err := b.Shutdown(ctx); err != nil {
		t.Errorf("Shutdown error: %v", err)
	}
}
|
||||||
|
|
||||||
|
// TestOtelAdapterAllInstruments exercises every instrument constructor
// exposed by the OTel adapter. No collector is expected to be listening:
// the OTLP connection is made lazily, so instrument creation and
// observation succeed regardless, and Shutdown's error is deliberately
// ignored because the export flush may fail without a collector.
func TestOtelAdapterAllInstruments(t *testing.T) {
	b, err := observability.New(observability.MetricsConfig{
		Enabled: true, Backend: "otel",
		OTel: observability.OTelConfig{Protocol: "grpc", Endpoint: otelGRPCEndpoint, Insecure: true, ExportInterval: 100 * time.Millisecond},
	})
	if err != nil {
		t.Fatalf("failed to create otel backend: %v", err)
	}
	ctx := context.Background()
	labels := observability.Labels{"k": "v"}

	b.NewCounter("otel_adapter_counter_total", "desc", "k").Add(ctx, 1, labels)
	b.NewUpDownCounter("otel_adapter_updown", "desc", "k").Add(ctx, 2, labels)
	b.NewInt64Gauge("otel_adapter_int_gauge", "desc", "k").Record(ctx, 99, labels)
	b.NewFloat64Gauge("otel_adapter_float_gauge", "desc", "k").Record(ctx, 1.23, labels)
	b.NewHistogram("otel_adapter_histogram", "desc", []float64{0.1, 1.0}, "k").Record(ctx, 0.5, labels)

	// Unlike prometheus, the OTel backend exposes no scrape endpoint.
	if b.HTTPHandler() != nil {
		t.Error("OTel adapter HTTPHandler should be nil")
	}

	// Bound the shutdown so a missing collector cannot hang the test.
	shutdownCtx, cancel := context.WithTimeout(ctx, 2*time.Second)
	defer cancel()
	b.Shutdown(shutdownCtx) //nolint:errcheck
}
|
||||||
71
internal/observability/noop.go
Normal file
71
internal/observability/noop.go
Normal file
@@ -0,0 +1,71 @@
|
|||||||
|
package observability
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
)
|
||||||
|
|
||||||
|
// NoopBackend is a Backend that discards all observations.
|
||||||
|
// It is used when metrics are disabled (Enabled=false or Backend="none").
|
||||||
|
// All methods are safe to call concurrently.
|
||||||
|
type NoopBackend struct{}
|
||||||
|
|
||||||
|
// Compile-time interface check.
|
||||||
|
var _ Backend = (*NoopBackend)(nil)
|
||||||
|
|
||||||
|
func (n *NoopBackend) NewCounter(_ string, _ string, _ ...string) Counter {
|
||||||
|
_ = n
|
||||||
|
return noopCounter{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *NoopBackend) NewUpDownCounter(_ string, _ string, _ ...string) UpDownCounter {
|
||||||
|
_ = n
|
||||||
|
return noopUpDownCounter{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *NoopBackend) NewInt64Gauge(_ string, _ string, _ ...string) Int64Gauge {
|
||||||
|
_ = n
|
||||||
|
return noopInt64Gauge{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *NoopBackend) NewFloat64Gauge(_ string, _ string, _ ...string) Float64Gauge {
|
||||||
|
_ = n
|
||||||
|
return noopFloat64Gauge{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *NoopBackend) NewHistogram(_ string, _ string, _ []float64, _ ...string) Histogram {
|
||||||
|
_ = n
|
||||||
|
return noopHistogram{}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *NoopBackend) HTTPHandler() http.Handler {
|
||||||
|
_ = n
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
func (n *NoopBackend) Shutdown(_ context.Context) error {
|
||||||
|
_ = n
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// --- noop instrument types ---
//
// Zero-size value types returned by NoopBackend; every observation method
// deliberately does nothing and allocates nothing.

type noopCounter struct{}

func (noopCounter) Add(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }

type noopUpDownCounter struct{}

func (noopUpDownCounter) Add(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }

type noopInt64Gauge struct{}

func (noopInt64Gauge) Record(_ context.Context, _ int64, _ Labels) { /* intentionally no-op */ }

type noopFloat64Gauge struct{}

func (noopFloat64Gauge) Record(_ context.Context, _ float64, _ Labels) { /* intentionally no-op */ }

type noopHistogram struct{}

func (noopHistogram) Record(_ context.Context, _ float64, _ Labels) { /* intentionally no-op */ }
|
||||||
67
internal/observability/noop_test.go
Normal file
67
internal/observability/noop_test.go
Normal file
@@ -0,0 +1,67 @@
|
|||||||
|
package observability_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
"github.com/fosrl/gerbil/internal/observability"
|
||||||
|
)
|
||||||
|
|
||||||
|
// TestNoopBackendAllInstruments drives every noop instrument with both a
// populated and a nil label set, confirming that all calls are accepted
// without panicking, that no HTTP handler is exposed, and that Shutdown
// never errors. The subtests assert nothing beyond "does not panic"
// because the noop backend by design produces no observable output.
func TestNoopBackendAllInstruments(t *testing.T) {
	n := &observability.NoopBackend{}

	ctx := context.Background()
	labels := observability.Labels{"k": "v"}

	t.Run("Counter", func(_ *testing.T) {
		c := n.NewCounter("test_counter", "desc")
		c.Add(ctx, 1, labels)
		c.Add(ctx, 0, nil)
	})

	t.Run("UpDownCounter", func(_ *testing.T) {
		u := n.NewUpDownCounter("test_updown", "desc")
		u.Add(ctx, 1, labels)
		u.Add(ctx, -1, nil)
	})

	t.Run("Int64Gauge", func(_ *testing.T) {
		g := n.NewInt64Gauge("test_int64gauge", "desc")
		g.Record(ctx, 42, labels)
		g.Record(ctx, 0, nil)
	})

	t.Run("Float64Gauge", func(_ *testing.T) {
		g := n.NewFloat64Gauge("test_float64gauge", "desc")
		g.Record(ctx, 3.14, labels)
		g.Record(ctx, 0, nil)
	})

	t.Run("Histogram", func(_ *testing.T) {
		h := n.NewHistogram("test_histogram", "desc", []float64{1, 5, 10})
		h.Record(ctx, 2.5, labels)
		h.Record(ctx, 0, nil)
	})

	t.Run("HTTPHandler", func(t *testing.T) {
		if n.HTTPHandler() != nil {
			t.Error("noop HTTPHandler should be nil")
		}
	})

	t.Run("Shutdown", func(t *testing.T) {
		if err := n.Shutdown(ctx); err != nil {
			t.Errorf("noop Shutdown should not error: %v", err)
		}
	})
}
|
||||||
|
|
||||||
|
func TestNoopBackendLabelNames(_ *testing.T) {
|
||||||
|
// Verify that label names passed at creation time are accepted without panic.
|
||||||
|
n := &observability.NoopBackend{}
|
||||||
|
n.NewCounter("c", "d", "label1", "label2")
|
||||||
|
n.NewUpDownCounter("u", "d", "l1")
|
||||||
|
n.NewInt64Gauge("g1", "d", "l1", "l2", "l3")
|
||||||
|
n.NewFloat64Gauge("g2", "d")
|
||||||
|
n.NewHistogram("h", "d", []float64{0.1, 1.0}, "l1")
|
||||||
|
}
|
||||||
210
internal/observability/otel/backend.go
Normal file
210
internal/observability/otel/backend.go
Normal file
@@ -0,0 +1,210 @@
|
|||||||
|
// Package otel implements the OpenTelemetry metrics backend for Gerbil.
|
||||||
|
//
|
||||||
|
// Metrics are exported via OTLP (gRPC or HTTP) to an external collector.
|
||||||
|
// No Prometheus /metrics endpoint is exposed in this mode.
|
||||||
|
// Future OTel tracing and logging can be added alongside this package
|
||||||
|
// without touching the Prometheus-native path.
|
||||||
|
package otel
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
"net/http"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
"go.opentelemetry.io/otel/attribute"
|
||||||
|
"go.opentelemetry.io/otel/metric"
|
||||||
|
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Config holds OTel backend configuration. New applies defaults for
// Protocol ("grpc"), ExportInterval (60 s), and ServiceName ("gerbil")
// when they are left zero.
type Config struct {
	// Protocol is "grpc" (default) or "http".
	Protocol string

	// Endpoint is the OTLP collector address.
	Endpoint string

	// Insecure disables TLS.
	Insecure bool

	// ExportInterval is the period between pushes to the collector.
	ExportInterval time.Duration

	// ServiceName, ServiceVersion, and DeploymentEnvironment become
	// OTel resource attributes on every exported metric.
	ServiceName           string
	ServiceVersion        string
	DeploymentEnvironment string
}

// Backend is the OTel metrics backend. It owns the SDK MeterProvider and
// the single Meter from which all instruments are created.
type Backend struct {
	cfg      Config
	provider *sdkmetric.MeterProvider
	meter    metric.Meter
}
|
||||||
|
|
||||||
|
// New creates and initialises an OTel backend.
//
// cfg.Protocol must be "grpc" (default) or "http".
// cfg.Endpoint is the OTLP collector address (e.g. "localhost:4317").
// cfg.ExportInterval sets the push period (defaults to 60 s if ≤ 0).
// cfg.Insecure disables TLS on the OTLP connection.
//
// Connection to the collector is established lazily; New only validates cfg
// and creates the SDK components. It returns an error only if the OTel resource
// or exporter cannot be constructed.
func New(cfg Config) (*Backend, error) {
	// Fill in the documented defaults before building SDK components.
	if cfg.Protocol == "" {
		cfg.Protocol = "grpc"
	}
	if cfg.ExportInterval <= 0 {
		cfg.ExportInterval = 60 * time.Second
	}
	if cfg.ServiceName == "" {
		cfg.ServiceName = "gerbil"
	}

	// Resource attributes identify this process on the collector side.
	res, err := newResource(cfg.ServiceName, cfg.ServiceVersion, cfg.DeploymentEnvironment)
	if err != nil {
		return nil, fmt.Errorf("otel backend: build resource: %w", err)
	}

	// NOTE(review): context.Background() here means exporter construction
	// is not cancellable; presumably fine because the OTLP connection is
	// lazy — confirm against newExporter's implementation.
	exp, err := newExporter(context.Background(), cfg)
	if err != nil {
		return nil, fmt.Errorf("otel backend: create exporter: %w", err)
	}

	// The periodic reader pushes collected metrics every ExportInterval.
	reader := sdkmetric.NewPeriodicReader(exp,
		sdkmetric.WithInterval(cfg.ExportInterval),
	)

	provider := sdkmetric.NewMeterProvider(
		sdkmetric.WithResource(res),
		sdkmetric.WithReader(reader),
	)

	// A single named Meter scopes every instrument this backend creates.
	meter := provider.Meter("github.com/fosrl/gerbil")

	return &Backend{cfg: cfg, provider: provider, meter: meter}, nil
}
|
||||||
|
|
||||||
|
// HTTPHandler returns nil – the OTel backend does not expose an HTTP endpoint.
|
||||||
|
func (b *Backend) HTTPHandler() http.Handler {
|
||||||
|
_ = b
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Shutdown flushes pending metrics and shuts down the MeterProvider.
// The provider's Shutdown performs a final export, so a ctx with a deadline
// bounds how long that flush may take.
func (b *Backend) Shutdown(ctx context.Context) error {
	return b.provider.Shutdown(ctx)
}
|
||||||
|
|
||||||
|
// NewCounter creates an OTel Int64Counter.
// Label names are ignored here: OTel attaches attributes at record time,
// not at instrument creation. Panics if the SDK rejects the instrument
// (e.g. a name conflict with incompatible configuration) — a programmer error.
func (b *Backend) NewCounter(name, desc string, _ ...string) *Counter {
	c, err := b.meter.Int64Counter(name, metric.WithDescription(desc))
	if err != nil {
		panic(fmt.Sprintf("otel: create counter %q: %v", name, err))
	}
	return &Counter{c: c}
}
|
||||||
|
|
||||||
|
// NewUpDownCounter creates an OTel Int64UpDownCounter.
// Label names are ignored (OTel attaches attributes at record time).
// Panics on instrument-creation failure, which indicates a programmer error.
func (b *Backend) NewUpDownCounter(name, desc string, _ ...string) *UpDownCounter {
	c, err := b.meter.Int64UpDownCounter(name, metric.WithDescription(desc))
	if err != nil {
		panic(fmt.Sprintf("otel: create up-down counter %q: %v", name, err))
	}
	return &UpDownCounter{c: c}
}
|
||||||
|
|
||||||
|
// NewInt64Gauge creates an OTel Int64Gauge.
// Label names are ignored (OTel attaches attributes at record time).
// Panics on instrument-creation failure, which indicates a programmer error.
func (b *Backend) NewInt64Gauge(name, desc string, _ ...string) *Int64Gauge {
	g, err := b.meter.Int64Gauge(name, metric.WithDescription(desc))
	if err != nil {
		panic(fmt.Sprintf("otel: create int64 gauge %q: %v", name, err))
	}
	return &Int64Gauge{g: g}
}
|
||||||
|
|
||||||
|
// NewFloat64Gauge creates an OTel Float64Gauge.
// Label names are ignored (OTel attaches attributes at record time).
// Panics on instrument-creation failure, which indicates a programmer error.
func (b *Backend) NewFloat64Gauge(name, desc string, _ ...string) *Float64Gauge {
	g, err := b.meter.Float64Gauge(name, metric.WithDescription(desc))
	if err != nil {
		panic(fmt.Sprintf("otel: create float64 gauge %q: %v", name, err))
	}
	return &Float64Gauge{g: g}
}
|
||||||
|
|
||||||
|
// NewHistogram creates an OTel Float64Histogram with explicit bucket boundaries.
// Label names are ignored (OTel attaches attributes at record time).
// Panics on instrument-creation failure, which indicates a programmer error.
func (b *Backend) NewHistogram(name, desc string, buckets []float64, _ ...string) *Histogram {
	h, err := b.meter.Float64Histogram(name,
		metric.WithDescription(desc),
		metric.WithExplicitBucketBoundaries(buckets...),
	)
	if err != nil {
		panic(fmt.Sprintf("otel: create histogram %q: %v", name, err))
	}
	return &Histogram{h: h}
}
|
||||||
|
|
||||||
|
// labelsToAttrs converts a Labels map to OTel attribute key-value pairs.
// A nil or empty map yields nil, which metric.WithAttributes accepts, so
// callers may pass labels straight through without a nil check.
func labelsToAttrs(labels map[string]string) []attribute.KeyValue {
	if len(labels) == 0 {
		return nil
	}
	attrs := make([]attribute.KeyValue, 0, len(labels))
	for k, v := range labels {
		attrs = append(attrs, attribute.String(k, v))
	}
	return attrs
}
|
||||||
|
|
||||||
|
// Counter wraps an OTel Int64Counter.
type Counter struct {
	c metric.Int64Counter
}

// Add increments the counter by value, attaching labels as OTel attributes.
// labels may be nil.
func (c *Counter) Add(ctx context.Context, value int64, labels map[string]string) {
	c.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}
|
||||||
|
|
||||||
|
// UpDownCounter wraps an OTel Int64UpDownCounter.
type UpDownCounter struct {
	c metric.Int64UpDownCounter
}

// Add adjusts the up-down counter by value (which may be negative),
// attaching labels as OTel attributes. labels may be nil.
func (u *UpDownCounter) Add(ctx context.Context, value int64, labels map[string]string) {
	u.c.Add(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}
|
||||||
|
|
||||||
|
// Int64Gauge wraps an OTel Int64Gauge.
type Int64Gauge struct {
	g metric.Int64Gauge
}

// Record sets the gauge to value, attaching labels as OTel attributes.
// labels may be nil.
func (g *Int64Gauge) Record(ctx context.Context, value int64, labels map[string]string) {
	g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}
|
||||||
|
|
||||||
|
// Float64Gauge wraps an OTel Float64Gauge.
type Float64Gauge struct {
	g metric.Float64Gauge
}

// Record sets the gauge to value, attaching labels as OTel attributes.
// labels may be nil.
func (g *Float64Gauge) Record(ctx context.Context, value float64, labels map[string]string) {
	g.g.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}
|
||||||
|
|
||||||
|
// Histogram wraps an OTel Float64Histogram.
type Histogram struct {
	h metric.Float64Histogram
}

// Record observes value in the histogram, attaching labels as OTel attributes.
// labels may be nil.
func (h *Histogram) Record(ctx context.Context, value float64, labels map[string]string) {
	h.h.Record(ctx, value, metric.WithAttributes(labelsToAttrs(labels)...))
}
|
||||||
141
internal/observability/otel/backend_test.go
Normal file
141
internal/observability/otel/backend_test.go
Normal file
@@ -0,0 +1,141 @@
|
|||||||
|
package otel_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"testing"
|
||||||
|
"time"
|
||||||
|
|
||||||
|
obsotel "github.com/fosrl/gerbil/internal/observability/otel"
|
||||||
|
)
|
||||||
|
|
||||||
|
const (
|
||||||
|
defaultGRPCEndpoint = "localhost:4317"
|
||||||
|
defaultServiceName = "gerbil-test"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newInMemoryBackend returns an OTel backend pointed at the default local
// collector address. No collector needs to be listening: the exporter
// connects lazily, so New succeeds regardless. A short export interval keeps
// any flush attempts quick during test shutdown.
func newInMemoryBackend(t *testing.T) *obsotel.Backend {
	t.Helper()
	b, err := obsotel.New(obsotel.Config{
		Protocol:       "grpc",
		Endpoint:       defaultGRPCEndpoint,
		Insecure:       true,
		ExportInterval: 100 * time.Millisecond,
		ServiceName:    defaultServiceName,
		ServiceVersion: "0.0.1",
	})
	if err != nil {
		t.Fatalf("failed to create otel backend: %v", err)
	}
	return b
}
|
||||||
|
|
||||||
|
// TestOtelBackendHTTPHandlerIsNil verifies the push-based OTel backend
// exposes no scrape handler.
func TestOtelBackendHTTPHandlerIsNil(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck
	if b.HTTPHandler() != nil {
		t.Error("OTel backend HTTPHandler should return nil")
	}
}

// TestOtelBackendShutdown verifies Shutdown completes (possibly with an
// error, since no collector is listening) without panicking.
func TestOtelBackendShutdown(t *testing.T) {
	b := newInMemoryBackend(t)
	ctx, cancel := context.WithTimeout(context.Background(), 3*time.Second)
	defer cancel()
	if err := b.Shutdown(ctx); err != nil {
		// Shutdown with unreachable collector may fail to flush; that's acceptable.
		// What matters is that Shutdown does not panic.
		t.Logf("Shutdown returned (expected with no collector): %v", err)
	}
}
|
||||||
|
|
||||||
|
// TestOtelBackendCounter exercises counter creation and Add with both
// populated and nil label maps; the assertions are "does not panic".
func TestOtelBackendCounter(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	c := b.NewCounter("gerbil_test_counter_total", "test counter", "result")
	// Should not panic
	c.Add(context.Background(), 1, map[string]string{"result": "ok"})
	c.Add(context.Background(), 5, nil)
}

// TestOtelBackendUpDownCounter exercises positive and negative adjustments.
func TestOtelBackendUpDownCounter(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	u := b.NewUpDownCounter("gerbil_test_updown", "test updown", "state")
	u.Add(context.Background(), 3, map[string]string{"state": "active"})
	u.Add(context.Background(), -1, map[string]string{"state": "active"})
}

// TestOtelBackendInt64Gauge exercises integer gauge recording with nil labels.
func TestOtelBackendInt64Gauge(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	g := b.NewInt64Gauge("gerbil_test_int_gauge", "test gauge")
	g.Record(context.Background(), 42, nil)
}

// TestOtelBackendFloat64Gauge exercises float gauge recording with nil labels.
func TestOtelBackendFloat64Gauge(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	g := b.NewFloat64Gauge("gerbil_test_float_gauge", "test float gauge")
	g.Record(context.Background(), 3.14, nil)
}

// TestOtelBackendHistogram exercises histogram creation with explicit
// buckets and a single observation.
func TestOtelBackendHistogram(t *testing.T) {
	b := newInMemoryBackend(t)
	defer b.Shutdown(context.Background()) //nolint:errcheck

	h := b.NewHistogram("gerbil_test_duration_seconds", "test histogram",
		[]float64{0.1, 0.5, 1.0}, "method")
	h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})
}
|
||||||
|
|
||||||
|
// TestOtelBackendHTTPProtocol verifies the "http" OTLP protocol constructs
// successfully and, like the gRPC variant, exposes no scrape handler.
func TestOtelBackendHTTPProtocol(t *testing.T) {
	b, err := obsotel.New(obsotel.Config{
		Protocol:       "http",
		Endpoint:       "localhost:4318",
		Insecure:       true,
		ExportInterval: 100 * time.Millisecond,
		ServiceName:    defaultServiceName,
	})
	if err != nil {
		t.Fatalf("failed to create otel http backend: %v", err)
	}
	defer b.Shutdown(context.Background()) //nolint:errcheck

	if b.HTTPHandler() != nil {
		t.Error("OTel HTTP backend should not expose a /metrics endpoint")
	}
}

// TestOtelBackendInvalidProtocol verifies New rejects protocols other than
// "grpc" and "http".
func TestOtelBackendInvalidProtocol(t *testing.T) {
	_, err := obsotel.New(obsotel.Config{
		Protocol:       "tcp",
		Endpoint:       defaultGRPCEndpoint,
		ExportInterval: 10 * time.Second,
	})
	if err == nil {
		t.Error("expected error for invalid protocol")
	}
}

// TestOtelBackendDeploymentEnvironment verifies construction succeeds when
// the optional deployment-environment resource attribute is set.
func TestOtelBackendDeploymentEnvironment(t *testing.T) {
	b, err := obsotel.New(obsotel.Config{
		Protocol:              "grpc",
		Endpoint:              defaultGRPCEndpoint,
		Insecure:              true,
		ExportInterval:        100 * time.Millisecond,
		ServiceName:           defaultServiceName,
		ServiceVersion:        "1.2.3",
		DeploymentEnvironment: "staging",
	})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	defer b.Shutdown(context.Background()) //nolint:errcheck
}
|
||||||
50
internal/observability/otel/exporter.go
Normal file
50
internal/observability/otel/exporter.go
Normal file
@@ -0,0 +1,50 @@
|
|||||||
|
package otel
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"fmt"
|
||||||
|
|
||||||
|
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||||
|
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetrichttp"
|
||||||
|
sdkmetric "go.opentelemetry.io/otel/sdk/metric"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newExporter creates the appropriate OTLP exporter based on cfg.Protocol.
|
||||||
|
func newExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
|
||||||
|
switch cfg.Protocol {
|
||||||
|
case "grpc", "":
|
||||||
|
return newGRPCExporter(ctx, cfg)
|
||||||
|
case "http":
|
||||||
|
return newHTTPExporter(ctx, cfg)
|
||||||
|
default:
|
||||||
|
return nil, fmt.Errorf("otel: unknown protocol %q (must be \"grpc\" or \"http\")", cfg.Protocol)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// newGRPCExporter builds an OTLP/gRPC metric exporter targeting cfg.Endpoint.
// TLS is used unless cfg.Insecure is set.
func newGRPCExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
	opts := []otlpmetricgrpc.Option{
		otlpmetricgrpc.WithEndpoint(cfg.Endpoint),
	}
	if cfg.Insecure {
		opts = append(opts, otlpmetricgrpc.WithInsecure())
	}
	exp, err := otlpmetricgrpc.New(ctx, opts...)
	if err != nil {
		return nil, fmt.Errorf("otlp grpc exporter: %w", err)
	}
	return exp, nil
}
|
||||||
|
|
||||||
|
// newHTTPExporter builds an OTLP/HTTP metric exporter targeting cfg.Endpoint.
// TLS is used unless cfg.Insecure is set.
func newHTTPExporter(ctx context.Context, cfg Config) (sdkmetric.Exporter, error) {
	opts := []otlpmetrichttp.Option{
		otlpmetrichttp.WithEndpoint(cfg.Endpoint),
	}
	if cfg.Insecure {
		opts = append(opts, otlpmetrichttp.WithInsecure())
	}
	exp, err := otlpmetrichttp.New(ctx, opts...)
	if err != nil {
		return nil, fmt.Errorf("otlp http exporter: %w", err)
	}
	return exp, nil
}
|
||||||
25
internal/observability/otel/resource.go
Normal file
25
internal/observability/otel/resource.go
Normal file
@@ -0,0 +1,25 @@
|
|||||||
|
package otel
|
||||||
|
|
||||||
|
import (
|
||||||
|
"go.opentelemetry.io/otel/attribute"
|
||||||
|
"go.opentelemetry.io/otel/sdk/resource"
|
||||||
|
semconv "go.opentelemetry.io/otel/semconv/v1.40.0"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newResource builds an OTel resource for the Gerbil service.
// serviceVersion and deploymentEnv are optional; empty values are omitted.
// NOTE(review): resource.Merge can return an error when the two resources
// carry conflicting schema URLs — confirm resource.Default()'s schema stays
// compatible with the semconv version imported here when bumping either.
func newResource(serviceName, serviceVersion, deploymentEnv string) (*resource.Resource, error) {
	attrs := []attribute.KeyValue{
		semconv.ServiceName(serviceName),
	}
	if serviceVersion != "" {
		attrs = append(attrs, semconv.ServiceVersion(serviceVersion))
	}
	if deploymentEnv != "" {
		attrs = append(attrs, semconv.DeploymentEnvironmentName(deploymentEnv))
	}

	return resource.Merge(
		resource.Default(),
		resource.NewWithAttributes(semconv.SchemaURL, attrs...),
	)
}
|
||||||
185
internal/observability/prometheus/backend.go
Normal file
185
internal/observability/prometheus/backend.go
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
// Package prometheus implements the native Prometheus metrics backend for Gerbil.
|
||||||
|
//
|
||||||
|
// This backend uses the Prometheus Go client directly; it does NOT depend on the
|
||||||
|
// OpenTelemetry SDK. A dedicated Prometheus registry is used so that default
|
||||||
|
// Go/process metrics are not unintentionally included unless the caller opts in.
|
||||||
|
package prometheus
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"net/http"
|
||||||
|
|
||||||
|
"github.com/prometheus/client_golang/prometheus"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/collectors"
|
||||||
|
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||||
|
)
|
||||||
|
|
||||||
|
// Config holds Prometheus-backend configuration.
type Config struct {
	// Path is the HTTP endpoint path (e.g. "/metrics").
	Path string

	// IncludeGoMetrics controls whether the standard Go runtime and process
	// collectors are registered on the dedicated registry.
	// Defaults to true if not explicitly set (nil means "include").
	IncludeGoMetrics *bool
}
||||||
|
|
||||||
|
// Backend is the native Prometheus metrics backend.
// Metric instruments are created via the New* family of methods and stored
// in the backend-specific instrument types that implement the observability
// instrument interfaces.
type Backend struct {
	cfg      Config               // effective configuration (defaults applied by New)
	registry *prometheus.Registry // dedicated registry; avoids global default-registry state
	handler  http.Handler         // scrape handler serving this registry
}
|
||||||
|
|
||||||
|
// New creates and initialises a Prometheus backend.
|
||||||
|
//
|
||||||
|
// cfg.Path sets the HTTP endpoint path (defaults to "/metrics" if empty).
|
||||||
|
// cfg.IncludeGoMetrics controls whether standard Go runtime and process metrics
|
||||||
|
// are included; defaults to true when nil.
|
||||||
|
//
|
||||||
|
// Returns an error if the registry cannot be created.
|
||||||
|
func New(cfg Config) (*Backend, error) {
|
||||||
|
if cfg.Path == "" {
|
||||||
|
cfg.Path = "/metrics"
|
||||||
|
}
|
||||||
|
|
||||||
|
registry := prometheus.NewRegistry()
|
||||||
|
|
||||||
|
// Include Go and process metrics by default.
|
||||||
|
includeGo := cfg.IncludeGoMetrics == nil || *cfg.IncludeGoMetrics
|
||||||
|
if includeGo {
|
||||||
|
registry.MustRegister(
|
||||||
|
collectors.NewGoCollector(),
|
||||||
|
collectors.NewProcessCollector(collectors.ProcessCollectorOpts{}),
|
||||||
|
)
|
||||||
|
}
|
||||||
|
|
||||||
|
handler := promhttp.HandlerFor(registry, promhttp.HandlerOpts{
|
||||||
|
EnableOpenMetrics: false,
|
||||||
|
})
|
||||||
|
|
||||||
|
return &Backend{cfg: cfg, registry: registry, handler: handler}, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// HTTPHandler returns the Prometheus /metrics HTTP handler serving the
// backend's dedicated registry.
func (b *Backend) HTTPHandler() http.Handler {
	return b.handler
}
|
||||||
|
|
||||||
|
// Shutdown is a no-op for the Prometheus backend.
|
||||||
|
// The registry does not maintain background goroutines.
|
||||||
|
func (b *Backend) Shutdown(_ context.Context) error {
|
||||||
|
_ = b
|
||||||
|
return nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewCounter creates a Prometheus CounterVec registered on the backend's registry.
// MustRegister panics on duplicate registration — a programmer error.
func (b *Backend) NewCounter(name, desc string, labelNames ...string) *Counter {
	vec := prometheus.NewCounterVec(prometheus.CounterOpts{
		Name: name,
		Help: desc,
	}, labelNames)
	b.registry.MustRegister(vec)
	return &Counter{vec: vec}
}
|
||||||
|
|
||||||
|
// NewUpDownCounter creates a Prometheus GaugeVec (Prometheus gauges are
|
||||||
|
// bidirectional) registered on the backend's registry.
|
||||||
|
func (b *Backend) NewUpDownCounter(name, desc string, labelNames ...string) *UpDownCounter {
|
||||||
|
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: name,
|
||||||
|
Help: desc,
|
||||||
|
}, labelNames)
|
||||||
|
b.registry.MustRegister(vec)
|
||||||
|
return &UpDownCounter{vec: vec}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewInt64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||||
|
func (b *Backend) NewInt64Gauge(name, desc string, labelNames ...string) *Int64Gauge {
|
||||||
|
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: name,
|
||||||
|
Help: desc,
|
||||||
|
}, labelNames)
|
||||||
|
b.registry.MustRegister(vec)
|
||||||
|
return &Int64Gauge{vec: vec}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewFloat64Gauge creates a Prometheus GaugeVec registered on the backend's registry.
|
||||||
|
func (b *Backend) NewFloat64Gauge(name, desc string, labelNames ...string) *Float64Gauge {
|
||||||
|
vec := prometheus.NewGaugeVec(prometheus.GaugeOpts{
|
||||||
|
Name: name,
|
||||||
|
Help: desc,
|
||||||
|
}, labelNames)
|
||||||
|
b.registry.MustRegister(vec)
|
||||||
|
return &Float64Gauge{vec: vec}
|
||||||
|
}
|
||||||
|
|
||||||
|
// NewHistogram creates a Prometheus HistogramVec registered on the backend's registry.
// buckets are the explicit upper bounds for each histogram bucket.
// MustRegister panics on duplicate registration — a programmer error.
func (b *Backend) NewHistogram(name, desc string, buckets []float64, labelNames ...string) *Histogram {
	vec := prometheus.NewHistogramVec(prometheus.HistogramOpts{
		Name:    name,
		Help:    desc,
		Buckets: buckets,
	}, labelNames)
	b.registry.MustRegister(vec)
	return &Histogram{vec: vec}
}
|
||||||
|
|
||||||
|
// Counter is a native Prometheus counter instrument.
type Counter struct {
	vec *prometheus.CounterVec
}

// Add increments the counter by value for the given labels.
//
// value must be non-negative. Negative values are ignored (Prometheus
// counters are monotonic; a negative Add would panic in the client library).
func (c *Counter) Add(_ context.Context, value int64, labels map[string]string) {
	if value < 0 {
		return
	}
	c.vec.With(prometheus.Labels(labels)).Add(float64(value))
}
|
||||||
|
|
||||||
|
// UpDownCounter is a native Prometheus gauge used as a bidirectional counter.
type UpDownCounter struct {
	vec *prometheus.GaugeVec
}

// Add adjusts the gauge by value (which may be negative) for the given labels.
func (u *UpDownCounter) Add(_ context.Context, value int64, labels map[string]string) {
	u.vec.With(prometheus.Labels(labels)).Add(float64(value))
}
|
||||||
|
|
||||||
|
// Int64Gauge is a native Prometheus gauge recording integer snapshot values.
type Int64Gauge struct {
	vec *prometheus.GaugeVec
}

// Record sets the gauge to value for the given labels.
func (g *Int64Gauge) Record(_ context.Context, value int64, labels map[string]string) {
	g.vec.With(prometheus.Labels(labels)).Set(float64(value))
}
|
||||||
|
|
||||||
|
// Float64Gauge is a native Prometheus gauge recording float snapshot values.
type Float64Gauge struct {
	vec *prometheus.GaugeVec
}

// Record sets the gauge to value for the given labels.
func (g *Float64Gauge) Record(_ context.Context, value float64, labels map[string]string) {
	g.vec.With(prometheus.Labels(labels)).Set(value)
}
|
||||||
|
|
||||||
|
// Histogram is a native Prometheus histogram instrument.
type Histogram struct {
	vec *prometheus.HistogramVec
}

// Record observes value for the given labels.
func (h *Histogram) Record(_ context.Context, value float64, labels map[string]string) {
	h.vec.With(prometheus.Labels(labels)).Observe(value)
}
|
||||||
173
internal/observability/prometheus/backend_test.go
Normal file
173
internal/observability/prometheus/backend_test.go
Normal file
@@ -0,0 +1,173 @@
|
|||||||
|
package prometheus_test
|
||||||
|
|
||||||
|
import (
|
||||||
|
"context"
|
||||||
|
"io"
|
||||||
|
"net/http"
|
||||||
|
"net/http/httptest"
|
||||||
|
"strings"
|
||||||
|
"testing"
|
||||||
|
|
||||||
|
obsprom "github.com/fosrl/gerbil/internal/observability/prometheus"
|
||||||
|
)
|
||||||
|
|
||||||
|
// newTestBackend returns a Prometheus backend with the default configuration
// (Go/process collectors included), failing the test on construction error.
func newTestBackend(t *testing.T) *obsprom.Backend {
	t.Helper()
	b, err := obsprom.New(obsprom.Config{Path: "/metrics"})
	if err != nil {
		t.Fatalf("failed to create prometheus backend: %v", err)
	}
	return b
}
|
||||||
|
|
||||||
|
// TestPrometheusBackendHTTPHandler verifies the pull-based backend exposes a
// scrape handler.
func TestPrometheusBackendHTTPHandler(t *testing.T) {
	b := newTestBackend(t)
	if b.HTTPHandler() == nil {
		t.Error("HTTPHandler should not be nil")
	}
}

// TestPrometheusBackendShutdown verifies the no-op Shutdown returns nil.
func TestPrometheusBackendShutdown(t *testing.T) {
	b := newTestBackend(t)
	if err := b.Shutdown(context.Background()); err != nil {
		t.Errorf("Shutdown returned error: %v", err)
	}
}
|
||||||
|
|
||||||
|
// TestPrometheusBackendCounter records a counter increment and asserts the
// exact exposition-format line appears in a scrape.
func TestPrometheusBackendCounter(t *testing.T) {
	b := newTestBackend(t)
	c := b.NewCounter("test_counter_total", "A test counter", "result")
	c.Add(context.Background(), 3, map[string]string{"result": "ok"})

	body := scrapeMetrics(t, b)
	assertMetricPresent(t, body, `test_counter_total{result="ok"} 3`)
}

// TestPrometheusBackendUpDownCounter asserts +5 then -2 nets to 3 in the
// scraped output.
func TestPrometheusBackendUpDownCounter(t *testing.T) {
	b := newTestBackend(t)
	u := b.NewUpDownCounter("test_gauge_total", "A test up-down counter", "state")
	u.Add(context.Background(), 5, map[string]string{"state": "active"})
	u.Add(context.Background(), -2, map[string]string{"state": "active"})

	body := scrapeMetrics(t, b)
	assertMetricPresent(t, body, `test_gauge_total{state="active"} 3`)
}

// TestPrometheusBackendInt64Gauge asserts the last recorded integer value is
// exposed.
func TestPrometheusBackendInt64Gauge(t *testing.T) {
	b := newTestBackend(t)
	g := b.NewInt64Gauge("test_int_gauge", "An integer gauge", "ifname")
	g.Record(context.Background(), 42, map[string]string{"ifname": "wg0"})

	body := scrapeMetrics(t, b)
	assertMetricPresent(t, body, `test_int_gauge{ifname="wg0"} 42`)
}

// TestPrometheusBackendFloat64Gauge asserts the last recorded float value is
// exposed.
func TestPrometheusBackendFloat64Gauge(t *testing.T) {
	b := newTestBackend(t)
	g := b.NewFloat64Gauge("test_float_gauge", "A float gauge", "cert")
	g.Record(context.Background(), 7.5, map[string]string{"cert": "example.com"})

	body := scrapeMetrics(t, b)
	assertMetricPresent(t, body, `test_float_gauge{cert="example.com"} 7.5`)
}

// TestPrometheusBackendHistogram checks the histogram family name appears in
// a scrape after one observation (bucket lines vary, so only presence is
// asserted).
func TestPrometheusBackendHistogram(t *testing.T) {
	b := newTestBackend(t)
	buckets := []float64{0.1, 0.5, 1.0, 5.0}
	h := b.NewHistogram("test_duration_seconds", "A test histogram", buckets, "method")
	h.Record(context.Background(), 0.3, map[string]string{"method": "GET"})

	body := scrapeMetrics(t, b)
	if !strings.Contains(body, "test_duration_seconds") {
		t.Errorf("expected histogram metric in output, body:\n%s", body)
	}
}
|
||||||
|
|
||||||
|
// TestPrometheusBackendMultipleLabels verifies a counter with three label
// dimensions can be recorded and appears in the scrape output.
func TestPrometheusBackendMultipleLabels(t *testing.T) {
	b := newTestBackend(t)
	c := b.NewCounter("multi_label_total", "Multi-label counter", "method", "route", "status_code")
	c.Add(context.Background(), 1, map[string]string{
		"method":      "POST",
		"route":       "/api/peers",
		"status_code": "200",
	})

	body := scrapeMetrics(t, b)
	if !strings.Contains(body, "multi_label_total") {
		t.Errorf("expected multi_label_total in output, body:\n%s", body)
	}
}

// TestPrometheusBackendGoMetrics verifies the Go runtime collector is
// registered by default.
func TestPrometheusBackendGoMetrics(t *testing.T) {
	b := newTestBackend(t)
	body := scrapeMetrics(t, b)
	// Default backend includes Go runtime metrics.
	if !strings.Contains(body, "go_goroutines") {
		t.Error("expected go_goroutines in default backend output")
	}
}

// TestPrometheusBackendNoGoMetrics verifies IncludeGoMetrics=false opts out
// of the runtime collectors.
func TestPrometheusBackendNoGoMetrics(t *testing.T) {
	f := false
	b, err := obsprom.New(obsprom.Config{IncludeGoMetrics: &f})
	if err != nil {
		t.Fatalf("unexpected error: %v", err)
	}
	body := scrapeMetrics(t, b)
	if strings.Contains(body, "go_goroutines") {
		t.Error("expected no go_goroutines when IncludeGoMetrics=false")
	}
}

// TestPrometheusBackendNilLabels verifies Add with a nil label map and no
// declared label names does not panic.
func TestPrometheusBackendNilLabels(t *testing.T) {
	// Adding with nil labels should not panic (treated as empty map).
	b := newTestBackend(t)
	c := b.NewCounter("nil_labels_total", "counter with no labels")
	// nil labels with no label names declared should be safe
	c.Add(context.Background(), 1, nil)
}
|
||||||
|
|
||||||
|
// TestPrometheusBackendConcurrentAdd hammers one counter series from 10
// goroutines (100 increments each) and asserts the exact total of 1000,
// exercising the client library's internal synchronization.
func TestPrometheusBackendConcurrentAdd(t *testing.T) {
	b := newTestBackend(t)
	c := b.NewCounter("concurrent_total", "concurrent counter", "worker")

	// Unbuffered channel used as a join point: one receive per goroutine.
	done := make(chan struct{})
	for i := 0; i < 10; i++ {
		go func(_ int) {
			for j := 0; j < 100; j++ {
				c.Add(context.Background(), 1, map[string]string{"worker": "w"})
			}
			done <- struct{}{}
		}(i)
	}
	for i := 0; i < 10; i++ {
		<-done
	}

	body := scrapeMetrics(t, b)
	assertMetricPresent(t, body, `concurrent_total{worker="w"} 1000`)
}
|
||||||
|
|
||||||
|
// --- helpers ---
|
||||||
|
|
||||||
|
// scrapeMetrics performs an in-process GET against the backend's metrics
// handler and returns the response body as a string, failing the test on a
// non-200 status or read error.
func scrapeMetrics(t *testing.T, b *obsprom.Backend) string {
	t.Helper()
	req := httptest.NewRequest(http.MethodGet, "/metrics", http.NoBody)
	rr := httptest.NewRecorder()
	b.HTTPHandler().ServeHTTP(rr, req)
	if rr.Code != http.StatusOK {
		t.Fatalf("metrics handler returned %d", rr.Code)
	}
	body, err := io.ReadAll(rr.Body)
	if err != nil {
		t.Fatalf("failed to read response body: %v", err)
	}
	return string(body)
}
|
||||||
|
|
||||||
|
// assertMetricPresent fails the test (non-fatally) unless the exact
// exposition-format line `expected` appears in the scraped body.
func assertMetricPresent(t *testing.T, body, expected string) {
	t.Helper()
	if !strings.Contains(body, expected) {
		t.Errorf("expected %q in metrics output\nbody:\n%s", expected, body)
	}
}
|
||||||
384
proxy/proxy.go
384
proxy/proxy.go
@@ -11,10 +11,12 @@ import (
|
|||||||
"log"
|
"log"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"strconv"
|
||||||
"strings"
|
"strings"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/fosrl/gerbil/internal/metrics"
|
||||||
"github.com/fosrl/gerbil/logger"
|
"github.com/fosrl/gerbil/logger"
|
||||||
"github.com/patrickmn/go-cache"
|
"github.com/patrickmn/go-cache"
|
||||||
)
|
)
|
||||||
@@ -31,6 +33,16 @@ type RouteAPIResponse struct {
|
|||||||
Endpoints []string `json:"endpoints"`
|
Endpoints []string `json:"endpoints"`
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// ProxyProtocolInfo holds information parsed from an incoming PROXY protocol
// header (v1 text form, per the fields parsed elsewhere in this file).
type ProxyProtocolInfo struct {
	Protocol string // TCP4 or TCP6
	SrcIP    string // client source address as reported by the upstream proxy
	DestIP   string // original destination address
	SrcPort  int    // client source port
	DestPort int    // original destination port
	OriginalConn net.Conn // The original connection after PROXY protocol parsing
}
|
||||||
|
|
||||||
// SNIProxy represents the main proxy server
|
// SNIProxy represents the main proxy server
|
||||||
type SNIProxy struct {
|
type SNIProxy struct {
|
||||||
port int
|
port int
|
||||||
@@ -55,6 +67,15 @@ type SNIProxy struct {
|
|||||||
// Track active tunnels by SNI
|
// Track active tunnels by SNI
|
||||||
activeTunnels map[string]*activeTunnel
|
activeTunnels map[string]*activeTunnel
|
||||||
activeTunnelsLock sync.Mutex
|
activeTunnelsLock sync.Mutex
|
||||||
|
|
||||||
|
// Trusted upstream proxies that can send PROXY protocol
|
||||||
|
trustedUpstreams map[string]struct{}
|
||||||
|
|
||||||
|
// Reusable HTTP client for API requests
|
||||||
|
httpClient *http.Client
|
||||||
|
|
||||||
|
// Buffer pool for connection piping
|
||||||
|
bufferPool *sync.Pool
|
||||||
}
|
}
|
||||||
|
|
||||||
type activeTunnel struct {
|
type activeTunnel struct {
|
||||||
@@ -75,6 +96,194 @@ func (conn readOnlyConn) SetDeadline(t time.Time) error { return nil }
|
|||||||
func (conn readOnlyConn) SetReadDeadline(t time.Time) error { return nil }
|
func (conn readOnlyConn) SetReadDeadline(t time.Time) error { return nil }
|
||||||
func (conn readOnlyConn) SetWriteDeadline(t time.Time) error { return nil }
|
func (conn readOnlyConn) SetWriteDeadline(t time.Time) error { return nil }
|
||||||
|
|
||||||
|
// parseProxyProtocolHeader parses a PROXY protocol v1 header from the connection
|
||||||
|
func (p *SNIProxy) parseProxyProtocolHeader(conn net.Conn) (*ProxyProtocolInfo, net.Conn, error) {
|
||||||
|
// Check if the connection comes from a trusted upstream
|
||||||
|
remoteHost, _, err := net.SplitHostPort(conn.RemoteAddr().String())
|
||||||
|
if err != nil {
|
||||||
|
return nil, conn, fmt.Errorf("failed to parse remote address: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve the remote IP to hostname to check if it's trusted
|
||||||
|
// For simplicity, we'll check the IP directly in trusted upstreams
|
||||||
|
// In production, you might want to do reverse DNS lookup
|
||||||
|
if _, isTrusted := p.trustedUpstreams[remoteHost]; !isTrusted {
|
||||||
|
// Not from trusted upstream, return original connection
|
||||||
|
return nil, conn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Set read timeout for PROXY protocol parsing
|
||||||
|
if err := conn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
|
||||||
|
return nil, conn, fmt.Errorf("failed to set read deadline: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Read the first line (PROXY protocol header)
|
||||||
|
buffer := make([]byte, 512) // PROXY protocol header should be much smaller
|
||||||
|
n, err := conn.Read(buffer)
|
||||||
|
if err != nil {
|
||||||
|
// If we can't read from trusted upstream, treat as regular connection
|
||||||
|
logger.Debug("Could not read from trusted upstream %s, treating as regular connection: %v", remoteHost, err)
|
||||||
|
// Clear read timeout before returning
|
||||||
|
if clearErr := conn.SetReadDeadline(time.Time{}); clearErr != nil {
|
||||||
|
logger.Debug("Failed to clear read deadline: %v", clearErr)
|
||||||
|
}
|
||||||
|
return nil, conn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// Find the end of the first line (CRLF)
|
||||||
|
headerEnd := bytes.Index(buffer[:n], []byte("\r\n"))
|
||||||
|
if headerEnd == -1 {
|
||||||
|
// No PROXY protocol header found, treat as regular TLS connection
|
||||||
|
// Return the connection with the buffered data prepended
|
||||||
|
logger.Debug("No PROXY protocol header from trusted upstream %s, treating as regular TLS connection", remoteHost)
|
||||||
|
|
||||||
|
// Clear read timeout
|
||||||
|
if err := conn.SetReadDeadline(time.Time{}); err != nil {
|
||||||
|
logger.Debug("Failed to clear read deadline: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a reader that includes the buffered data + original connection
|
||||||
|
newReader := io.MultiReader(bytes.NewReader(buffer[:n]), conn)
|
||||||
|
wrappedConn := &proxyProtocolConn{
|
||||||
|
Conn: conn,
|
||||||
|
reader: newReader,
|
||||||
|
}
|
||||||
|
return nil, wrappedConn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
headerLine := string(buffer[:headerEnd])
|
||||||
|
remainingData := buffer[headerEnd+2 : n]
|
||||||
|
|
||||||
|
// Parse PROXY protocol line: "PROXY TCP4/TCP6 srcIP destIP srcPort destPort"
|
||||||
|
parts := strings.Fields(headerLine)
|
||||||
|
if len(parts) != 6 || parts[0] != "PROXY" {
|
||||||
|
// Check for PROXY UNKNOWN
|
||||||
|
if len(parts) == 2 && parts[0] == "PROXY" && parts[1] == "UNKNOWN" {
|
||||||
|
// PROXY UNKNOWN - use original connection info
|
||||||
|
return nil, conn, nil
|
||||||
|
}
|
||||||
|
// Invalid PROXY protocol, but might be regular TLS - treat as such
|
||||||
|
logger.Debug("Invalid PROXY protocol from trusted upstream %s, treating as regular TLS connection: %s", remoteHost, headerLine)
|
||||||
|
|
||||||
|
// Clear read timeout
|
||||||
|
if err := conn.SetReadDeadline(time.Time{}); err != nil {
|
||||||
|
logger.Debug("Failed to clear read deadline: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Return the connection with all buffered data prepended
|
||||||
|
newReader := io.MultiReader(bytes.NewReader(buffer[:n]), conn)
|
||||||
|
wrappedConn := &proxyProtocolConn{
|
||||||
|
Conn: conn,
|
||||||
|
reader: newReader,
|
||||||
|
}
|
||||||
|
return nil, wrappedConn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
protocol := parts[1]
|
||||||
|
srcIP := parts[2]
|
||||||
|
destIP := parts[3]
|
||||||
|
srcPort, err := strconv.Atoi(parts[4])
|
||||||
|
if err != nil {
|
||||||
|
return nil, conn, fmt.Errorf("invalid source port in PROXY header: %s", parts[4])
|
||||||
|
}
|
||||||
|
destPort, err := strconv.Atoi(parts[5])
|
||||||
|
if err != nil {
|
||||||
|
return nil, conn, fmt.Errorf("invalid destination port in PROXY header: %s", parts[5])
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a new reader that includes remaining data + original connection
|
||||||
|
var newReader io.Reader
|
||||||
|
if len(remainingData) > 0 {
|
||||||
|
newReader = io.MultiReader(bytes.NewReader(remainingData), conn)
|
||||||
|
} else {
|
||||||
|
newReader = conn
|
||||||
|
}
|
||||||
|
|
||||||
|
// Create a wrapper connection that reads from the combined reader
|
||||||
|
wrappedConn := &proxyProtocolConn{
|
||||||
|
Conn: conn,
|
||||||
|
reader: newReader,
|
||||||
|
}
|
||||||
|
|
||||||
|
proxyInfo := &ProxyProtocolInfo{
|
||||||
|
Protocol: protocol,
|
||||||
|
SrcIP: srcIP,
|
||||||
|
DestIP: destIP,
|
||||||
|
SrcPort: srcPort,
|
||||||
|
DestPort: destPort,
|
||||||
|
OriginalConn: wrappedConn,
|
||||||
|
}
|
||||||
|
|
||||||
|
// Clear read timeout
|
||||||
|
if err := conn.SetReadDeadline(time.Time{}); err != nil {
|
||||||
|
return nil, conn, fmt.Errorf("failed to clear read deadline: %w", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
return proxyInfo, wrappedConn, nil
|
||||||
|
}
|
||||||
|
|
||||||
|
// proxyProtocolConn wraps a connection to read from a custom reader
|
||||||
|
type proxyProtocolConn struct {
|
||||||
|
net.Conn
|
||||||
|
reader io.Reader
|
||||||
|
}
|
||||||
|
|
||||||
|
func (c *proxyProtocolConn) Read(b []byte) (int, error) {
|
||||||
|
return c.reader.Read(b)
|
||||||
|
}
|
||||||
|
|
||||||
|
// buildProxyProtocolHeaderFromInfo creates a PROXY protocol v1 header using ProxyProtocolInfo
|
||||||
|
func (p *SNIProxy) buildProxyProtocolHeaderFromInfo(proxyInfo *ProxyProtocolInfo, targetAddr net.Addr) string {
|
||||||
|
targetTCP, ok := targetAddr.(*net.TCPAddr)
|
||||||
|
if !ok {
|
||||||
|
// Fallback for unknown address types
|
||||||
|
return "PROXY UNKNOWN\r\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
// Use the original client information from the PROXY protocol
|
||||||
|
var targetIP string
|
||||||
|
var protocol string
|
||||||
|
|
||||||
|
// Parse source IP to determine protocol family
|
||||||
|
srcIP := net.ParseIP(proxyInfo.SrcIP)
|
||||||
|
if srcIP == nil {
|
||||||
|
return "PROXY UNKNOWN\r\n"
|
||||||
|
}
|
||||||
|
|
||||||
|
if srcIP.To4() != nil {
|
||||||
|
// Source is IPv4, use TCP4 protocol
|
||||||
|
protocol = "TCP4"
|
||||||
|
if targetTCP.IP.To4() != nil {
|
||||||
|
// Target is also IPv4, use as-is
|
||||||
|
targetIP = targetTCP.IP.String()
|
||||||
|
} else {
|
||||||
|
// Target is IPv6, but we need IPv4 for consistent protocol family
|
||||||
|
if targetTCP.IP.IsLoopback() {
|
||||||
|
targetIP = "127.0.0.1"
|
||||||
|
} else {
|
||||||
|
targetIP = "127.0.0.1" // Safe fallback
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
// Source is IPv6, use TCP6 protocol
|
||||||
|
protocol = "TCP6"
|
||||||
|
if targetTCP.IP.To4() != nil {
|
||||||
|
// Target is IPv4, convert to IPv6 representation
|
||||||
|
targetIP = "::ffff:" + targetTCP.IP.String()
|
||||||
|
} else {
|
||||||
|
// Target is also IPv6, use as-is
|
||||||
|
targetIP = targetTCP.IP.String()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
return fmt.Sprintf("PROXY %s %s %s %d %d\r\n",
|
||||||
|
protocol,
|
||||||
|
proxyInfo.SrcIP,
|
||||||
|
targetIP,
|
||||||
|
proxyInfo.SrcPort,
|
||||||
|
targetTCP.Port)
|
||||||
|
}
|
||||||
|
|
||||||
// buildProxyProtocolHeader creates a PROXY protocol v1 header
|
// buildProxyProtocolHeader creates a PROXY protocol v1 header
|
||||||
func buildProxyProtocolHeader(clientAddr, targetAddr net.Addr) string {
|
func buildProxyProtocolHeader(clientAddr, targetAddr net.Addr) string {
|
||||||
clientTCP, ok := clientAddr.(*net.TCPAddr)
|
clientTCP, ok := clientAddr.(*net.TCPAddr)
|
||||||
@@ -131,7 +340,7 @@ func buildProxyProtocolHeader(clientAddr, targetAddr net.Addr) string {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// NewSNIProxy creates a new SNI proxy instance
|
// NewSNIProxy creates a new SNI proxy instance
|
||||||
func NewSNIProxy(port int, remoteConfigURL, publicKey, localProxyAddr string, localProxyPort int, localOverrides []string, proxyProtocol bool) (*SNIProxy, error) {
|
func NewSNIProxy(port int, remoteConfigURL, publicKey, localProxyAddr string, localProxyPort int, localOverrides []string, proxyProtocol bool, trustedUpstreams []string) (*SNIProxy, error) {
|
||||||
ctx, cancel := context.WithCancel(context.Background())
|
ctx, cancel := context.WithCancel(context.Background())
|
||||||
|
|
||||||
// Create local overrides map
|
// Create local overrides map
|
||||||
@@ -142,19 +351,50 @@ func NewSNIProxy(port int, remoteConfigURL, publicKey, localProxyAddr string, lo
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Create trusted upstreams map
|
||||||
|
trustedMap := make(map[string]struct{})
|
||||||
|
for _, upstream := range trustedUpstreams {
|
||||||
|
if upstream != "" {
|
||||||
|
// Add both the domain and potentially resolved IPs
|
||||||
|
trustedMap[upstream] = struct{}{}
|
||||||
|
|
||||||
|
// Try to resolve the domain to IPs and add them too
|
||||||
|
if ips, err := net.LookupIP(upstream); err == nil {
|
||||||
|
for _, ip := range ips {
|
||||||
|
trustedMap[ip.String()] = struct{}{}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
proxy := &SNIProxy{
|
proxy := &SNIProxy{
|
||||||
port: port,
|
port: port,
|
||||||
cache: cache.New(3*time.Second, 10*time.Minute),
|
cache: cache.New(3*time.Second, 10*time.Minute),
|
||||||
ctx: ctx,
|
ctx: ctx,
|
||||||
cancel: cancel,
|
cancel: cancel,
|
||||||
localProxyAddr: localProxyAddr,
|
localProxyAddr: localProxyAddr,
|
||||||
localProxyPort: localProxyPort,
|
localProxyPort: localProxyPort,
|
||||||
remoteConfigURL: remoteConfigURL,
|
remoteConfigURL: remoteConfigURL,
|
||||||
publicKey: publicKey,
|
publicKey: publicKey,
|
||||||
proxyProtocol: proxyProtocol,
|
proxyProtocol: proxyProtocol,
|
||||||
localSNIs: make(map[string]struct{}),
|
localSNIs: make(map[string]struct{}),
|
||||||
localOverrides: overridesMap,
|
localOverrides: overridesMap,
|
||||||
activeTunnels: make(map[string]*activeTunnel),
|
activeTunnels: make(map[string]*activeTunnel),
|
||||||
|
trustedUpstreams: trustedMap,
|
||||||
|
httpClient: &http.Client{
|
||||||
|
Timeout: 5 * time.Second,
|
||||||
|
Transport: &http.Transport{
|
||||||
|
MaxIdleConns: 100,
|
||||||
|
MaxIdleConnsPerHost: 10,
|
||||||
|
IdleConnTimeout: 90 * time.Second,
|
||||||
|
},
|
||||||
|
},
|
||||||
|
bufferPool: &sync.Pool{
|
||||||
|
New: func() interface{} {
|
||||||
|
buf := make([]byte, 32*1024)
|
||||||
|
return &buf
|
||||||
|
},
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
return proxy, nil
|
return proxy, nil
|
||||||
@@ -268,20 +508,47 @@ func (p *SNIProxy) handleConnection(clientConn net.Conn) {
|
|||||||
defer p.wg.Done()
|
defer p.wg.Done()
|
||||||
defer clientConn.Close()
|
defer clientConn.Close()
|
||||||
|
|
||||||
|
metrics.RecordSNIConnection("accepted")
|
||||||
|
|
||||||
logger.Debug("Accepted connection from %s", clientConn.RemoteAddr())
|
logger.Debug("Accepted connection from %s", clientConn.RemoteAddr())
|
||||||
|
|
||||||
|
// Check for PROXY protocol from trusted upstream
|
||||||
|
var proxyInfo *ProxyProtocolInfo
|
||||||
|
var actualClientConn net.Conn = clientConn
|
||||||
|
|
||||||
|
if len(p.trustedUpstreams) > 0 {
|
||||||
|
var err error
|
||||||
|
proxyInfo, actualClientConn, err = p.parseProxyProtocolHeader(clientConn)
|
||||||
|
if err != nil {
|
||||||
|
metrics.RecordSNIProxyProtocolParseError()
|
||||||
|
logger.Debug("Failed to parse PROXY protocol: %v", err)
|
||||||
|
return
|
||||||
|
}
|
||||||
|
if proxyInfo != nil {
|
||||||
|
metrics.RecordSNITrustedProxyEvent("proxy_protocol_parsed")
|
||||||
|
logger.Debug("Received PROXY protocol from trusted upstream: %s:%d -> %s:%d",
|
||||||
|
proxyInfo.SrcIP, proxyInfo.SrcPort, proxyInfo.DestIP, proxyInfo.DestPort)
|
||||||
|
} else {
|
||||||
|
// No PROXY protocol detected, but connection is from trusted upstream
|
||||||
|
// This is fine - treat as regular connection
|
||||||
|
logger.Debug("No PROXY protocol detected from trusted upstream, treating as regular connection")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Set read timeout for SNI extraction
|
// Set read timeout for SNI extraction
|
||||||
if err := clientConn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
|
if err := actualClientConn.SetReadDeadline(time.Now().Add(5 * time.Second)); err != nil {
|
||||||
logger.Debug("Failed to set read deadline: %v", err)
|
logger.Debug("Failed to set read deadline: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Extract SNI hostname
|
// Extract SNI hostname
|
||||||
hostname, clientReader, err := p.extractSNI(clientConn)
|
clientHelloStart := time.Now()
|
||||||
|
hostname, clientReader, err := p.extractSNI(actualClientConn)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Debug("SNI extraction failed: %v", err)
|
logger.Debug("SNI extraction failed: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
metrics.RecordProxyTLSHandshake(hostname, time.Since(clientHelloStart).Seconds())
|
||||||
|
|
||||||
if hostname == "" {
|
if hostname == "" {
|
||||||
log.Println("No SNI hostname found")
|
log.Println("No SNI hostname found")
|
||||||
@@ -291,13 +558,20 @@ func (p *SNIProxy) handleConnection(clientConn net.Conn) {
|
|||||||
logger.Debug("SNI hostname detected: %s", hostname)
|
logger.Debug("SNI hostname detected: %s", hostname)
|
||||||
|
|
||||||
// Remove read timeout for normal operation
|
// Remove read timeout for normal operation
|
||||||
if err := clientConn.SetReadDeadline(time.Time{}); err != nil {
|
if err := actualClientConn.SetReadDeadline(time.Time{}); err != nil {
|
||||||
logger.Debug("Failed to clear read deadline: %v", err)
|
logger.Debug("Failed to clear read deadline: %v", err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
// Get routing information
|
// Get routing information - use original client address if available from PROXY protocol
|
||||||
route, err := p.getRoute(hostname, clientConn.RemoteAddr().String())
|
var clientAddrStr string
|
||||||
|
if proxyInfo != nil {
|
||||||
|
clientAddrStr = fmt.Sprintf("%s:%d", proxyInfo.SrcIP, proxyInfo.SrcPort)
|
||||||
|
} else {
|
||||||
|
clientAddrStr = clientConn.RemoteAddr().String()
|
||||||
|
}
|
||||||
|
|
||||||
|
route, err := p.getRoute(hostname, clientAddrStr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Debug("Failed to get route for %s: %v", hostname, err)
|
logger.Debug("Failed to get route for %s: %v", hostname, err)
|
||||||
return
|
return
|
||||||
@@ -322,10 +596,19 @@ func (p *SNIProxy) handleConnection(clientConn net.Conn) {
|
|||||||
defer targetConn.Close()
|
defer targetConn.Close()
|
||||||
|
|
||||||
logger.Debug("Connected to target: %s:%d", route.TargetHost, route.TargetPort)
|
logger.Debug("Connected to target: %s:%d", route.TargetHost, route.TargetPort)
|
||||||
|
metrics.RecordActiveProxyConnection(hostname, 1)
|
||||||
|
defer metrics.RecordActiveProxyConnection(hostname, -1)
|
||||||
|
|
||||||
// Send PROXY protocol header if enabled
|
// Send PROXY protocol header if enabled
|
||||||
if p.proxyProtocol {
|
if p.proxyProtocol {
|
||||||
proxyHeader := buildProxyProtocolHeader(clientConn.RemoteAddr(), targetConn.LocalAddr())
|
var proxyHeader string
|
||||||
|
if proxyInfo != nil {
|
||||||
|
// Use original client info from PROXY protocol
|
||||||
|
proxyHeader = p.buildProxyProtocolHeaderFromInfo(proxyInfo, targetConn.LocalAddr())
|
||||||
|
} else {
|
||||||
|
// Use direct client connection info
|
||||||
|
proxyHeader = buildProxyProtocolHeader(clientConn.RemoteAddr(), targetConn.LocalAddr())
|
||||||
|
}
|
||||||
logger.Debug("Sending PROXY protocol header: %s", strings.TrimSpace(proxyHeader))
|
logger.Debug("Sending PROXY protocol header: %s", strings.TrimSpace(proxyHeader))
|
||||||
|
|
||||||
if _, err := targetConn.Write([]byte(proxyHeader)); err != nil {
|
if _, err := targetConn.Write([]byte(proxyHeader)); err != nil {
|
||||||
@@ -341,7 +624,7 @@ func (p *SNIProxy) handleConnection(clientConn net.Conn) {
|
|||||||
tunnel = &activeTunnel{}
|
tunnel = &activeTunnel{}
|
||||||
p.activeTunnels[hostname] = tunnel
|
p.activeTunnels[hostname] = tunnel
|
||||||
}
|
}
|
||||||
tunnel.conns = append(tunnel.conns, clientConn)
|
tunnel.conns = append(tunnel.conns, actualClientConn)
|
||||||
p.activeTunnelsLock.Unlock()
|
p.activeTunnelsLock.Unlock()
|
||||||
|
|
||||||
defer func() {
|
defer func() {
|
||||||
@@ -350,7 +633,7 @@ func (p *SNIProxy) handleConnection(clientConn net.Conn) {
|
|||||||
if tunnel, ok := p.activeTunnels[hostname]; ok {
|
if tunnel, ok := p.activeTunnels[hostname]; ok {
|
||||||
newConns := make([]net.Conn, 0, len(tunnel.conns))
|
newConns := make([]net.Conn, 0, len(tunnel.conns))
|
||||||
for _, c := range tunnel.conns {
|
for _, c := range tunnel.conns {
|
||||||
if c != clientConn {
|
if c != actualClientConn {
|
||||||
newConns = append(newConns, c)
|
newConns = append(newConns, c)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -364,7 +647,7 @@ func (p *SNIProxy) handleConnection(clientConn net.Conn) {
|
|||||||
}()
|
}()
|
||||||
|
|
||||||
// Start bidirectional data transfer
|
// Start bidirectional data transfer
|
||||||
p.pipe(clientConn, targetConn, clientReader)
|
p.pipe(hostname, actualClientConn, targetConn, clientReader)
|
||||||
}
|
}
|
||||||
|
|
||||||
// getRoute retrieves routing information for a hostname
|
// getRoute retrieves routing information for a hostname
|
||||||
@@ -372,6 +655,7 @@ func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
|
|||||||
// Check local overrides first
|
// Check local overrides first
|
||||||
if _, isOverride := p.localOverrides[hostname]; isOverride {
|
if _, isOverride := p.localOverrides[hostname]; isOverride {
|
||||||
logger.Debug("Local override matched for hostname: %s", hostname)
|
logger.Debug("Local override matched for hostname: %s", hostname)
|
||||||
|
metrics.RecordProxyRouteLookup("local_override", hostname)
|
||||||
return &RouteRecord{
|
return &RouteRecord{
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
TargetHost: p.localProxyAddr,
|
TargetHost: p.localProxyAddr,
|
||||||
@@ -384,6 +668,7 @@ func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
|
|||||||
_, isLocal := p.localSNIs[hostname]
|
_, isLocal := p.localSNIs[hostname]
|
||||||
p.localSNIsLock.RUnlock()
|
p.localSNIsLock.RUnlock()
|
||||||
if isLocal {
|
if isLocal {
|
||||||
|
metrics.RecordProxyRouteLookup("local", hostname)
|
||||||
return &RouteRecord{
|
return &RouteRecord{
|
||||||
Hostname: hostname,
|
Hostname: hostname,
|
||||||
TargetHost: p.localProxyAddr,
|
TargetHost: p.localProxyAddr,
|
||||||
@@ -394,13 +679,16 @@ func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
|
|||||||
// Check cache first
|
// Check cache first
|
||||||
if cached, found := p.cache.Get(hostname); found {
|
if cached, found := p.cache.Get(hostname); found {
|
||||||
if cached == nil {
|
if cached == nil {
|
||||||
|
metrics.RecordProxyRouteLookup("cached_not_found", hostname)
|
||||||
return nil, nil // Cached negative result
|
return nil, nil // Cached negative result
|
||||||
}
|
}
|
||||||
logger.Debug("Cache hit for hostname: %s", hostname)
|
logger.Debug("Cache hit for hostname: %s", hostname)
|
||||||
|
metrics.RecordProxyRouteLookup("cache_hit", hostname)
|
||||||
return cached.(*RouteRecord), nil
|
return cached.(*RouteRecord), nil
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.Debug("Cache miss for hostname: %s, querying API", hostname)
|
logger.Debug("Cache miss for hostname: %s, querying API", hostname)
|
||||||
|
metrics.RecordProxyRouteLookup("cache_miss", hostname)
|
||||||
|
|
||||||
// Query API with timeout
|
// Query API with timeout
|
||||||
ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second)
|
ctx, cancel := context.WithTimeout(p.ctx, 5*time.Second)
|
||||||
@@ -428,22 +716,28 @@ func (p *SNIProxy) getRoute(hostname, clientAddr string) (*RouteRecord, error) {
|
|||||||
req.Header.Set("Content-Type", "application/json")
|
req.Header.Set("Content-Type", "application/json")
|
||||||
|
|
||||||
// Make HTTP request
|
// Make HTTP request
|
||||||
client := &http.Client{Timeout: 5 * time.Second}
|
apiStart := time.Now()
|
||||||
resp, err := client.Do(req)
|
// Make HTTP request using reusable client
|
||||||
|
resp, err := p.httpClient.Do(req)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
metrics.RecordSNIRouteAPIRequest("error")
|
||||||
return nil, fmt.Errorf("API request failed: %w", err)
|
return nil, fmt.Errorf("API request failed: %w", err)
|
||||||
}
|
}
|
||||||
defer resp.Body.Close()
|
defer resp.Body.Close()
|
||||||
|
metrics.RecordSNIRouteAPILatency(time.Since(apiStart).Seconds())
|
||||||
|
|
||||||
if resp.StatusCode == http.StatusNotFound {
|
if resp.StatusCode == http.StatusNotFound {
|
||||||
|
metrics.RecordSNIRouteAPIRequest("not_found")
|
||||||
// Cache negative result for shorter time (1 minute)
|
// Cache negative result for shorter time (1 minute)
|
||||||
p.cache.Set(hostname, nil, 1*time.Minute)
|
p.cache.Set(hostname, nil, 1*time.Minute)
|
||||||
return nil, nil
|
return nil, nil
|
||||||
}
|
}
|
||||||
|
|
||||||
if resp.StatusCode != http.StatusOK {
|
if resp.StatusCode != http.StatusOK {
|
||||||
|
metrics.RecordSNIRouteAPIRequest("error")
|
||||||
return nil, fmt.Errorf("API returned status %d", resp.StatusCode)
|
return nil, fmt.Errorf("API returned status %d", resp.StatusCode)
|
||||||
}
|
}
|
||||||
|
metrics.RecordSNIRouteAPIRequest("success")
|
||||||
|
|
||||||
// Parse response
|
// Parse response
|
||||||
var apiResponse RouteAPIResponse
|
var apiResponse RouteAPIResponse
|
||||||
@@ -500,22 +794,35 @@ func (p *SNIProxy) selectStickyEndpoint(clientAddr string, endpoints []string) s
|
|||||||
}
|
}
|
||||||
|
|
||||||
// pipe handles bidirectional data transfer between connections
|
// pipe handles bidirectional data transfer between connections
|
||||||
func (p *SNIProxy) pipe(clientConn, targetConn net.Conn, clientReader io.Reader) {
|
func (p *SNIProxy) pipe(hostname string, clientConn, targetConn net.Conn, clientReader io.Reader) {
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
wg.Add(2)
|
wg.Add(2)
|
||||||
|
|
||||||
|
// closeOnce ensures we only close connections once
|
||||||
|
var closeOnce sync.Once
|
||||||
|
closeConns := func() {
|
||||||
|
closeOnce.Do(func() {
|
||||||
|
// Close both connections to unblock any pending reads
|
||||||
|
clientConn.Close()
|
||||||
|
targetConn.Close()
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
// Copy data from client to target (using the buffered reader)
|
// Copy data from client to target (using the buffered reader)
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
defer closeConns()
|
||||||
|
|
||||||
|
// Get buffer from pool and return when done
|
||||||
|
bufPtr := p.bufferPool.Get().(*[]byte)
|
||||||
defer func() {
|
defer func() {
|
||||||
if tcpConn, ok := targetConn.(*net.TCPConn); ok {
|
// Clear buffer before returning to pool to prevent data leakage
|
||||||
tcpConn.CloseWrite()
|
clear(*bufPtr)
|
||||||
}
|
p.bufferPool.Put(bufPtr)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Use a large buffer for better performance
|
bytesCopied, err := io.CopyBuffer(targetConn, clientReader, *bufPtr)
|
||||||
buf := make([]byte, 32*1024)
|
metrics.RecordProxyBytesTransmitted(hostname, "client_to_target", bytesCopied)
|
||||||
_, err := io.CopyBuffer(targetConn, clientReader, buf)
|
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
logger.Debug("Copy client->target error: %v", err)
|
logger.Debug("Copy client->target error: %v", err)
|
||||||
}
|
}
|
||||||
@@ -524,15 +831,18 @@ func (p *SNIProxy) pipe(clientConn, targetConn net.Conn, clientReader io.Reader)
|
|||||||
// Copy data from target to client
|
// Copy data from target to client
|
||||||
go func() {
|
go func() {
|
||||||
defer wg.Done()
|
defer wg.Done()
|
||||||
|
defer closeConns()
|
||||||
|
|
||||||
|
// Get buffer from pool and return when done
|
||||||
|
bufPtr := p.bufferPool.Get().(*[]byte)
|
||||||
defer func() {
|
defer func() {
|
||||||
if tcpConn, ok := clientConn.(*net.TCPConn); ok {
|
// Clear buffer before returning to pool to prevent data leakage
|
||||||
tcpConn.CloseWrite()
|
clear(*bufPtr)
|
||||||
}
|
p.bufferPool.Put(bufPtr)
|
||||||
}()
|
}()
|
||||||
|
|
||||||
// Use a large buffer for better performance
|
bytesCopied, err := io.CopyBuffer(clientConn, targetConn, *bufPtr)
|
||||||
buf := make([]byte, 32*1024)
|
metrics.RecordProxyBytesTransmitted(hostname, "target_to_client", bytesCopied)
|
||||||
_, err := io.CopyBuffer(clientConn, targetConn, buf)
|
|
||||||
if err != nil && err != io.EOF {
|
if err != nil && err != io.EOF {
|
||||||
logger.Debug("Copy target->client error: %v", err)
|
logger.Debug("Copy target->client error: %v", err)
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -76,3 +76,44 @@ func TestBuildProxyProtocolHeaderUnknownType(t *testing.T) {
|
|||||||
t.Errorf("Expected %q, got %q", expected, result)
|
t.Errorf("Expected %q, got %q", expected, result)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
func TestBuildProxyProtocolHeaderFromInfo(t *testing.T) {
|
||||||
|
proxy, err := NewSNIProxy(8443, "", "", "127.0.0.1", 443, nil, true, nil)
|
||||||
|
if err != nil {
|
||||||
|
t.Fatalf("Failed to create SNI proxy: %v", err)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test IPv4 case
|
||||||
|
proxyInfo := &ProxyProtocolInfo{
|
||||||
|
Protocol: "TCP4",
|
||||||
|
SrcIP: "10.0.0.1",
|
||||||
|
DestIP: "192.168.1.100",
|
||||||
|
SrcPort: 12345,
|
||||||
|
DestPort: 443,
|
||||||
|
}
|
||||||
|
|
||||||
|
targetAddr, _ := net.ResolveTCPAddr("tcp", "127.0.0.1:8080")
|
||||||
|
header := proxy.buildProxyProtocolHeaderFromInfo(proxyInfo, targetAddr)
|
||||||
|
|
||||||
|
expected := "PROXY TCP4 10.0.0.1 127.0.0.1 12345 8080\r\n"
|
||||||
|
if header != expected {
|
||||||
|
t.Errorf("Expected header '%s', got '%s'", expected, header)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Test IPv6 case
|
||||||
|
proxyInfo = &ProxyProtocolInfo{
|
||||||
|
Protocol: "TCP6",
|
||||||
|
SrcIP: "2001:db8::1",
|
||||||
|
DestIP: "2001:db8::2",
|
||||||
|
SrcPort: 12345,
|
||||||
|
DestPort: 443,
|
||||||
|
}
|
||||||
|
|
||||||
|
targetAddr, _ = net.ResolveTCPAddr("tcp6", "[::1]:8080")
|
||||||
|
header = proxy.buildProxyProtocolHeaderFromInfo(proxyInfo, targetAddr)
|
||||||
|
|
||||||
|
expected = "PROXY TCP6 2001:db8::1 ::1 12345 8080\r\n"
|
||||||
|
if header != expected {
|
||||||
|
t.Errorf("Expected header '%s', got '%s'", expected, header)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
598
relay/relay.go
598
relay/relay.go
@@ -2,21 +2,26 @@ package relay
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"bytes"
|
"bytes"
|
||||||
|
"context"
|
||||||
"encoding/binary"
|
"encoding/binary"
|
||||||
"encoding/json"
|
"encoding/json"
|
||||||
"fmt"
|
"fmt"
|
||||||
"io"
|
"io"
|
||||||
"net"
|
"net"
|
||||||
"net/http"
|
"net/http"
|
||||||
|
"runtime"
|
||||||
"sync"
|
"sync"
|
||||||
"time"
|
"time"
|
||||||
|
|
||||||
|
"github.com/fosrl/gerbil/internal/metrics"
|
||||||
"github.com/fosrl/gerbil/logger"
|
"github.com/fosrl/gerbil/logger"
|
||||||
"golang.org/x/crypto/chacha20poly1305"
|
"golang.org/x/crypto/chacha20poly1305"
|
||||||
"golang.org/x/crypto/curve25519"
|
"golang.org/x/crypto/curve25519"
|
||||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||||
)
|
)
|
||||||
|
|
||||||
|
const relayIfname = "relay"
|
||||||
|
|
||||||
type EncryptedHolePunchMessage struct {
|
type EncryptedHolePunchMessage struct {
|
||||||
EphemeralPublicKey string `json:"ephemeralPublicKey"`
|
EphemeralPublicKey string `json:"ephemeralPublicKey"`
|
||||||
Nonce []byte `json:"nonce"`
|
Nonce []byte `json:"nonce"`
|
||||||
@@ -24,20 +29,22 @@ type EncryptedHolePunchMessage struct {
|
|||||||
}
|
}
|
||||||
|
|
||||||
type HolePunchMessage struct {
|
type HolePunchMessage struct {
|
||||||
OlmID string `json:"olmId"`
|
OlmID string `json:"olmId"`
|
||||||
NewtID string `json:"newtId"`
|
NewtID string `json:"newtId"`
|
||||||
Token string `json:"token"`
|
Token string `json:"token"`
|
||||||
|
PublicKey string `json:"publicKey"`
|
||||||
}
|
}
|
||||||
|
|
||||||
type ClientEndpoint struct {
|
type ClientEndpoint struct {
|
||||||
OlmID string `json:"olmId"`
|
OlmID string `json:"olmId"`
|
||||||
NewtID string `json:"newtId"`
|
NewtID string `json:"newtId"`
|
||||||
Token string `json:"token"`
|
Token string `json:"token"`
|
||||||
IP string `json:"ip"`
|
IP string `json:"ip"`
|
||||||
Port int `json:"port"`
|
Port int `json:"port"`
|
||||||
Timestamp int64 `json:"timestamp"`
|
Timestamp int64 `json:"timestamp"`
|
||||||
ReachableAt string `json:"reachableAt"`
|
ReachableAt string `json:"reachableAt"`
|
||||||
PublicKey string `json:"publicKey"`
|
ExitNodePublicKey string `json:"exitNodePublicKey"`
|
||||||
|
ClientPublicKey string `json:"publicKey"`
|
||||||
}
|
}
|
||||||
|
|
||||||
// Updated to support multiple destination peers
|
// Updated to support multiple destination peers
|
||||||
@@ -58,12 +65,52 @@ type DestinationConn struct {
|
|||||||
|
|
||||||
// Type for storing WireGuard handshake information
|
// Type for storing WireGuard handshake information
|
||||||
type WireGuardSession struct {
|
type WireGuardSession struct {
|
||||||
|
mu sync.RWMutex
|
||||||
ReceiverIndex uint32
|
ReceiverIndex uint32
|
||||||
SenderIndex uint32
|
SenderIndex uint32
|
||||||
DestAddr *net.UDPAddr
|
DestAddr *net.UDPAddr
|
||||||
LastSeen time.Time
|
LastSeen time.Time
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// GetSenderIndex returns the SenderIndex in a thread-safe manner
|
||||||
|
func (s *WireGuardSession) GetSenderIndex() uint32 {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
return s.SenderIndex
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetDestAddr returns the DestAddr in a thread-safe manner
|
||||||
|
func (s *WireGuardSession) GetDestAddr() *net.UDPAddr {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
return s.DestAddr
|
||||||
|
}
|
||||||
|
|
||||||
|
// GetLastSeen returns the LastSeen timestamp in a thread-safe manner
|
||||||
|
func (s *WireGuardSession) GetLastSeen() time.Time {
|
||||||
|
s.mu.RLock()
|
||||||
|
defer s.mu.RUnlock()
|
||||||
|
return s.LastSeen
|
||||||
|
}
|
||||||
|
|
||||||
|
// UpdateLastSeen updates the LastSeen timestamp in a thread-safe manner
|
||||||
|
func (s *WireGuardSession) UpdateLastSeen() {
|
||||||
|
s.mu.Lock()
|
||||||
|
defer s.mu.Unlock()
|
||||||
|
s.LastSeen = time.Now()
|
||||||
|
}
|
||||||
|
|
||||||
|
// Type for tracking bidirectional communication patterns to rebuild sessions
|
||||||
|
type CommunicationPattern struct {
|
||||||
|
FromClient *net.UDPAddr // The client address
|
||||||
|
ToDestination *net.UDPAddr // The destination address
|
||||||
|
ClientIndex uint32 // The receiver index seen from client
|
||||||
|
DestIndex uint32 // The receiver index seen from destination
|
||||||
|
LastFromClient time.Time // Last packet from client to destination
|
||||||
|
LastFromDest time.Time // Last packet from destination to client
|
||||||
|
PacketCount int // Number of packets observed
|
||||||
|
}
|
||||||
|
|
||||||
type InitialMappings struct {
|
type InitialMappings struct {
|
||||||
Mappings map[string]ProxyMapping `json:"mappings"` // key is "ip:port"
|
Mappings map[string]ProxyMapping `json:"mappings"` // key is "ip:port"
|
||||||
}
|
}
|
||||||
@@ -75,6 +122,13 @@ type Packet struct {
|
|||||||
n int
|
n int
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// holePunchRateLimitEntry tracks hole punch message counts within a sliding 1-second window.
|
||||||
|
type holePunchRateLimitEntry struct {
|
||||||
|
mu sync.Mutex
|
||||||
|
count int
|
||||||
|
windowStart time.Time
|
||||||
|
}
|
||||||
|
|
||||||
// WireGuard message types
|
// WireGuard message types
|
||||||
const (
|
const (
|
||||||
WireGuardMessageTypeHandshakeInitiation = 1
|
WireGuardMessageTypeHandshakeInitiation = 1
|
||||||
@@ -101,22 +155,38 @@ type UDPProxyServer struct {
|
|||||||
connections sync.Map // map[string]*DestinationConn where key is destination "ip:port"
|
connections sync.Map // map[string]*DestinationConn where key is destination "ip:port"
|
||||||
privateKey wgtypes.Key
|
privateKey wgtypes.Key
|
||||||
packetChan chan Packet
|
packetChan chan Packet
|
||||||
|
ctx context.Context
|
||||||
|
cancel context.CancelFunc
|
||||||
|
|
||||||
// Session tracking for WireGuard peers
|
// Session tracking for WireGuard peers
|
||||||
// Key format: "senderIndex:receiverIndex"
|
// Key format: "senderIndex:receiverIndex"
|
||||||
wgSessions sync.Map
|
wgSessions sync.Map
|
||||||
|
// Session index for O(1) lookup by receiver index
|
||||||
|
// Key: receiverIndex (uint32), Value: *WireGuardSession
|
||||||
|
sessionsByReceiverIndex sync.Map
|
||||||
|
// Communication pattern tracking for rebuilding sessions
|
||||||
|
// Key format: "clientIP:clientPort-destIP:destPort"
|
||||||
|
commPatterns sync.Map
|
||||||
|
// Rate limiter for encrypted hole punch messages, keyed by "ip:port"
|
||||||
|
holePunchRateLimiter sync.Map
|
||||||
|
// Cache for resolved UDP addresses to avoid per-packet DNS lookups
|
||||||
|
// Key: "ip:port" string, Value: *net.UDPAddr
|
||||||
|
addrCache sync.Map
|
||||||
// ReachableAt is the URL where this server can be reached
|
// ReachableAt is the URL where this server can be reached
|
||||||
ReachableAt string
|
ReachableAt string
|
||||||
}
|
}
|
||||||
|
|
||||||
// NewUDPProxyServer initializes the server with a buffered packet channel.
|
// NewUDPProxyServer initializes the server with a buffered packet channel and derived context.
|
||||||
func NewUDPProxyServer(addr, serverURL string, privateKey wgtypes.Key, reachableAt string) *UDPProxyServer {
|
func NewUDPProxyServer(parentCtx context.Context, addr, serverURL string, privateKey wgtypes.Key, reachableAt string) *UDPProxyServer {
|
||||||
|
ctx, cancel := context.WithCancel(parentCtx)
|
||||||
return &UDPProxyServer{
|
return &UDPProxyServer{
|
||||||
addr: addr,
|
addr: addr,
|
||||||
serverURL: serverURL,
|
serverURL: serverURL,
|
||||||
privateKey: privateKey,
|
privateKey: privateKey,
|
||||||
packetChan: make(chan Packet, 1000),
|
packetChan: make(chan Packet, 50000), // Increased from 1000 to handle high throughput
|
||||||
ReachableAt: reachableAt,
|
ReachableAt: reachableAt,
|
||||||
|
ctx: ctx,
|
||||||
|
cancel: cancel,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -138,8 +208,13 @@ func (s *UDPProxyServer) Start() error {
|
|||||||
s.conn = conn
|
s.conn = conn
|
||||||
logger.Info("UDP server listening on %s", s.addr)
|
logger.Info("UDP server listening on %s", s.addr)
|
||||||
|
|
||||||
// Start a fixed number of worker goroutines.
|
// Start worker goroutines based on CPU cores for better parallelism
|
||||||
workerCount := 10 // TODO: Make this configurable or pick it better!
|
// At high throughput (160+ Mbps), we need many workers to avoid bottlenecks
|
||||||
|
workerCount := runtime.NumCPU() * 10
|
||||||
|
if workerCount < 20 {
|
||||||
|
workerCount = 20 // Minimum 20 workers
|
||||||
|
}
|
||||||
|
logger.Info("Starting %d packet workers (CPUs: %d)", workerCount, runtime.NumCPU())
|
||||||
for i := 0; i < workerCount; i++ {
|
for i := 0; i < workerCount; i++ {
|
||||||
go s.packetWorker()
|
go s.packetWorker()
|
||||||
}
|
}
|
||||||
@@ -156,21 +231,61 @@ func (s *UDPProxyServer) Start() error {
|
|||||||
// Start the proxy mapping cleanup routine
|
// Start the proxy mapping cleanup routine
|
||||||
go s.cleanupIdleProxyMappings()
|
go s.cleanupIdleProxyMappings()
|
||||||
|
|
||||||
|
// Start the communication pattern cleanup routine
|
||||||
|
go s.cleanupIdleCommunicationPatterns()
|
||||||
|
|
||||||
|
// Start the hole punch rate limiter cleanup routine
|
||||||
|
go s.cleanupHolePunchRateLimiter()
|
||||||
|
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
|
|
||||||
func (s *UDPProxyServer) Stop() {
|
func (s *UDPProxyServer) Stop() {
|
||||||
s.conn.Close()
|
// Signal all background goroutines to stop
|
||||||
|
if s.cancel != nil {
|
||||||
|
s.cancel()
|
||||||
|
}
|
||||||
|
// Close listener to unblock reads
|
||||||
|
if s.conn != nil {
|
||||||
|
_ = s.conn.Close()
|
||||||
|
}
|
||||||
|
// Close all downstream UDP connections
|
||||||
|
s.connections.Range(func(key, value interface{}) bool {
|
||||||
|
if dc, ok := value.(*DestinationConn); ok && dc.conn != nil {
|
||||||
|
_ = dc.conn.Close()
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
// Close packet channel to stop workers
|
||||||
|
select {
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
default:
|
||||||
|
}
|
||||||
|
close(s.packetChan)
|
||||||
}
|
}
|
||||||
|
|
||||||
// readPackets continuously reads from the UDP socket and pushes packets into the channel.
|
// readPackets continuously reads from the UDP socket and pushes packets into the channel.
|
||||||
func (s *UDPProxyServer) readPackets() {
|
func (s *UDPProxyServer) readPackets() {
|
||||||
for {
|
for {
|
||||||
|
// Exit promptly if context is canceled
|
||||||
|
select {
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
}
|
||||||
buf := bufferPool.Get().([]byte)
|
buf := bufferPool.Get().([]byte)
|
||||||
n, remoteAddr, err := s.conn.ReadFromUDP(buf)
|
n, remoteAddr, err := s.conn.ReadFromUDP(buf)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Error reading UDP packet: %v", err)
|
// If we're shutting down, exit
|
||||||
continue
|
select {
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
bufferPool.Put(buf[:1500])
|
||||||
|
return
|
||||||
|
default:
|
||||||
|
logger.Error("Error reading UDP packet: %v", err)
|
||||||
|
bufferPool.Put(buf[:1500])
|
||||||
|
continue
|
||||||
|
}
|
||||||
}
|
}
|
||||||
s.packetChan <- Packet{data: buf[:n], remoteAddr: remoteAddr, n: n}
|
s.packetChan <- Packet{data: buf[:n], remoteAddr: remoteAddr, n: n}
|
||||||
}
|
}
|
||||||
@@ -181,13 +296,40 @@ func (s *UDPProxyServer) packetWorker() {
|
|||||||
for packet := range s.packetChan {
|
for packet := range s.packetChan {
|
||||||
// Determine packet type by inspecting the first byte.
|
// Determine packet type by inspecting the first byte.
|
||||||
if packet.n > 0 && packet.data[0] >= 1 && packet.data[0] <= 4 {
|
if packet.n > 0 && packet.data[0] >= 1 && packet.data[0] <= 4 {
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "wireguard", "in")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "wireguard", float64(packet.n))
|
||||||
// Process as a WireGuard packet.
|
// Process as a WireGuard packet.
|
||||||
s.handleWireGuardPacket(packet.data, packet.remoteAddr)
|
s.handleWireGuardPacket(packet.data, packet.remoteAddr)
|
||||||
} else {
|
} else {
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "hole_punch", "in")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "hole_punch", float64(packet.n))
|
||||||
|
// Rate limit: allow at most 2 hole punch messages per IP:Port per second
|
||||||
|
rateLimitKey := packet.remoteAddr.String()
|
||||||
|
entryVal, _ := s.holePunchRateLimiter.LoadOrStore(rateLimitKey, &holePunchRateLimitEntry{
|
||||||
|
windowStart: time.Now(),
|
||||||
|
})
|
||||||
|
rlEntry := entryVal.(*holePunchRateLimitEntry)
|
||||||
|
rlEntry.mu.Lock()
|
||||||
|
now := time.Now()
|
||||||
|
if now.Sub(rlEntry.windowStart) >= time.Second {
|
||||||
|
rlEntry.count = 0
|
||||||
|
rlEntry.windowStart = now
|
||||||
|
}
|
||||||
|
rlEntry.count++
|
||||||
|
allowed := rlEntry.count <= 2
|
||||||
|
rlEntry.mu.Unlock()
|
||||||
|
if !allowed {
|
||||||
|
// logger.Debug("Rate limiting hole punch message from %s", rateLimitKey)
|
||||||
|
metrics.RecordHolePunchEvent(relayIfname, "rate_limited")
|
||||||
|
bufferPool.Put(packet.data[:1500])
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
|
||||||
// Process as an encrypted hole punch message
|
// Process as an encrypted hole punch message
|
||||||
var encMsg EncryptedHolePunchMessage
|
var encMsg EncryptedHolePunchMessage
|
||||||
if err := json.Unmarshal(packet.data, &encMsg); err != nil {
|
if err := json.Unmarshal(packet.data, &encMsg); err != nil {
|
||||||
logger.Error("Error unmarshaling encrypted message: %v", err)
|
logger.Error("Error unmarshaling encrypted message: %v", err)
|
||||||
|
metrics.RecordHolePunchEvent(relayIfname, "error")
|
||||||
// Return the buffer to the pool for reuse and continue with next packet
|
// Return the buffer to the pool for reuse and continue with next packet
|
||||||
bufferPool.Put(packet.data[:1500])
|
bufferPool.Put(packet.data[:1500])
|
||||||
continue
|
continue
|
||||||
@@ -195,6 +337,7 @@ func (s *UDPProxyServer) packetWorker() {
|
|||||||
|
|
||||||
if encMsg.EphemeralPublicKey == "" {
|
if encMsg.EphemeralPublicKey == "" {
|
||||||
logger.Error("Received malformed message without ephemeral key")
|
logger.Error("Received malformed message without ephemeral key")
|
||||||
|
metrics.RecordHolePunchEvent(relayIfname, "error")
|
||||||
// Return the buffer to the pool for reuse and continue with next packet
|
// Return the buffer to the pool for reuse and continue with next packet
|
||||||
bufferPool.Put(packet.data[:1500])
|
bufferPool.Put(packet.data[:1500])
|
||||||
continue
|
continue
|
||||||
@@ -203,7 +346,8 @@ func (s *UDPProxyServer) packetWorker() {
|
|||||||
// This appears to be an encrypted message
|
// This appears to be an encrypted message
|
||||||
decryptedData, err := s.decryptMessage(encMsg)
|
decryptedData, err := s.decryptMessage(encMsg)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to decrypt message: %v", err)
|
// logger.Error("Failed to decrypt message: %v", err)
|
||||||
|
metrics.RecordHolePunchEvent(relayIfname, "error")
|
||||||
// Return the buffer to the pool for reuse and continue with next packet
|
// Return the buffer to the pool for reuse and continue with next packet
|
||||||
bufferPool.Put(packet.data[:1500])
|
bufferPool.Put(packet.data[:1500])
|
||||||
continue
|
continue
|
||||||
@@ -213,24 +357,27 @@ func (s *UDPProxyServer) packetWorker() {
|
|||||||
var msg HolePunchMessage
|
var msg HolePunchMessage
|
||||||
if err := json.Unmarshal(decryptedData, &msg); err != nil {
|
if err := json.Unmarshal(decryptedData, &msg); err != nil {
|
||||||
logger.Error("Error unmarshaling decrypted message: %v", err)
|
logger.Error("Error unmarshaling decrypted message: %v", err)
|
||||||
|
metrics.RecordHolePunchEvent(relayIfname, "error")
|
||||||
// Return the buffer to the pool for reuse and continue with next packet
|
// Return the buffer to the pool for reuse and continue with next packet
|
||||||
bufferPool.Put(packet.data[:1500])
|
bufferPool.Put(packet.data[:1500])
|
||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
endpoint := ClientEndpoint{
|
endpoint := ClientEndpoint{
|
||||||
NewtID: msg.NewtID,
|
NewtID: msg.NewtID,
|
||||||
OlmID: msg.OlmID,
|
OlmID: msg.OlmID,
|
||||||
Token: msg.Token,
|
Token: msg.Token,
|
||||||
IP: packet.remoteAddr.IP.String(),
|
IP: packet.remoteAddr.IP.String(),
|
||||||
Port: packet.remoteAddr.Port,
|
Port: packet.remoteAddr.Port,
|
||||||
Timestamp: time.Now().Unix(),
|
Timestamp: time.Now().Unix(),
|
||||||
ReachableAt: s.ReachableAt,
|
ReachableAt: s.ReachableAt,
|
||||||
PublicKey: s.privateKey.PublicKey().String(),
|
ExitNodePublicKey: s.privateKey.PublicKey().String(),
|
||||||
|
ClientPublicKey: msg.PublicKey,
|
||||||
}
|
}
|
||||||
logger.Debug("Created endpoint from packet remoteAddr %s: IP=%s, Port=%d", packet.remoteAddr.String(), endpoint.IP, endpoint.Port)
|
logger.Debug("Created endpoint from packet remoteAddr %s: IP=%s, Port=%d", packet.remoteAddr.String(), endpoint.IP, endpoint.Port)
|
||||||
s.notifyServer(endpoint)
|
s.notifyServer(endpoint)
|
||||||
s.clearSessionsForIP(endpoint.IP) // Clear sessions for this IP to allow re-establishment
|
s.clearSessionsForIP(endpoint.IP) // Clear sessions for this IP to allow re-establishment
|
||||||
|
metrics.RecordHolePunchEvent(relayIfname, "success")
|
||||||
}
|
}
|
||||||
// Return the buffer to the pool for reuse.
|
// Return the buffer to the pool for reuse.
|
||||||
bufferPool.Put(packet.data[:1500])
|
bufferPool.Put(packet.data[:1500])
|
||||||
@@ -298,6 +445,8 @@ func (s *UDPProxyServer) fetchInitialMappings() error {
|
|||||||
mapping.LastUsed = time.Now()
|
mapping.LastUsed = time.Now()
|
||||||
s.proxyMappings.Store(key, mapping)
|
s.proxyMappings.Store(key, mapping)
|
||||||
}
|
}
|
||||||
|
metrics.RecordProxyInitialMappings(relayIfname, int64(len(initialMappings.Mappings)))
|
||||||
|
metrics.RecordProxyMapping(relayIfname, int64(len(initialMappings.Mappings)))
|
||||||
logger.Info("Loaded %d initial proxy mappings", len(initialMappings.Mappings))
|
logger.Info("Loaded %d initial proxy mappings", len(initialMappings.Mappings))
|
||||||
return nil
|
return nil
|
||||||
}
|
}
|
||||||
@@ -327,6 +476,43 @@ func extractWireGuardIndices(packet []byte) (uint32, uint32, bool) {
|
|||||||
return 0, 0, false
|
return 0, 0, false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// cachedAddr holds a resolved UDP address with TTL
|
||||||
|
type cachedAddr struct {
|
||||||
|
addr *net.UDPAddr
|
||||||
|
expiresAt time.Time
|
||||||
|
}
|
||||||
|
|
||||||
|
// addrCacheTTL is how long resolved addresses are cached before re-resolving
|
||||||
|
const addrCacheTTL = 5 * time.Minute
|
||||||
|
|
||||||
|
// getCachedAddr returns a cached UDP address or resolves and caches it.
|
||||||
|
// This avoids per-packet DNS lookups which are a major throughput bottleneck.
|
||||||
|
func (s *UDPProxyServer) getCachedAddr(ip string, port int) (*net.UDPAddr, error) {
|
||||||
|
key := fmt.Sprintf("%s:%d", ip, port)
|
||||||
|
|
||||||
|
// Check cache first
|
||||||
|
if cached, ok := s.addrCache.Load(key); ok {
|
||||||
|
entry := cached.(*cachedAddr)
|
||||||
|
if time.Now().Before(entry.expiresAt) {
|
||||||
|
return entry.addr, nil
|
||||||
|
}
|
||||||
|
// Cache expired, delete and re-resolve
|
||||||
|
s.addrCache.Delete(key)
|
||||||
|
}
|
||||||
|
|
||||||
|
// Resolve and cache
|
||||||
|
addr, err := net.ResolveUDPAddr("udp", key)
|
||||||
|
if err != nil {
|
||||||
|
return nil, err
|
||||||
|
}
|
||||||
|
|
||||||
|
s.addrCache.Store(key, &cachedAddr{
|
||||||
|
addr: addr,
|
||||||
|
expiresAt: time.Now().Add(addrCacheTTL),
|
||||||
|
})
|
||||||
|
return addr, nil
|
||||||
|
}
|
||||||
|
|
||||||
// Updated to handle multi-peer WireGuard communication
|
// Updated to handle multi-peer WireGuard communication
|
||||||
func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UDPAddr) {
|
func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UDPAddr) {
|
||||||
if len(packet) == 0 {
|
if len(packet) == 0 {
|
||||||
@@ -361,7 +547,7 @@ func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UD
|
|||||||
logger.Debug("Forwarding handshake initiation from %s (sender index: %d) to peers %v", remoteAddr, senderIndex, proxyMapping.Destinations)
|
logger.Debug("Forwarding handshake initiation from %s (sender index: %d) to peers %v", remoteAddr, senderIndex, proxyMapping.Destinations)
|
||||||
|
|
||||||
for _, dest := range proxyMapping.Destinations {
|
for _, dest := range proxyMapping.Destinations {
|
||||||
destAddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", dest.DestinationIP, dest.DestinationPort))
|
destAddr, err := s.getCachedAddr(dest.DestinationIP, dest.DestinationPort)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to resolve destination address: %v", err)
|
logger.Error("Failed to resolve destination address: %v", err)
|
||||||
continue
|
continue
|
||||||
@@ -375,8 +561,12 @@ func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UD
|
|||||||
|
|
||||||
_, err = conn.Write(packet)
|
_, err = conn.Write(packet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to forward handshake initiation: %v", err)
|
logger.Debug("Failed to forward handshake initiation: %v", err)
|
||||||
|
metrics.RecordProxyConnectionError(relayIfname, "write_udp")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "wireguard", "out")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "wireguard", float64(len(packet)))
|
||||||
}
|
}
|
||||||
|
|
||||||
case WireGuardMessageTypeHandshakeResponse:
|
case WireGuardMessageTypeHandshakeResponse:
|
||||||
@@ -388,16 +578,23 @@ func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UD
|
|||||||
sessionKey := fmt.Sprintf("%d:%d", receiverIndex, senderIndex)
|
sessionKey := fmt.Sprintf("%d:%d", receiverIndex, senderIndex)
|
||||||
|
|
||||||
// Store the session information
|
// Store the session information
|
||||||
s.wgSessions.Store(sessionKey, &WireGuardSession{
|
session := &WireGuardSession{
|
||||||
ReceiverIndex: receiverIndex,
|
ReceiverIndex: receiverIndex,
|
||||||
SenderIndex: senderIndex,
|
SenderIndex: senderIndex,
|
||||||
DestAddr: remoteAddr,
|
DestAddr: remoteAddr,
|
||||||
LastSeen: time.Now(),
|
LastSeen: time.Now(),
|
||||||
})
|
}
|
||||||
|
if _, loaded := s.wgSessions.LoadOrStore(sessionKey, session); loaded {
|
||||||
|
s.wgSessions.Store(sessionKey, session)
|
||||||
|
} else {
|
||||||
|
metrics.RecordSession(relayIfname, 1)
|
||||||
|
}
|
||||||
|
// Also index by sender index for O(1) lookup in transport data path
|
||||||
|
s.sessionsByReceiverIndex.Store(senderIndex, session)
|
||||||
|
|
||||||
// Forward the response to the original sender
|
// Forward the response to the original sender
|
||||||
for _, dest := range proxyMapping.Destinations {
|
for _, dest := range proxyMapping.Destinations {
|
||||||
destAddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", dest.DestinationIP, dest.DestinationPort))
|
destAddr, err := s.getCachedAddr(dest.DestinationIP, dest.DestinationPort)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to resolve destination address: %v", err)
|
logger.Error("Failed to resolve destination address: %v", err)
|
||||||
continue
|
continue
|
||||||
@@ -412,30 +609,26 @@ func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UD
|
|||||||
_, err = conn.Write(packet)
|
_, err = conn.Write(packet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to forward handshake response: %v", err)
|
logger.Error("Failed to forward handshake response: %v", err)
|
||||||
|
metrics.RecordProxyConnectionError(relayIfname, "write_udp")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "wireguard", "out")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "wireguard", float64(len(packet)))
|
||||||
}
|
}
|
||||||
|
|
||||||
case WireGuardMessageTypeTransportData:
|
case WireGuardMessageTypeTransportData:
|
||||||
// Data packet: forward only to the established session peer
|
// Data packet: forward only to the established session peer
|
||||||
// logger.Debug("Received transport data with receiver index %d from %s", receiverIndex, remoteAddr)
|
// logger.Debug("Received transport data with receiver index %d from %s", receiverIndex, remoteAddr)
|
||||||
|
|
||||||
// Look up the session based on the receiver index
|
// Look up the session based on the receiver index - O(1) lookup instead of O(n) Range
|
||||||
var destAddr *net.UDPAddr
|
var destAddr *net.UDPAddr
|
||||||
|
|
||||||
// First check for existing sessions to see if we know where to send this packet
|
// Fast path: direct index lookup by receiver index
|
||||||
s.wgSessions.Range(func(k, v interface{}) bool {
|
if sessionObj, ok := s.sessionsByReceiverIndex.Load(receiverIndex); ok {
|
||||||
session := v.(*WireGuardSession)
|
session := sessionObj.(*WireGuardSession)
|
||||||
if session.SenderIndex == receiverIndex {
|
destAddr = session.GetDestAddr()
|
||||||
// Found matching session
|
session.UpdateLastSeen()
|
||||||
destAddr = session.DestAddr
|
}
|
||||||
|
|
||||||
// Update last seen time
|
|
||||||
session.LastSeen = time.Now()
|
|
||||||
s.wgSessions.Store(k, session)
|
|
||||||
return false // stop iteration
|
|
||||||
}
|
|
||||||
return true // continue iteration
|
|
||||||
})
|
|
||||||
|
|
||||||
if destAddr != nil {
|
if destAddr != nil {
|
||||||
// We found a specific peer to forward to
|
// We found a specific peer to forward to
|
||||||
@@ -445,15 +638,22 @@ func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UD
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track communication pattern for session rebuilding
|
||||||
|
s.trackCommunicationPattern(remoteAddr, destAddr, receiverIndex, true)
|
||||||
|
|
||||||
_, err = conn.Write(packet)
|
_, err = conn.Write(packet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Debug("Failed to forward transport data: %v", err)
|
logger.Debug("Failed to forward transport data: %v", err)
|
||||||
|
metrics.RecordProxyConnectionError(relayIfname, "write_udp")
|
||||||
|
return
|
||||||
}
|
}
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "wireguard", "out")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "wireguard", float64(len(packet)))
|
||||||
} else {
|
} else {
|
||||||
// No known session, fall back to forwarding to all peers
|
// No known session, fall back to forwarding to all peers
|
||||||
logger.Debug("No session found for receiver index %d, forwarding to all destinations", receiverIndex)
|
logger.Debug("No session found for receiver index %d, forwarding to all destinations", receiverIndex)
|
||||||
for _, dest := range proxyMapping.Destinations {
|
for _, dest := range proxyMapping.Destinations {
|
||||||
destAddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", dest.DestinationIP, dest.DestinationPort))
|
destAddr, err := s.getCachedAddr(dest.DestinationIP, dest.DestinationPort)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to resolve destination address: %v", err)
|
logger.Error("Failed to resolve destination address: %v", err)
|
||||||
continue
|
continue
|
||||||
@@ -465,10 +665,17 @@ func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UD
|
|||||||
continue
|
continue
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Track communication pattern for session rebuilding
|
||||||
|
s.trackCommunicationPattern(remoteAddr, destAddr, receiverIndex, true)
|
||||||
|
|
||||||
_, err = conn.Write(packet)
|
_, err = conn.Write(packet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Debug("Failed to forward transport data: %v", err)
|
logger.Debug("Failed to forward transport data: %v", err)
|
||||||
|
metrics.RecordProxyConnectionError(relayIfname, "write_udp")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "wireguard", "out")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "wireguard", float64(len(packet)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -478,7 +685,7 @@ func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UD
|
|||||||
|
|
||||||
// Forward to all peers
|
// Forward to all peers
|
||||||
for _, dest := range proxyMapping.Destinations {
|
for _, dest := range proxyMapping.Destinations {
|
||||||
destAddr, err := net.ResolveUDPAddr("udp", fmt.Sprintf("%s:%d", dest.DestinationIP, dest.DestinationPort))
|
destAddr, err := s.getCachedAddr(dest.DestinationIP, dest.DestinationPort)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to resolve destination address: %v", err)
|
logger.Error("Failed to resolve destination address: %v", err)
|
||||||
continue
|
continue
|
||||||
@@ -493,7 +700,11 @@ func (s *UDPProxyServer) handleWireGuardPacket(packet []byte, remoteAddr *net.UD
|
|||||||
_, err = conn.Write(packet)
|
_, err = conn.Write(packet)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to forward WireGuard packet: %v", err)
|
logger.Error("Failed to forward WireGuard packet: %v", err)
|
||||||
|
metrics.RecordProxyConnectionError(relayIfname, "write_udp")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "wireguard", "out")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "wireguard", float64(len(packet)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -511,6 +722,7 @@ func (s *UDPProxyServer) getOrCreateConnection(destAddr *net.UDPAddr, remoteAddr
|
|||||||
// Create new connection
|
// Create new connection
|
||||||
newConn, err := net.DialUDP("udp", nil, destAddr)
|
newConn, err := net.DialUDP("udp", nil, destAddr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
|
metrics.RecordProxyConnectionError(relayIfname, "dial_udp")
|
||||||
return nil, fmt.Errorf("failed to create UDP connection: %v", err)
|
return nil, fmt.Errorf("failed to create UDP connection: %v", err)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -534,6 +746,8 @@ func (s *UDPProxyServer) handleResponses(conn *net.UDPConn, destAddr *net.UDPAdd
|
|||||||
logger.Debug("Error reading response from %s: %v", destAddr.String(), err)
|
logger.Debug("Error reading response from %s: %v", destAddr.String(), err)
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "wireguard", "in")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "wireguard", float64(n))
|
||||||
|
|
||||||
// Process the response to track sessions if it's a WireGuard packet
|
// Process the response to track sessions if it's a WireGuard packet
|
||||||
if n > 0 && buffer[0] >= 1 && buffer[0] <= 4 {
|
if n > 0 && buffer[0] >= 1 && buffer[0] <= 4 {
|
||||||
@@ -541,13 +755,23 @@ func (s *UDPProxyServer) handleResponses(conn *net.UDPConn, destAddr *net.UDPAdd
|
|||||||
if ok && buffer[0] == WireGuardMessageTypeHandshakeResponse {
|
if ok && buffer[0] == WireGuardMessageTypeHandshakeResponse {
|
||||||
// Store the session mapping for the handshake response
|
// Store the session mapping for the handshake response
|
||||||
sessionKey := fmt.Sprintf("%d:%d", senderIndex, receiverIndex)
|
sessionKey := fmt.Sprintf("%d:%d", senderIndex, receiverIndex)
|
||||||
s.wgSessions.Store(sessionKey, &WireGuardSession{
|
session := &WireGuardSession{
|
||||||
ReceiverIndex: receiverIndex,
|
ReceiverIndex: receiverIndex,
|
||||||
SenderIndex: senderIndex,
|
SenderIndex: senderIndex,
|
||||||
DestAddr: destAddr,
|
DestAddr: destAddr,
|
||||||
LastSeen: time.Now(),
|
LastSeen: time.Now(),
|
||||||
})
|
}
|
||||||
|
if _, loaded := s.wgSessions.LoadOrStore(sessionKey, session); loaded {
|
||||||
|
s.wgSessions.Store(sessionKey, session)
|
||||||
|
} else {
|
||||||
|
metrics.RecordSession(relayIfname, 1)
|
||||||
|
}
|
||||||
|
// Also index by sender index for O(1) lookup
|
||||||
|
s.sessionsByReceiverIndex.Store(senderIndex, session)
|
||||||
logger.Debug("Stored session mapping: %s -> %s", sessionKey, destAddr.String())
|
logger.Debug("Stored session mapping: %s -> %s", sessionKey, destAddr.String())
|
||||||
|
} else if ok && buffer[0] == WireGuardMessageTypeTransportData {
|
||||||
|
// Track communication pattern for session rebuilding (reverse direction)
|
||||||
|
s.trackCommunicationPattern(destAddr, remoteAddr, receiverIndex, false)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -555,56 +779,91 @@ func (s *UDPProxyServer) handleResponses(conn *net.UDPConn, destAddr *net.UDPAdd
|
|||||||
_, err = s.conn.WriteToUDP(buffer[:n], remoteAddr)
|
_, err = s.conn.WriteToUDP(buffer[:n], remoteAddr)
|
||||||
if err != nil {
|
if err != nil {
|
||||||
logger.Error("Failed to forward response: %v", err)
|
logger.Error("Failed to forward response: %v", err)
|
||||||
|
metrics.RecordProxyConnectionError(relayIfname, "write_udp")
|
||||||
|
continue
|
||||||
}
|
}
|
||||||
|
metrics.RecordUDPPacket(relayIfname, "wireguard", "out")
|
||||||
|
metrics.RecordUDPPacketSize(relayIfname, "wireguard", float64(n))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Add a cleanup method to periodically remove idle connections
|
// Add a cleanup method to periodically remove idle connections
|
||||||
func (s *UDPProxyServer) cleanupIdleConnections() {
|
func (s *UDPProxyServer) cleanupIdleConnections() {
|
||||||
ticker := time.NewTicker(5 * time.Minute)
|
ticker := time.NewTicker(5 * time.Minute)
|
||||||
for range ticker.C {
|
defer ticker.Stop()
|
||||||
now := time.Now()
|
for {
|
||||||
s.connections.Range(func(key, value interface{}) bool {
|
select {
|
||||||
destConn := value.(*DestinationConn)
|
case <-ticker.C:
|
||||||
if now.Sub(destConn.lastUsed) > 10*time.Minute {
|
cleanupStart := time.Now()
|
||||||
destConn.conn.Close()
|
now := time.Now()
|
||||||
s.connections.Delete(key)
|
s.connections.Range(func(key, value interface{}) bool {
|
||||||
}
|
destConn := value.(*DestinationConn)
|
||||||
return true
|
if now.Sub(destConn.lastUsed) > 10*time.Minute {
|
||||||
})
|
destConn.conn.Close()
|
||||||
|
s.connections.Delete(key)
|
||||||
|
metrics.RecordProxyCleanupRemoved(relayIfname, "conn", 1)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
metrics.RecordProxyIdleCleanupDuration(relayIfname, "conn", time.Since(cleanupStart).Seconds())
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// New method to periodically remove idle sessions
|
// New method to periodically remove idle sessions
|
||||||
func (s *UDPProxyServer) cleanupIdleSessions() {
|
func (s *UDPProxyServer) cleanupIdleSessions() {
|
||||||
ticker := time.NewTicker(5 * time.Minute)
|
ticker := time.NewTicker(5 * time.Minute)
|
||||||
for range ticker.C {
|
|
||||||
now := time.Now()
|
defer ticker.Stop()
|
||||||
s.wgSessions.Range(func(key, value interface{}) bool {
|
for {
|
||||||
session := value.(*WireGuardSession)
|
select {
|
||||||
if now.Sub(session.LastSeen) > 15*time.Minute {
|
case <-ticker.C:
|
||||||
s.wgSessions.Delete(key)
|
cleanupStart := time.Now()
|
||||||
logger.Debug("Removed idle session: %s", key)
|
now := time.Now()
|
||||||
}
|
s.wgSessions.Range(func(key, value interface{}) bool {
|
||||||
return true
|
session := value.(*WireGuardSession)
|
||||||
})
|
// Use thread-safe method to read LastSeen
|
||||||
|
if now.Sub(session.GetLastSeen()) > 15*time.Minute {
|
||||||
|
s.wgSessions.Delete(key)
|
||||||
|
metrics.RecordSession(relayIfname, -1)
|
||||||
|
metrics.RecordProxyCleanupRemoved(relayIfname, "session", 1)
|
||||||
|
logger.Debug("Removed idle session: %s", key)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
metrics.RecordProxyIdleCleanupDuration(relayIfname, "session", time.Since(cleanupStart).Seconds())
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// New method to periodically remove idle proxy mappings
|
// New method to periodically remove idle proxy mappings
|
||||||
func (s *UDPProxyServer) cleanupIdleProxyMappings() {
|
func (s *UDPProxyServer) cleanupIdleProxyMappings() {
|
||||||
ticker := time.NewTicker(10 * time.Minute)
|
ticker := time.NewTicker(10 * time.Minute)
|
||||||
for range ticker.C {
|
defer ticker.Stop()
|
||||||
now := time.Now()
|
for {
|
||||||
s.proxyMappings.Range(func(key, value interface{}) bool {
|
select {
|
||||||
mapping := value.(ProxyMapping)
|
case <-ticker.C:
|
||||||
// Remove mappings that haven't been used in 30 minutes
|
cleanupStart := time.Now()
|
||||||
if now.Sub(mapping.LastUsed) > 30*time.Minute {
|
now := time.Now()
|
||||||
s.proxyMappings.Delete(key)
|
s.proxyMappings.Range(func(key, value interface{}) bool {
|
||||||
logger.Debug("Removed idle proxy mapping: %s", key)
|
mapping := value.(ProxyMapping)
|
||||||
}
|
// Remove mappings that haven't been used in 30 minutes
|
||||||
return true
|
if now.Sub(mapping.LastUsed) > 30*time.Minute {
|
||||||
})
|
s.proxyMappings.Delete(key)
|
||||||
|
metrics.RecordProxyMapping(relayIfname, -1)
|
||||||
|
metrics.RecordProxyCleanupRemoved(relayIfname, "proxy_mapping", 1)
|
||||||
|
logger.Debug("Removed idle proxy mapping: %s", key)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
metrics.RecordProxyIdleCleanupDuration(relayIfname, "proxy_mapping", time.Since(cleanupStart).Seconds())
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -644,6 +903,11 @@ func (s *UDPProxyServer) notifyServer(endpoint ClientEndpoint) {
|
|||||||
key := fmt.Sprintf("%s:%d", endpoint.IP, endpoint.Port)
|
key := fmt.Sprintf("%s:%d", endpoint.IP, endpoint.Port)
|
||||||
logger.Debug("About to store proxy mapping with key: %s (from endpoint IP=%s, Port=%d)", key, endpoint.IP, endpoint.Port)
|
logger.Debug("About to store proxy mapping with key: %s (from endpoint IP=%s, Port=%d)", key, endpoint.IP, endpoint.Port)
|
||||||
mapping.LastUsed = time.Now()
|
mapping.LastUsed = time.Now()
|
||||||
|
if _, existed := s.proxyMappings.Load(key); existed {
|
||||||
|
metrics.RecordProxyMappingUpdate(relayIfname)
|
||||||
|
} else {
|
||||||
|
metrics.RecordProxyMapping(relayIfname, 1)
|
||||||
|
}
|
||||||
s.proxyMappings.Store(key, mapping)
|
s.proxyMappings.Store(key, mapping)
|
||||||
|
|
||||||
logger.Debug("Stored proxy mapping for %s with %d destinations (timestamp: %v)", key, len(mapping.Destinations), mapping.LastUsed)
|
logger.Debug("Stored proxy mapping for %s with %d destinations (timestamp: %v)", key, len(mapping.Destinations), mapping.LastUsed)
|
||||||
@@ -656,6 +920,11 @@ func (s *UDPProxyServer) UpdateProxyMapping(sourceIP string, sourcePort int, des
|
|||||||
Destinations: destinations,
|
Destinations: destinations,
|
||||||
LastUsed: time.Now(),
|
LastUsed: time.Now(),
|
||||||
}
|
}
|
||||||
|
if _, existed := s.proxyMappings.Load(key); existed {
|
||||||
|
metrics.RecordProxyMappingUpdate(relayIfname)
|
||||||
|
} else {
|
||||||
|
metrics.RecordProxyMapping(relayIfname, 1)
|
||||||
|
}
|
||||||
s.proxyMappings.Store(key, mapping)
|
s.proxyMappings.Store(key, mapping)
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -709,8 +978,9 @@ func (s *UDPProxyServer) clearSessionsForIP(ip string) {
|
|||||||
keyStr := key.(string)
|
keyStr := key.(string)
|
||||||
session := value.(*WireGuardSession)
|
session := value.(*WireGuardSession)
|
||||||
|
|
||||||
// Check if the session's destination address contains the WG IP
|
// Check if the session's destination address contains the WG IP (thread-safe)
|
||||||
if session.DestAddr != nil && session.DestAddr.IP.String() == ip {
|
destAddr := session.GetDestAddr()
|
||||||
|
if destAddr != nil && destAddr.IP.String() == ip {
|
||||||
keysToDelete = append(keysToDelete, keyStr)
|
keysToDelete = append(keysToDelete, keyStr)
|
||||||
logger.Debug("Marking session for deletion for WG IP %s: %s", ip, keyStr)
|
logger.Debug("Marking session for deletion for WG IP %s: %s", ip, keyStr)
|
||||||
}
|
}
|
||||||
@@ -721,8 +991,12 @@ func (s *UDPProxyServer) clearSessionsForIP(ip string) {
|
|||||||
for _, key := range keysToDelete {
|
for _, key := range keysToDelete {
|
||||||
s.wgSessions.Delete(key)
|
s.wgSessions.Delete(key)
|
||||||
}
|
}
|
||||||
|
if len(keysToDelete) > 0 {
|
||||||
|
metrics.RecordSession(relayIfname, -int64(len(keysToDelete)))
|
||||||
|
metrics.RecordProxyCleanupRemoved(relayIfname, "session", int64(len(keysToDelete)))
|
||||||
|
}
|
||||||
|
|
||||||
logger.Info("Cleared %d sessions for WG IP: %s", len(keysToDelete), ip)
|
logger.Debug("Cleared %d sessions for WG IP: %s", len(keysToDelete), ip)
|
||||||
}
|
}
|
||||||
|
|
||||||
// // clearProxyMappingsForWGIP removes all proxy mappings that have destinations pointing to a specific WireGuard IP
|
// // clearProxyMappingsForWGIP removes all proxy mappings that have destinations pointing to a specific WireGuard IP
|
||||||
@@ -823,3 +1097,155 @@ func (s *UDPProxyServer) UpdateDestinationInMappings(oldDest, newDest PeerDestin
|
|||||||
|
|
||||||
return updatedCount
|
return updatedCount
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// trackCommunicationPattern tracks bidirectional communication patterns to rebuild sessions
|
||||||
|
func (s *UDPProxyServer) trackCommunicationPattern(fromAddr, toAddr *net.UDPAddr, receiverIndex uint32, fromClient bool) {
|
||||||
|
var clientAddr, destAddr *net.UDPAddr
|
||||||
|
var clientIndex, destIndex uint32
|
||||||
|
|
||||||
|
if fromClient {
|
||||||
|
clientAddr = fromAddr
|
||||||
|
destAddr = toAddr
|
||||||
|
clientIndex = receiverIndex
|
||||||
|
destIndex = 0 // We don't know the destination index yet
|
||||||
|
} else {
|
||||||
|
clientAddr = toAddr
|
||||||
|
destAddr = fromAddr
|
||||||
|
clientIndex = 0 // We don't know the client index yet
|
||||||
|
destIndex = receiverIndex
|
||||||
|
}
|
||||||
|
|
||||||
|
patternKey := fmt.Sprintf("%s-%s", clientAddr.String(), destAddr.String())
|
||||||
|
now := time.Now()
|
||||||
|
|
||||||
|
if existingPattern, ok := s.commPatterns.Load(patternKey); ok {
|
||||||
|
pattern := existingPattern.(*CommunicationPattern)
|
||||||
|
|
||||||
|
// Update the pattern
|
||||||
|
if fromClient {
|
||||||
|
pattern.LastFromClient = now
|
||||||
|
if pattern.ClientIndex == 0 {
|
||||||
|
pattern.ClientIndex = clientIndex
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
pattern.LastFromDest = now
|
||||||
|
if pattern.DestIndex == 0 {
|
||||||
|
pattern.DestIndex = destIndex
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pattern.PacketCount++
|
||||||
|
s.commPatterns.Store(patternKey, pattern)
|
||||||
|
|
||||||
|
// Check if we have bidirectional communication and can rebuild a session
|
||||||
|
s.tryRebuildSession(pattern)
|
||||||
|
} else {
|
||||||
|
// Create new pattern
|
||||||
|
pattern := &CommunicationPattern{
|
||||||
|
FromClient: clientAddr,
|
||||||
|
ToDestination: destAddr,
|
||||||
|
ClientIndex: clientIndex,
|
||||||
|
DestIndex: destIndex,
|
||||||
|
PacketCount: 1,
|
||||||
|
}
|
||||||
|
|
||||||
|
if fromClient {
|
||||||
|
pattern.LastFromClient = now
|
||||||
|
} else {
|
||||||
|
pattern.LastFromDest = now
|
||||||
|
}
|
||||||
|
|
||||||
|
if _, loaded := s.commPatterns.LoadOrStore(patternKey, pattern); !loaded {
|
||||||
|
metrics.RecordCommPattern(relayIfname, 1)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// tryRebuildSession attempts to rebuild a WireGuard session from communication patterns
|
||||||
|
func (s *UDPProxyServer) tryRebuildSession(pattern *CommunicationPattern) {
|
||||||
|
// Check if we have bidirectional communication within a reasonable time window
|
||||||
|
timeDiff := pattern.LastFromClient.Sub(pattern.LastFromDest)
|
||||||
|
if timeDiff < 0 {
|
||||||
|
timeDiff = -timeDiff
|
||||||
|
}
|
||||||
|
|
||||||
|
// Only rebuild if we have recent bidirectional communication and both indices
|
||||||
|
if timeDiff < 30*time.Second && pattern.ClientIndex != 0 && pattern.DestIndex != 0 && pattern.PacketCount >= 4 {
|
||||||
|
// Create session mapping: client's index maps to destination
|
||||||
|
sessionKey := fmt.Sprintf("%d:%d", pattern.DestIndex, pattern.ClientIndex)
|
||||||
|
|
||||||
|
// Check if we already have this session
|
||||||
|
session := &WireGuardSession{
|
||||||
|
ReceiverIndex: pattern.DestIndex,
|
||||||
|
SenderIndex: pattern.ClientIndex,
|
||||||
|
DestAddr: pattern.ToDestination,
|
||||||
|
LastSeen: time.Now(),
|
||||||
|
}
|
||||||
|
if _, loaded := s.wgSessions.LoadOrStore(sessionKey, session); loaded {
|
||||||
|
s.wgSessions.Store(sessionKey, session)
|
||||||
|
} else {
|
||||||
|
metrics.RecordSession(relayIfname, 1)
|
||||||
|
metrics.RecordSessionRebuilt(relayIfname)
|
||||||
|
}
|
||||||
|
logger.Info("Rebuilt WireGuard session from communication pattern: %s -> %s (packets: %d)",
|
||||||
|
sessionKey, pattern.ToDestination.String(), pattern.PacketCount)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// cleanupIdleCommunicationPatterns periodically removes idle communication patterns
|
||||||
|
// cleanupHolePunchRateLimiter periodically evicts stale rate limit entries to prevent unbounded growth.
|
||||||
|
func (s *UDPProxyServer) cleanupHolePunchRateLimiter() {
|
||||||
|
ticker := time.NewTicker(30 * time.Second)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
now := time.Now()
|
||||||
|
s.holePunchRateLimiter.Range(func(key, value interface{}) bool {
|
||||||
|
rlEntry := value.(*holePunchRateLimitEntry)
|
||||||
|
rlEntry.mu.Lock()
|
||||||
|
stale := now.Sub(rlEntry.windowStart) > 10*time.Second
|
||||||
|
rlEntry.mu.Unlock()
|
||||||
|
if stale {
|
||||||
|
s.holePunchRateLimiter.Delete(key)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
func (s *UDPProxyServer) cleanupIdleCommunicationPatterns() {
|
||||||
|
ticker := time.NewTicker(10 * time.Minute)
|
||||||
|
defer ticker.Stop()
|
||||||
|
for {
|
||||||
|
select {
|
||||||
|
case <-ticker.C:
|
||||||
|
cleanupStart := time.Now()
|
||||||
|
now := time.Now()
|
||||||
|
s.commPatterns.Range(func(key, value interface{}) bool {
|
||||||
|
pattern := value.(*CommunicationPattern)
|
||||||
|
|
||||||
|
// Get the most recent activity
|
||||||
|
lastActivity := pattern.LastFromClient
|
||||||
|
if pattern.LastFromDest.After(lastActivity) {
|
||||||
|
lastActivity = pattern.LastFromDest
|
||||||
|
}
|
||||||
|
|
||||||
|
// Remove patterns that haven't had activity in 20 minutes
|
||||||
|
if now.Sub(lastActivity) > 20*time.Minute {
|
||||||
|
s.commPatterns.Delete(key)
|
||||||
|
metrics.RecordCommPattern(relayIfname, -1)
|
||||||
|
metrics.RecordProxyCleanupRemoved(relayIfname, "comm_pattern", 1)
|
||||||
|
logger.Debug("Removed idle communication pattern: %s", key)
|
||||||
|
}
|
||||||
|
return true
|
||||||
|
})
|
||||||
|
metrics.RecordProxyIdleCleanupDuration(relayIfname, "comm_pattern", time.Since(cleanupStart).Seconds())
|
||||||
|
case <-s.ctx.Done():
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user