mirror of
https://github.com/fosrl/newt.git
synced 2026-03-12 18:04:28 -05:00
Compare commits
63 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
bf33a3d81f | ||
|
|
527321a415 | ||
|
|
8d3ae5afd7 | ||
|
|
f1e07272bd | ||
|
|
a1a3d63fcf | ||
|
|
2a273dc435 | ||
|
|
ec05686523 | ||
|
|
915e7e44d1 | ||
|
|
a729b91ac3 | ||
|
|
ddc37658df | ||
|
|
7c780f7a4f | ||
|
|
6b1c1ed077 | ||
|
|
7a07437b22 | ||
|
|
d63d8d6f5e | ||
|
|
bda1d04f67 | ||
|
|
7f8ee37c7f | ||
|
|
6d2073a478 | ||
|
|
6048f244f1 | ||
|
|
9fec22a53b | ||
|
|
c086e69dd0 | ||
|
|
c729ab5fc6 | ||
|
|
552617cbb5 | ||
|
|
b383cec0b0 | ||
|
|
fb110ba2a1 | ||
|
|
f287888480 | ||
|
|
348b8f6b94 | ||
|
|
71c5bf7e65 | ||
|
|
dda0b414cc | ||
|
|
8f224e2a45 | ||
|
|
90243cd6c6 | ||
|
|
9b79af10ed | ||
|
|
31b1ffcbe9 | ||
|
|
f1c4e1db71 | ||
|
|
72a61d0933 | ||
|
|
e489a2cc66 | ||
|
|
4e648af8e9 | ||
|
|
5d891225de | ||
|
|
9864965381 | ||
|
|
75f6362a90 | ||
|
|
30907188fb | ||
|
|
5f11df8df2 | ||
|
|
7eea6dd335 | ||
|
|
9dc5a3d91c | ||
|
|
1881309148 | ||
|
|
aff928e60f | ||
|
|
f6e7bfe8ea | ||
|
|
60873f0a4f | ||
|
|
50bb81981b | ||
|
|
4ced99fa3f | ||
|
|
9bd96ac540 | ||
|
|
c673743692 | ||
|
|
a08a3b9665 | ||
|
|
0fc13be413 | ||
|
|
92cedd00b3 | ||
|
|
8b0cc36554 | ||
|
|
ba9ca9f097 | ||
|
|
8b4a88937c | ||
|
|
58412a7a61 | ||
|
|
2675b812aa | ||
|
|
217a9346c6 | ||
|
|
eda8073bce | ||
|
|
2969f9d2d6 | ||
|
|
07b7025a24 |
5
.env.example
Normal file
5
.env.example
Normal file
@@ -0,0 +1,5 @@
|
||||
# Copy this file to .env and fill in your values
|
||||
# Required for connecting to Pangolin service
|
||||
PANGOLIN_ENDPOINT=https://example.com
|
||||
NEWT_ID=changeme-id
|
||||
NEWT_SECRET=changeme-secret
|
||||
47
.github/DISCUSSION_TEMPLATE/feature-requests.yml
vendored
Normal file
47
.github/DISCUSSION_TEMPLATE/feature-requests.yml
vendored
Normal file
@@ -0,0 +1,47 @@
|
||||
body:
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Summary
|
||||
description: A clear and concise summary of the requested feature.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Motivation
|
||||
description: |
|
||||
Why is this feature important?
|
||||
Explain the problem this feature would solve or what use case it would enable.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Proposed Solution
|
||||
description: |
|
||||
How would you like to see this feature implemented?
|
||||
Provide as much detail as possible about the desired behavior, configuration, or changes.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Alternatives Considered
|
||||
description: Describe any alternative solutions or workarounds you've thought about.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Additional Context
|
||||
description: Add any other context, mockups, or screenshots about the feature request here.
|
||||
validations:
|
||||
required: false
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Before submitting, please:
|
||||
- Check if there is an existing issue for this feature.
|
||||
- Clearly explain the benefit and use case.
|
||||
- Be as specific as possible to help contributors evaluate and implement.
|
||||
51
.github/ISSUE_TEMPLATE/1.bug_report.yml
vendored
Normal file
51
.github/ISSUE_TEMPLATE/1.bug_report.yml
vendored
Normal file
@@ -0,0 +1,51 @@
|
||||
name: Bug Report
|
||||
description: Create a bug report
|
||||
labels: []
|
||||
body:
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Describe the Bug
|
||||
description: A clear and concise description of what the bug is.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Environment
|
||||
description: Please fill out the relevant details below for your environment.
|
||||
value: |
|
||||
- OS Type & Version: (e.g., Ubuntu 22.04)
|
||||
- Pangolin Version:
|
||||
- Gerbil Version:
|
||||
- Traefik Version:
|
||||
- Newt Version:
|
||||
- Olm Version: (if applicable)
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: To Reproduce
|
||||
description: |
|
||||
Steps to reproduce the behavior. Please provide a clear description of how to reproduce the issue, based on the linked minimal reproduction. Screenshots can be provided in the issue body below.
|
||||
|
||||
If using code blocks, make sure syntax highlighting is correct and double-check that the rendered preview is not broken.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: textarea
|
||||
attributes:
|
||||
label: Expected Behavior
|
||||
description: A clear and concise description of what you expected to happen.
|
||||
validations:
|
||||
required: true
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Before posting the issue go through the steps you've written down to make sure the steps provided are detailed and clear.
|
||||
|
||||
- type: markdown
|
||||
attributes:
|
||||
value: |
|
||||
Contributors should be able to follow the steps provided in order to reproduce the bug.
|
||||
8
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
8
.github/ISSUE_TEMPLATE/config.yml
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
blank_issues_enabled: false
|
||||
contact_links:
|
||||
- name: Need help or have questions?
|
||||
url: https://github.com/orgs/fosrl/discussions
|
||||
about: Ask questions, get help, and discuss with other community members
|
||||
- name: Request a Feature
|
||||
url: https://github.com/orgs/fosrl/discussions/new?category=feature-requests
|
||||
about: Feature requests should be opened as discussions so others can upvote and comment
|
||||
189
.github/workflows/cicd.yml
vendored
189
.github/workflows/cicd.yml
vendored
@@ -1,61 +1,158 @@
|
||||
name: CI/CD Pipeline
|
||||
|
||||
# CI/CD workflow for building, publishing, mirroring, signing container images and building release binaries.
|
||||
# Actions are pinned to specific SHAs to reduce supply-chain risk. This workflow triggers on tag push events.
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write # for GHCR push
|
||||
id-token: write # for Cosign Keyless (OIDC) Signing
|
||||
|
||||
# Required secrets:
|
||||
# - DOCKER_HUB_USERNAME / DOCKER_HUB_ACCESS_TOKEN: push to Docker Hub
|
||||
# - GITHUB_TOKEN: used for GHCR login and OIDC keyless signing
|
||||
# - COSIGN_PRIVATE_KEY / COSIGN_PASSWORD / COSIGN_PUBLIC_KEY: for key-based signing
|
||||
|
||||
on:
|
||||
push:
|
||||
tags:
|
||||
- "*"
|
||||
push:
|
||||
tags:
|
||||
- "*"
|
||||
|
||||
concurrency:
|
||||
group: ${{ github.ref }}
|
||||
cancel-in-progress: true
|
||||
|
||||
jobs:
|
||||
release:
|
||||
name: Build and Release
|
||||
runs-on: ubuntu-latest
|
||||
release:
|
||||
name: Build and Release
|
||||
runs-on: amd64-runner
|
||||
# Job-level timeout to avoid runaway or stuck runs
|
||||
timeout-minutes: 120
|
||||
env:
|
||||
# Target images
|
||||
DOCKERHUB_IMAGE: docker.io/${{ secrets.DOCKER_HUB_USERNAME }}/${{ github.event.repository.name }}
|
||||
GHCR_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
|
||||
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@v5
|
||||
steps:
|
||||
- name: Checkout code
|
||||
uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@v3
|
||||
- name: Set up QEMU
|
||||
uses: docker/setup-qemu-action@29109295f81e9208d7d86ff1c6c12d2833863392 # v3.6.0
|
||||
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@v3
|
||||
- name: Set up Docker Buildx
|
||||
uses: docker/setup-buildx-action@e468171a9de216ec08956ac3ada2f0791b6bd435 # v3.11.1
|
||||
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@v3
|
||||
with:
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
- name: Log in to Docker Hub
|
||||
uses: docker/login-action@5e57cd118135c172c3672efd75eb46360885c0ef # v3.6.0
|
||||
with:
|
||||
registry: docker.io
|
||||
username: ${{ secrets.DOCKER_HUB_USERNAME }}
|
||||
password: ${{ secrets.DOCKER_HUB_ACCESS_TOKEN }}
|
||||
|
||||
- name: Extract tag name
|
||||
id: get-tag
|
||||
run: echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
||||
- name: Extract tag name
|
||||
id: get-tag
|
||||
run: echo "TAG=${GITHUB_REF#refs/tags/}" >> $GITHUB_ENV
|
||||
shell: bash
|
||||
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@v5
|
||||
with:
|
||||
go-version: 1.25
|
||||
- name: Install Go
|
||||
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: 1.25
|
||||
|
||||
- name: Update version in main.go
|
||||
run: |
|
||||
TAG=${{ env.TAG }}
|
||||
if [ -f main.go ]; then
|
||||
sed -i 's/version_replaceme/'"$TAG"'/' main.go
|
||||
echo "Updated main.go with version $TAG"
|
||||
else
|
||||
echo "main.go not found"
|
||||
fi
|
||||
- name: Update version in main.go
|
||||
run: |
|
||||
TAG=${{ env.TAG }}
|
||||
if [ -f main.go ]; then
|
||||
sed -i 's/version_replaceme/'"$TAG"'/' main.go
|
||||
echo "Updated main.go with version $TAG"
|
||||
else
|
||||
echo "main.go not found"
|
||||
fi
|
||||
shell: bash
|
||||
|
||||
- name: Build and push Docker images
|
||||
run: |
|
||||
TAG=${{ env.TAG }}
|
||||
make docker-build-release tag=$TAG
|
||||
- name: Build and push Docker images (Docker Hub)
|
||||
run: |
|
||||
TAG=${{ env.TAG }}
|
||||
make docker-build-release tag=$TAG
|
||||
echo "Built & pushed to: ${{ env.DOCKERHUB_IMAGE }}:${TAG}"
|
||||
shell: bash
|
||||
|
||||
- name: Build binaries
|
||||
run: |
|
||||
make go-build-release
|
||||
- name: Install skopeo + jq
|
||||
# skopeo: copy/inspect images between registries
|
||||
# jq: JSON parsing tool used to extract digest values
|
||||
run: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install -y skopeo jq
|
||||
skopeo --version
|
||||
shell: bash
|
||||
|
||||
- name: Upload artifacts from /bin
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: binaries
|
||||
path: bin/
|
||||
- name: Login to GHCR
|
||||
run: |
|
||||
skopeo login ghcr.io -u "${{ github.actor }}" -p "${{ secrets.GITHUB_TOKEN }}"
|
||||
shell: bash
|
||||
|
||||
- name: Copy tag from Docker Hub to GHCR
|
||||
# Mirror the already-built image (all architectures) to GHCR so we can sign it
|
||||
run: |
|
||||
set -euo pipefail
|
||||
TAG=${{ env.TAG }}
|
||||
echo "Copying ${{ env.DOCKERHUB_IMAGE }}:${TAG} -> ${{ env.GHCR_IMAGE }}:${TAG}"
|
||||
skopeo copy --all --retry-times 3 \
|
||||
docker://$DOCKERHUB_IMAGE:$TAG \
|
||||
docker://$GHCR_IMAGE:$TAG
|
||||
shell: bash
|
||||
|
||||
- name: Install cosign
|
||||
# cosign is used to sign and verify container images (key and keyless)
|
||||
uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0
|
||||
|
||||
- name: Dual-sign and verify (GHCR & Docker Hub)
|
||||
# Sign each image by digest using keyless (OIDC) and key-based signing,
|
||||
# then verify both the public key signature and the keyless OIDC signature.
|
||||
env:
|
||||
TAG: ${{ env.TAG }}
|
||||
COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
|
||||
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
|
||||
COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
|
||||
COSIGN_YES: "true"
|
||||
run: |
|
||||
set -euo pipefail
|
||||
|
||||
issuer="https://token.actions.githubusercontent.com"
|
||||
id_regex="^https://github.com/${{ github.repository }}/.+" # accept this repo (all workflows/refs)
|
||||
|
||||
for IMAGE in "${GHCR_IMAGE}" "${DOCKERHUB_IMAGE}"; do
|
||||
echo "Processing ${IMAGE}:${TAG}"
|
||||
|
||||
DIGEST="$(skopeo inspect --retry-times 3 docker://${IMAGE}:${TAG} | jq -r '.Digest')"
|
||||
REF="${IMAGE}@${DIGEST}"
|
||||
echo "Resolved digest: ${REF}"
|
||||
|
||||
echo "==> cosign sign (keyless) --recursive ${REF}"
|
||||
cosign sign --recursive "${REF}"
|
||||
|
||||
echo "==> cosign sign (key) --recursive ${REF}"
|
||||
cosign sign --key env://COSIGN_PRIVATE_KEY --recursive "${REF}"
|
||||
|
||||
echo "==> cosign verify (public key) ${REF}"
|
||||
cosign verify --key env://COSIGN_PUBLIC_KEY "${REF}" -o text
|
||||
|
||||
echo "==> cosign verify (keyless policy) ${REF}"
|
||||
cosign verify \
|
||||
--certificate-oidc-issuer "${issuer}" \
|
||||
--certificate-identity-regexp "${id_regex}" \
|
||||
"${REF}" -o text
|
||||
done
|
||||
shell: bash
|
||||
|
||||
- name: Build binaries
|
||||
run: |
|
||||
make go-build-release
|
||||
shell: bash
|
||||
|
||||
- name: Upload artifacts from /bin
|
||||
uses: actions/upload-artifact@ea165f8d65b6e75b540449e92b4886f43607fa02 # v4.6.2
|
||||
with:
|
||||
name: binaries
|
||||
path: bin/
|
||||
|
||||
132
.github/workflows/mirror.yaml
vendored
Normal file
132
.github/workflows/mirror.yaml
vendored
Normal file
@@ -0,0 +1,132 @@
|
||||
name: Mirror & Sign (Docker Hub to GHCR)
|
||||
|
||||
on:
|
||||
workflow_dispatch: {}
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
packages: write
|
||||
id-token: write # for keyless OIDC
|
||||
|
||||
env:
|
||||
SOURCE_IMAGE: docker.io/fosrl/newt
|
||||
DEST_IMAGE: ghcr.io/${{ github.repository_owner }}/${{ github.event.repository.name }}
|
||||
|
||||
jobs:
|
||||
mirror-and-dual-sign:
|
||||
runs-on: amd64-runner
|
||||
steps:
|
||||
- name: Install skopeo + jq
|
||||
run: |
|
||||
sudo apt-get update -y
|
||||
sudo apt-get install -y skopeo jq
|
||||
skopeo --version
|
||||
|
||||
- name: Install cosign
|
||||
uses: sigstore/cosign-installer@faadad0cce49287aee09b3a48701e75088a2c6ad # v4.0.0
|
||||
|
||||
- name: Input check
|
||||
run: |
|
||||
test -n "${SOURCE_IMAGE}" || (echo "SOURCE_IMAGE is empty" && exit 1)
|
||||
echo "Source : ${SOURCE_IMAGE}"
|
||||
echo "Target : ${DEST_IMAGE}"
|
||||
|
||||
# Auth for skopeo (containers-auth)
|
||||
- name: Skopeo login to GHCR
|
||||
run: |
|
||||
skopeo login ghcr.io -u "${{ github.actor }}" -p "${{ secrets.GITHUB_TOKEN }}"
|
||||
|
||||
# Auth for cosign (docker-config)
|
||||
- name: Docker login to GHCR (for cosign)
|
||||
run: |
|
||||
echo "${{ secrets.GITHUB_TOKEN }}" | docker login ghcr.io -u "${{ github.actor }}" --password-stdin
|
||||
|
||||
- name: List source tags
|
||||
run: |
|
||||
set -euo pipefail
|
||||
skopeo list-tags --retry-times 3 docker://"${SOURCE_IMAGE}" \
|
||||
| jq -r '.Tags[]' | sort -u > src-tags.txt
|
||||
echo "Found source tags: $(wc -l < src-tags.txt)"
|
||||
head -n 20 src-tags.txt || true
|
||||
|
||||
- name: List destination tags (skip existing)
|
||||
run: |
|
||||
set -euo pipefail
|
||||
if skopeo list-tags --retry-times 3 docker://"${DEST_IMAGE}" >/tmp/dst.json 2>/dev/null; then
|
||||
jq -r '.Tags[]' /tmp/dst.json | sort -u > dst-tags.txt
|
||||
else
|
||||
: > dst-tags.txt
|
||||
fi
|
||||
echo "Existing destination tags: $(wc -l < dst-tags.txt)"
|
||||
|
||||
- name: Mirror, dual-sign, and verify
|
||||
env:
|
||||
# keyless
|
||||
COSIGN_YES: "true"
|
||||
# key-based
|
||||
COSIGN_PRIVATE_KEY: ${{ secrets.COSIGN_PRIVATE_KEY }}
|
||||
COSIGN_PASSWORD: ${{ secrets.COSIGN_PASSWORD }}
|
||||
# verify
|
||||
COSIGN_PUBLIC_KEY: ${{ secrets.COSIGN_PUBLIC_KEY }}
|
||||
run: |
|
||||
set -euo pipefail
|
||||
copied=0; skipped=0; v_ok=0; errs=0
|
||||
|
||||
issuer="https://token.actions.githubusercontent.com"
|
||||
id_regex="^https://github.com/${{ github.repository }}/.+"
|
||||
|
||||
while read -r tag; do
|
||||
[ -z "$tag" ] && continue
|
||||
|
||||
if grep -Fxq "$tag" dst-tags.txt; then
|
||||
echo "::notice ::Skip (exists) ${DEST_IMAGE}:${tag}"
|
||||
skipped=$((skipped+1))
|
||||
continue
|
||||
fi
|
||||
|
||||
echo "==> Copy ${SOURCE_IMAGE}:${tag} → ${DEST_IMAGE}:${tag}"
|
||||
if ! skopeo copy --all --retry-times 3 \
|
||||
docker://"${SOURCE_IMAGE}:${tag}" docker://"${DEST_IMAGE}:${tag}"; then
|
||||
echo "::warning title=Copy failed::${SOURCE_IMAGE}:${tag}"
|
||||
errs=$((errs+1)); continue
|
||||
fi
|
||||
copied=$((copied+1))
|
||||
|
||||
digest="$(skopeo inspect --retry-times 3 docker://"${DEST_IMAGE}:${tag}" | jq -r '.Digest')"
|
||||
ref="${DEST_IMAGE}@${digest}"
|
||||
|
||||
echo "==> cosign sign (keyless) --recursive ${ref}"
|
||||
if ! cosign sign --recursive "${ref}"; then
|
||||
echo "::warning title=Keyless sign failed::${ref}"
|
||||
errs=$((errs+1))
|
||||
fi
|
||||
|
||||
echo "==> cosign sign (key) --recursive ${ref}"
|
||||
if ! cosign sign --key env://COSIGN_PRIVATE_KEY --recursive "${ref}"; then
|
||||
echo "::warning title=Key sign failed::${ref}"
|
||||
errs=$((errs+1))
|
||||
fi
|
||||
|
||||
echo "==> cosign verify (public key) ${ref}"
|
||||
if ! cosign verify --key env://COSIGN_PUBLIC_KEY "${ref}" -o text; then
|
||||
echo "::warning title=Verify(pubkey) failed::${ref}"
|
||||
errs=$((errs+1))
|
||||
fi
|
||||
|
||||
echo "==> cosign verify (keyless policy) ${ref}"
|
||||
if ! cosign verify \
|
||||
--certificate-oidc-issuer "${issuer}" \
|
||||
--certificate-identity-regexp "${id_regex}" \
|
||||
"${ref}" -o text; then
|
||||
echo "::warning title=Verify(keyless) failed::${ref}"
|
||||
errs=$((errs+1))
|
||||
else
|
||||
v_ok=$((v_ok+1))
|
||||
fi
|
||||
done < src-tags.txt
|
||||
|
||||
echo "---- Summary ----"
|
||||
echo "Copied : $copied"
|
||||
echo "Skipped : $skipped"
|
||||
echo "Verified OK : $v_ok"
|
||||
echo "Errors : $errs"
|
||||
9
.github/workflows/test.yml
vendored
9
.github/workflows/test.yml
vendored
@@ -1,5 +1,8 @@
|
||||
name: Run Tests
|
||||
|
||||
permissions:
|
||||
contents: read
|
||||
|
||||
on:
|
||||
pull_request:
|
||||
branches:
|
||||
@@ -8,13 +11,13 @@ on:
|
||||
|
||||
jobs:
|
||||
test:
|
||||
runs-on: ubuntu-latest
|
||||
runs-on: amd64-runner
|
||||
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
- uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
|
||||
|
||||
- name: Set up Go
|
||||
uses: actions/setup-go@v5
|
||||
uses: actions/setup-go@44694675825211faa026b3c33043df3e48a5fa00 # v6.0.0
|
||||
with:
|
||||
go-version: 1.25
|
||||
|
||||
|
||||
@@ -4,11 +4,7 @@ Contributions are welcome!
|
||||
|
||||
Please see the contribution and local development guide on the docs page before getting started:
|
||||
|
||||
https://docs.fossorial.io/development
|
||||
|
||||
For ideas about what features to work on and our future plans, please see the roadmap:
|
||||
|
||||
https://docs.fossorial.io/roadmap
|
||||
https://docs.pangolin.net/development/contributing
|
||||
|
||||
### Licensing Considerations
|
||||
|
||||
@@ -21,4 +17,4 @@ By creating this pull request, I grant the project maintainers an unlimited,
|
||||
perpetual license to use, modify, and redistribute these contributions under any terms they
|
||||
choose, including both the AGPLv3 and the Fossorial Commercial license terms. I
|
||||
represent that I have the right to grant this license for all contributed content.
|
||||
```
|
||||
```
|
||||
10
Dockerfile
10
Dockerfile
@@ -1,5 +1,8 @@
|
||||
FROM golang:1.25-alpine AS builder
|
||||
|
||||
# Install git and ca-certificates
|
||||
RUN apk --no-cache add ca-certificates git tzdata
|
||||
|
||||
# Set the working directory inside the container
|
||||
WORKDIR /app
|
||||
|
||||
@@ -13,7 +16,7 @@ RUN go mod download
|
||||
COPY . .
|
||||
|
||||
# Build the application
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -o /newt
|
||||
RUN CGO_ENABLED=0 GOOS=linux go build -ldflags="-s -w" -o /newt
|
||||
|
||||
FROM alpine:3.22 AS runner
|
||||
|
||||
@@ -22,6 +25,9 @@ RUN apk --no-cache add ca-certificates tzdata
|
||||
COPY --from=builder /newt /usr/local/bin/
|
||||
COPY entrypoint.sh /
|
||||
|
||||
# Admin/metrics endpoint (Prometheus scrape)
|
||||
EXPOSE 2112
|
||||
|
||||
RUN chmod +x /entrypoint.sh
|
||||
ENTRYPOINT ["/entrypoint.sh"]
|
||||
CMD ["newt"]
|
||||
CMD ["newt"]
|
||||
|
||||
107
README.md
107
README.md
@@ -1,4 +1,7 @@
|
||||
# Newt
|
||||
[](https://pkg.go.dev/github.com/fosrl/newt)
|
||||
[](https://github.com/fosrl/newt/blob/main/LICENSE)
|
||||
[](https://goreportcard.com/report/github.com/fosrl/newt)
|
||||
|
||||
Newt is a fully user space [WireGuard](https://www.wireguard.com/) tunnel client and TCP/UDP proxy, designed to securely expose private resources controlled by Pangolin. By using Newt, you don't need to manage complex WireGuard tunnels and NATing.
|
||||
|
||||
@@ -6,7 +9,7 @@ Newt is a fully user space [WireGuard](https://www.wireguard.com/) tunnel client
|
||||
|
||||
Newt is used with Pangolin and Gerbil as part of the larger system. See documentation below:
|
||||
|
||||
- [Full Documentation](https://docs.fossorial.io)
|
||||
- [Full Documentation](https://docs.pangolin.net)
|
||||
|
||||
## Preview
|
||||
|
||||
@@ -30,57 +33,109 @@ When Newt receives WireGuard control messages, it will use the information encod
|
||||
|
||||
## CLI Args
|
||||
|
||||
### Core Configuration
|
||||
|
||||
- `id`: Newt ID generated by Pangolin to identify the client.
|
||||
- `secret`: A unique secret (not shared and kept private) used to authenticate the client ID with the websocket in order to receive commands.
|
||||
- `endpoint`: The endpoint where both Gerbil and Pangolin reside in order to connect to the websocket.
|
||||
|
||||
- `mtu` (optional): MTU for the internal WG interface. Default: 1280
|
||||
- `dns` (optional): DNS server to use to resolve the endpoint. Default: 8.8.8.8
|
||||
- `blueprint-file` (optional): Path to blueprint file to define Pangolin resources and configurations.
|
||||
- `no-cloud` (optional): Don't fail over to the cloud when using managed nodes in Pangolin Cloud. Default: false
|
||||
- `log-level` (optional): The log level to use (DEBUG, INFO, WARN, ERROR, FATAL). Default: INFO
|
||||
- `enforce-hc-cert` (optional): Enforce certificate validation for health checks. Default: false (accepts any cert)
|
||||
|
||||
### Docker Integration
|
||||
|
||||
- `docker-socket` (optional): Set the Docker socket to use the container discovery integration
|
||||
- `ping-interval` (optional): Interval for pinging the server. Default: 3s
|
||||
- `ping-timeout` (optional): Timeout for each ping. Default: 5s
|
||||
- `updown` (optional): A script to be called when targets are added or removed.
|
||||
- `tls-client-cert` (optional): Client certificate (p12 or pfx) for mTLS. See [mTLS](#mtls)
|
||||
- `tls-client-cert` (optional): Path to client certificate (PEM format, optional if using PKCS12). See [mTLS](#mtls)
|
||||
- `tls-client-key` (optional): Path to private key for mTLS (PEM format, optional if using PKCS12)
|
||||
- `tls-ca-cert` (optional): Path to CA certificate to verify server (PEM format, optional if using PKCS12)
|
||||
- `docker-enforce-network-validation` (optional): Validate the container target is on the same network as the newt process. Default: false
|
||||
- `health-file` (optional): Check if connection to WG server (pangolin) is ok. Creates a file if ok, removes it if not ok. Can be used with docker healthcheck to restart newt
|
||||
|
||||
### Accept Client Connections
|
||||
|
||||
- `accept-clients` (optional): Enable WireGuard server mode to accept incoming newt client connections. Default: false
|
||||
- `generateAndSaveKeyTo` (optional): Path to save generated private key
|
||||
- `native` (optional): Use native WireGuard interface when accepting clients (requires WireGuard kernel module and Linux, must run as root). Default: false (uses userspace netstack)
|
||||
- `interface` (optional): Name of the WireGuard interface. Default: newt
|
||||
- `keep-interface` (optional): Keep the WireGuard interface. Default: false
|
||||
|
||||
### Metrics & Observability
|
||||
|
||||
- `metrics` (optional): Enable Prometheus /metrics exporter. Default: true
|
||||
- `otlp` (optional): Enable OTLP exporters (metrics/traces) to OTEL_EXPORTER_OTLP_ENDPOINT. Default: false
|
||||
- `metrics-admin-addr` (optional): Admin/metrics bind address. Default: 127.0.0.1:2112
|
||||
- `metrics-async-bytes` (optional): Enable async bytes counting (background flush; lower hot path overhead). Default: false
|
||||
- `region` (optional): Optional region resource attribute for telemetry and metrics.
|
||||
|
||||
### Network Configuration
|
||||
|
||||
- `mtu` (optional): MTU for the internal WG interface. Default: 1280
|
||||
- `dns` (optional): DNS server to use to resolve the endpoint. Default: 9.9.9.9
|
||||
- `ping-interval` (optional): Interval for pinging the server. Default: 3s
|
||||
- `ping-timeout` (optional): Timeout for each ping. Default: 5s
|
||||
|
||||
### Security & TLS
|
||||
|
||||
- `enforce-hc-cert` (optional): Enforce certificate validation for health checks. Default: false (accepts any cert)
|
||||
- `tls-client-cert` (optional): Client certificate (p12 or pfx) for mTLS or path to client certificate (PEM format). See [mTLS](#mtls)
|
||||
- `tls-client-key` (optional): Path to private key for mTLS (PEM format, optional if using PKCS12)
|
||||
- `tls-ca-cert` (optional): Path to CA certificate to verify server (PEM format, optional if using PKCS12)
|
||||
|
||||
### Monitoring & Health
|
||||
|
||||
- `health-file` (optional): Check if connection to WG server (pangolin) is ok. Creates a file if ok, removes it if not ok. Can be used with docker healthcheck to restart newt
|
||||
- `updown` (optional): A script to be called when targets are added or removed.
|
||||
|
||||
## Environment Variables
|
||||
|
||||
All CLI arguments can be set using environment variables as an alternative to command line flags. Environment variables are particularly useful when running Newt in containerized environments.
|
||||
|
||||
### Core Configuration
|
||||
|
||||
- `PANGOLIN_ENDPOINT`: Endpoint of your pangolin server (equivalent to `--endpoint`)
|
||||
- `NEWT_ID`: Newt ID generated by Pangolin (equivalent to `--id`)
|
||||
- `NEWT_SECRET`: Newt secret for authentication (equivalent to `--secret`)
|
||||
- `MTU`: MTU for the internal WG interface. Default: 1280 (equivalent to `--mtu`)
|
||||
- `DNS`: DNS server to use to resolve the endpoint. Default: 8.8.8.8 (equivalent to `--dns`)
|
||||
- `CONFIG_FILE`: Load the config json from this file instead of in the home folder.
|
||||
- `BLUEPRINT_FILE`: Path to blueprint file to define Pangolin resources and configurations. (equivalent to `--blueprint-file`)
|
||||
- `NO_CLOUD`: Don't fail over to the cloud when using managed nodes in Pangolin Cloud. Default: false (equivalent to `--no-cloud`)
|
||||
- `LOG_LEVEL`: Log level (DEBUG, INFO, WARN, ERROR, FATAL). Default: INFO (equivalent to `--log-level`)
|
||||
|
||||
### Docker Integration
|
||||
|
||||
- `DOCKER_SOCKET`: Path to Docker socket for container discovery (equivalent to `--docker-socket`)
|
||||
- `PING_INTERVAL`: Interval for pinging the server. Default: 3s (equivalent to `--ping-interval`)
|
||||
- `PING_TIMEOUT`: Timeout for each ping. Default: 5s (equivalent to `--ping-timeout`)
|
||||
- `UPDOWN_SCRIPT`: Path to updown script for target add/remove events (equivalent to `--updown`)
|
||||
- `TLS_CLIENT_CERT`: Path to client certificate for mTLS (equivalent to `--tls-client-cert`)
|
||||
- `TLS_CLIENT_CERT`: Path to client certificate for mTLS (equivalent to `--tls-client-cert`)
|
||||
- `TLS_CLIENT_KEY`: Path to private key for mTLS (equivalent to `--tls-client-key`)
|
||||
- `TLS_CA_CERT`: Path to CA certificate to verify server (equivalent to `--tls-ca-cert`)
|
||||
- `DOCKER_ENFORCE_NETWORK_VALIDATION`: Validate container targets are on same network. Default: false (equivalent to `--docker-enforce-network-validation`)
|
||||
- `ENFORCE_HC_CERT`: Enforce certificate validation for health checks. Default: false (equivalent to `--enforce-hc-cert`)
|
||||
- `HEALTH_FILE`: Path to health file for connection monitoring (equivalent to `--health-file`)
|
||||
|
||||
### Accept Client Connections
|
||||
|
||||
- `ACCEPT_CLIENTS`: Enable WireGuard server mode. Default: false (equivalent to `--accept-clients`)
|
||||
- `GENERATE_AND_SAVE_KEY_TO`: Path to save generated private key (equivalent to `--generateAndSaveKeyTo`)
|
||||
- `USE_NATIVE_INTERFACE`: Use native WireGuard interface (Linux only). Default: false (equivalent to `--native`)
|
||||
- `INTERFACE`: Name of the WireGuard interface. Default: newt (equivalent to `--interface`)
|
||||
- `KEEP_INTERFACE`: Keep the WireGuard interface after shutdown. Default: false (equivalent to `--keep-interface`)
|
||||
- `CONFIG_FILE`: Load the config json from this file instead of in the home folder.
|
||||
|
||||
### Monitoring & Health
|
||||
|
||||
- `HEALTH_FILE`: Path to health file for connection monitoring (equivalent to `--health-file`)
|
||||
- `UPDOWN_SCRIPT`: Path to updown script for target add/remove events (equivalent to `--updown`)
|
||||
|
||||
### Metrics & Observability
|
||||
|
||||
- `NEWT_METRICS_PROMETHEUS_ENABLED`: Enable Prometheus /metrics exporter. Default: true (equivalent to `--metrics`)
|
||||
- `NEWT_METRICS_OTLP_ENABLED`: Enable OTLP exporters (metrics/traces) to OTEL_EXPORTER_OTLP_ENDPOINT. Default: false (equivalent to `--otlp`)
|
||||
- `NEWT_ADMIN_ADDR`: Admin/metrics bind address. Default: 127.0.0.1:2112 (equivalent to `--metrics-admin-addr`)
|
||||
- `NEWT_METRICS_ASYNC_BYTES`: Enable async bytes counting (background flush; lower hot path overhead). Default: false (equivalent to `--metrics-async-bytes`)
|
||||
- `NEWT_REGION`: Optional region resource attribute for telemetry and metrics (equivalent to `--region`)
|
||||
|
||||
### Network Configuration
|
||||
|
||||
- `MTU`: MTU for the internal WG interface. Default: 1280 (equivalent to `--mtu`)
|
||||
- `DNS`: DNS server to use to resolve the endpoint. Default: 9.9.9.9 (equivalent to `--dns`)
|
||||
- `PING_INTERVAL`: Interval for pinging the server. Default: 3s (equivalent to `--ping-interval`)
|
||||
- `PING_TIMEOUT`: Timeout for each ping. Default: 5s (equivalent to `--ping-timeout`)
|
||||
|
||||
### Security & TLS
|
||||
|
||||
- `ENFORCE_HC_CERT`: Enforce certificate validation for health checks. Default: false (equivalent to `--enforce-hc-cert`)
|
||||
- `TLS_CLIENT_CERT`: Path to client certificate for mTLS (equivalent to `--tls-client-cert`)
|
||||
- `TLS_CLIENT_KEY`: Path to private key for mTLS (equivalent to `--tls-client-key`)
|
||||
- `TLS_CA_CERT`: Path to CA certificate to verify server (equivalent to `--tls-ca-cert`)
|
||||
- `SKIP_TLS_VERIFY`: Skip TLS verification for server connections. Default: false
|
||||
|
||||
## Loading secrets from files
|
||||
|
||||
@@ -91,7 +146,7 @@ $ cat ~/.config/newt-client/config.json
|
||||
{
|
||||
"id": "spmzu8rbpzj1qq6",
|
||||
"secret": "f6v61mjutwme2kkydbw3fjo227zl60a2tsf5psw9r25hgae3",
|
||||
"endpoint": "https://pangolin.fossorial.io",
|
||||
"endpoint": "https://app.pangolin.net",
|
||||
"tlsClientCert": ""
|
||||
}
|
||||
```
|
||||
|
||||
@@ -3,7 +3,7 @@
|
||||
If you discover a security vulnerability, please follow the steps below to responsibly disclose it to us:
|
||||
|
||||
1. **Do not create a public GitHub issue or discussion post.** This could put the security of other users at risk.
|
||||
2. Send a detailed report to [security@fossorial.io](mailto:security@fossorial.io) or send a **private** message to a maintainer on [Discord](https://discord.gg/HCJR8Xhme4). Include:
|
||||
2. Send a detailed report to [security@pangolin.net](mailto:security@pangolin.net) or send a **private** message to a maintainer on [Discord](https://discord.gg/HCJR8Xhme4). Include:
|
||||
|
||||
- Description and location of the vulnerability.
|
||||
- Potential impact of the vulnerability.
|
||||
|
||||
37
blueprint.yaml
Normal file
37
blueprint.yaml
Normal file
@@ -0,0 +1,37 @@
|
||||
resources:
|
||||
resource-nice-id:
|
||||
name: this is my resource
|
||||
protocol: http
|
||||
full-domain: level1.test3.example.com
|
||||
host-header: example.com
|
||||
tls-server-name: example.com
|
||||
auth:
|
||||
pincode: 123456
|
||||
password: sadfasdfadsf
|
||||
sso-enabled: true
|
||||
sso-roles:
|
||||
- Member
|
||||
sso-users:
|
||||
- owen@pangolin.net
|
||||
whitelist-users:
|
||||
- owen@pangolin.net
|
||||
targets:
|
||||
# - site: glossy-plains-viscacha-rat
|
||||
- hostname: localhost
|
||||
method: http
|
||||
port: 8000
|
||||
healthcheck:
|
||||
port: 8000
|
||||
hostname: localhost
|
||||
# - site: glossy-plains-viscacha-rat
|
||||
- hostname: localhost
|
||||
method: http
|
||||
port: 8001
|
||||
resource-nice-id2:
|
||||
name: this is other resource
|
||||
protocol: tcp
|
||||
proxy-port: 3000
|
||||
targets:
|
||||
# - site: glossy-plains-viscacha-rat
|
||||
- hostname: localhost
|
||||
port: 3000
|
||||
@@ -39,7 +39,7 @@ func setupClients(client *websocket.Client) {
|
||||
func setupClientsNetstack(client *websocket.Client, host string) {
|
||||
logger.Info("Setting up clients with netstack...")
|
||||
// Create WireGuard service
|
||||
wgService, err = wgnetstack.NewWireGuardService(interfaceName, mtuInt, generateAndSaveKeyTo, host, id, client, "8.8.8.8")
|
||||
wgService, err = wgnetstack.NewWireGuardService(interfaceName, mtuInt, generateAndSaveKeyTo, host, id, client, "9.9.9.9")
|
||||
if err != nil {
|
||||
logger.Fatal("Failed to create WireGuard service: %v", err)
|
||||
}
|
||||
|
||||
41
docker-compose.metrics.collector.yml
Normal file
41
docker-compose.metrics.collector.yml
Normal file
@@ -0,0 +1,41 @@
|
||||
services:
|
||||
newt:
|
||||
build: .
|
||||
image: newt:dev
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
- NEWT_METRICS_PROMETHEUS_ENABLED=false # important: disable direct /metrics scraping
|
||||
- NEWT_METRICS_OTLP_ENABLED=true # OTLP to the Collector
|
||||
# optional:
|
||||
# - NEWT_METRICS_INCLUDE_TUNNEL_ID=false
|
||||
# When using the Collector pattern, do NOT map the Newt admin/metrics port
|
||||
# (2112) on the application service. Mapping 2112 here can cause port
|
||||
# conflicts and may result in duplicated Prometheus scraping (app AND
|
||||
# collector being scraped for the same metrics). Instead either:
|
||||
# - leave ports unset on the app service (recommended), or
|
||||
# - map 2112 only on a dedicated metrics/collector service that is
|
||||
# responsible for exposing metrics to Prometheus.
|
||||
# Example: do NOT map here
|
||||
# ports: []
|
||||
# Example: map 2112 only on a collector service
|
||||
# collector:
|
||||
# ports:
|
||||
# - "2112:2112" # collector's prometheus exporter (scraped by Prometheus)
|
||||
|
||||
otel-collector:
|
||||
image: otel/opentelemetry-collector-contrib:latest
|
||||
command: ["--config=/etc/otelcol/config.yaml"]
|
||||
volumes:
|
||||
- ./examples/otel-collector.yaml:/etc/otelcol/config.yaml:ro
|
||||
ports:
|
||||
- "4317:4317" # OTLP gRPC
|
||||
- "8889:8889" # Prometheus Exporter (scraped by Prometheus)
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:latest
|
||||
volumes:
|
||||
- ./examples/prometheus.with-collector.yml:/etc/prometheus/prometheus.yml:ro
|
||||
ports:
|
||||
- "9090:9090"
|
||||
|
||||
56
docker-compose.metrics.yml
Normal file
56
docker-compose.metrics.yml
Normal file
@@ -0,0 +1,56 @@
|
||||
name: Newt-Metrics
|
||||
services:
|
||||
# Recommended Variant A: Direct Prometheus scrape of Newt (/metrics)
|
||||
# Optional: You may add the Collector service and enable OTLP export, but do NOT
|
||||
# scrape both Newt and the Collector for the same process.
|
||||
|
||||
newt:
|
||||
build: .
|
||||
image: newt:dev
|
||||
env_file:
|
||||
- .env
|
||||
environment:
|
||||
OTEL_SERVICE_NAME: newt
|
||||
NEWT_METRICS_PROMETHEUS_ENABLED: "true"
|
||||
NEWT_METRICS_OTLP_ENABLED: "false" # avoid double-scrape by default
|
||||
NEWT_ADMIN_ADDR: ":2112"
|
||||
# Base NEWT configuration
|
||||
PANGOLIN_ENDPOINT: ${PANGOLIN_ENDPOINT}
|
||||
NEWT_ID: ${NEWT_ID}
|
||||
NEWT_SECRET: ${NEWT_SECRET}
|
||||
LOG_LEVEL: "DEBUG"
|
||||
ports:
|
||||
- "2112:2112"
|
||||
|
||||
# Optional Variant B: Enable the Collector and switch Prometheus scrape to it.
|
||||
# collector:
|
||||
# image: otel/opentelemetry-collector-contrib:0.136.0
|
||||
# command: ["--config=/etc/otelcol/config.yaml"]
|
||||
# volumes:
|
||||
# - ./examples/otel-collector.yaml:/etc/otelcol/config.yaml:ro
|
||||
# ports:
|
||||
# - "4317:4317" # OTLP gRPC in
|
||||
# - "8889:8889" # Prometheus scrape out
|
||||
|
||||
prometheus:
|
||||
image: prom/prometheus:v3.6.0
|
||||
volumes:
|
||||
- ./examples/prometheus.yml:/etc/prometheus/prometheus.yml:ro
|
||||
ports:
|
||||
- "9090:9090"
|
||||
|
||||
grafana:
|
||||
image: grafana/grafana:12.2.0
|
||||
container_name: newt-metrics-grafana
|
||||
restart: unless-stopped
|
||||
environment:
|
||||
- GF_SECURITY_ADMIN_USER=admin
|
||||
- GF_SECURITY_ADMIN_PASSWORD=admin
|
||||
ports:
|
||||
- "3005:3000"
|
||||
depends_on:
|
||||
- prometheus
|
||||
volumes:
|
||||
- ./examples/grafana/provisioning/datasources:/etc/grafana/provisioning/datasources:ro
|
||||
- ./examples/grafana/provisioning/dashboards:/etc/grafana/provisioning/dashboards:ro
|
||||
- ./examples/grafana/dashboards:/var/lib/grafana/dashboards:ro
|
||||
146
docker/client.go
146
docker/client.go
@@ -10,6 +10,7 @@ import (
|
||||
"time"
|
||||
|
||||
"github.com/docker/docker/api/types/container"
|
||||
"github.com/docker/docker/api/types/events"
|
||||
"github.com/docker/docker/api/types/filters"
|
||||
"github.com/docker/docker/client"
|
||||
"github.com/fosrl/newt/logger"
|
||||
@@ -73,8 +74,11 @@ func parseDockerHost(raw string) (dockerHost, error) {
|
||||
s = strings.TrimPrefix(s, "http://")
|
||||
s = strings.TrimPrefix(s, "https://")
|
||||
return dockerHost{"tcp", s}, nil
|
||||
case strings.HasPrefix(raw, "/"):
|
||||
// Absolute path without scheme - treat as unix socket
|
||||
return dockerHost{"unix", raw}, nil
|
||||
default:
|
||||
// default fallback to unix
|
||||
// For relative paths or other formats, also default to unix
|
||||
return dockerHost{"unix", raw}, nil
|
||||
}
|
||||
}
|
||||
@@ -85,6 +89,13 @@ func CheckSocket(socketPath string) bool {
|
||||
if socketPath == "" {
|
||||
socketPath = "unix:///var/run/docker.sock"
|
||||
}
|
||||
|
||||
// Ensure the socket path is properly formatted
|
||||
if !strings.Contains(socketPath, "://") {
|
||||
// If no scheme provided, assume unix socket
|
||||
socketPath = "unix://" + socketPath
|
||||
}
|
||||
|
||||
host, err := parseDockerHost(socketPath)
|
||||
if err != nil {
|
||||
logger.Debug("Invalid Docker socket path '%s': %v", socketPath, err)
|
||||
@@ -149,7 +160,13 @@ func IsWithinHostNetwork(socketPath string, targetAddress string, targetPort int
|
||||
func ListContainers(socketPath string, enforceNetworkValidation bool) ([]Container, error) {
|
||||
// Use the provided socket path or default to standard location
|
||||
if socketPath == "" {
|
||||
socketPath = "/var/run/docker.sock"
|
||||
socketPath = "unix:///var/run/docker.sock"
|
||||
}
|
||||
|
||||
// Ensure the socket path is properly formatted for the Docker client
|
||||
if !strings.Contains(socketPath, "://") {
|
||||
// If no scheme provided, assume unix socket
|
||||
socketPath = "unix://" + socketPath
|
||||
}
|
||||
|
||||
// Used to filter down containers returned to Pangolin
|
||||
@@ -305,3 +322,128 @@ func getHostContainer(dockerContext context.Context, dockerClient *client.Client
|
||||
|
||||
return &hostContainer, nil
|
||||
}
|
||||
|
||||
// EventCallback defines the function signature for handling Docker events
|
||||
type EventCallback func(containers []Container)
|
||||
|
||||
// EventMonitor handles Docker event monitoring
|
||||
type EventMonitor struct {
|
||||
client *client.Client
|
||||
ctx context.Context
|
||||
cancel context.CancelFunc
|
||||
callback EventCallback
|
||||
socketPath string
|
||||
enforceNetworkValidation bool
|
||||
}
|
||||
|
||||
// NewEventMonitor creates a new Docker event monitor
|
||||
func NewEventMonitor(socketPath string, enforceNetworkValidation bool, callback EventCallback) (*EventMonitor, error) {
|
||||
if socketPath == "" {
|
||||
socketPath = "unix:///var/run/docker.sock"
|
||||
}
|
||||
|
||||
if !strings.Contains(socketPath, "://") {
|
||||
socketPath = "unix://" + socketPath
|
||||
}
|
||||
|
||||
cli, err := client.NewClientWithOpts(
|
||||
client.WithHost(socketPath),
|
||||
client.WithAPIVersionNegotiation(),
|
||||
)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to create Docker client: %v", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithCancel(context.Background())
|
||||
|
||||
return &EventMonitor{
|
||||
client: cli,
|
||||
ctx: ctx,
|
||||
cancel: cancel,
|
||||
callback: callback,
|
||||
socketPath: socketPath,
|
||||
enforceNetworkValidation: enforceNetworkValidation,
|
||||
}, nil
|
||||
}
|
||||
|
||||
// Start begins monitoring Docker events
|
||||
func (em *EventMonitor) Start() error {
|
||||
logger.Debug("Starting Docker event monitoring")
|
||||
|
||||
// Filter for container events we care about
|
||||
eventFilters := filters.NewArgs()
|
||||
eventFilters.Add("type", "container")
|
||||
// eventFilters.Add("event", "create")
|
||||
eventFilters.Add("event", "start")
|
||||
eventFilters.Add("event", "stop")
|
||||
// eventFilters.Add("event", "destroy")
|
||||
// eventFilters.Add("event", "die")
|
||||
// eventFilters.Add("event", "pause")
|
||||
// eventFilters.Add("event", "unpause")
|
||||
|
||||
// Start listening for events
|
||||
eventCh, errCh := em.client.Events(em.ctx, events.ListOptions{
|
||||
Filters: eventFilters,
|
||||
})
|
||||
|
||||
go func() {
|
||||
defer func() {
|
||||
if err := em.client.Close(); err != nil {
|
||||
logger.Error("Error closing Docker client: %v", err)
|
||||
}
|
||||
}()
|
||||
|
||||
for {
|
||||
select {
|
||||
case event := <-eventCh:
|
||||
logger.Debug("Docker event received: %s %s for container %s", event.Action, event.Type, event.Actor.ID[:12])
|
||||
|
||||
// Fetch updated container list and trigger callback
|
||||
go em.handleEvent(event)
|
||||
|
||||
case err := <-errCh:
|
||||
if err != nil && err != context.Canceled {
|
||||
logger.Error("Docker event stream error: %v", err)
|
||||
// Try to reconnect after a brief delay
|
||||
time.Sleep(5 * time.Second)
|
||||
if em.ctx.Err() == nil {
|
||||
logger.Info("Attempting to reconnect to Docker event stream")
|
||||
eventCh, errCh = em.client.Events(em.ctx, events.ListOptions{
|
||||
Filters: eventFilters,
|
||||
})
|
||||
}
|
||||
}
|
||||
return
|
||||
|
||||
case <-em.ctx.Done():
|
||||
logger.Info("Docker event monitoring stopped")
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
// handleEvent processes a Docker event and triggers the callback with updated container list
|
||||
func (em *EventMonitor) handleEvent(event events.Message) {
|
||||
// Add a small delay to ensure Docker has fully processed the event
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
containers, err := ListContainers(em.socketPath, em.enforceNetworkValidation)
|
||||
if err != nil {
|
||||
logger.Error("Failed to list containers after Docker event %s: %v", event.Action, err)
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debug("Triggering callback with %d containers after Docker event %s", len(containers), event.Action)
|
||||
em.callback(containers)
|
||||
}
|
||||
|
||||
// Stop stops the event monitoring
|
||||
func (em *EventMonitor) Stop() {
|
||||
logger.Info("Stopping Docker event monitoring")
|
||||
if em.cancel != nil {
|
||||
em.cancel()
|
||||
}
|
||||
}
|
||||
|
||||
898
examples/grafana/dashboards/newt-overview.json
Normal file
898
examples/grafana/dashboards/newt-overview.json
Normal file
@@ -0,0 +1,898 @@
|
||||
{
|
||||
"annotations": {
|
||||
"list": [
|
||||
{
|
||||
"builtIn": 1,
|
||||
"datasource": {
|
||||
"type": "grafana",
|
||||
"uid": "-- Grafana --"
|
||||
},
|
||||
"enable": true,
|
||||
"hide": true,
|
||||
"iconColor": "rgba(0, 211, 255, 1)",
|
||||
"name": "Annotations & Alerts",
|
||||
"type": "dashboard"
|
||||
}
|
||||
]
|
||||
},
|
||||
"editable": true,
|
||||
"fiscalYearStartMonth": 0,
|
||||
"graphTooltip": 0,
|
||||
"id": null,
|
||||
"links": [],
|
||||
"liveNow": false,
|
||||
"panels": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 0,
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 500
|
||||
}
|
||||
]
|
||||
}
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"x": 0,
|
||||
"y": 0
|
||||
},
|
||||
"id": 1,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "go_goroutine_count",
|
||||
"instant": true,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Goroutines",
|
||||
"transformations": [],
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 1,
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 256
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 512
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"x": 6,
|
||||
"y": 0
|
||||
},
|
||||
"id": 2,
|
||||
"options": {
|
||||
"colorMode": "background",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "go_memory_gc_goal_bytes / 1024 / 1024",
|
||||
"format": "time_series",
|
||||
"instant": true,
|
||||
"legendFormat": "",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "GC Target Heap (MiB)",
|
||||
"transformations": [],
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 2,
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 10
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 25
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"x": 12,
|
||||
"y": 0
|
||||
},
|
||||
"id": 3,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(http_server_request_duration_seconds_count[$__rate_interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "req/s",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "HTTP Requests / s",
|
||||
"transformations": [],
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"color": {
|
||||
"mode": "thresholds"
|
||||
},
|
||||
"decimals": 3,
|
||||
"mappings": [],
|
||||
"thresholds": {
|
||||
"mode": "absolute",
|
||||
"steps": [
|
||||
{
|
||||
"color": "green",
|
||||
"value": null
|
||||
},
|
||||
{
|
||||
"color": "orange",
|
||||
"value": 0.1
|
||||
},
|
||||
{
|
||||
"color": "red",
|
||||
"value": 0.5
|
||||
}
|
||||
]
|
||||
},
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 7,
|
||||
"w": 6,
|
||||
"x": 18,
|
||||
"y": 0
|
||||
},
|
||||
"id": 4,
|
||||
"options": {
|
||||
"colorMode": "value",
|
||||
"graphMode": "area",
|
||||
"justifyMode": "auto",
|
||||
"orientation": "horizontal",
|
||||
"reduceOptions": {
|
||||
"calcs": [
|
||||
"lastNotNull"
|
||||
],
|
||||
"fields": "",
|
||||
"values": false
|
||||
},
|
||||
"textMode": "value_and_name"
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(newt_connection_errors_total{site_id=~\"$site_id\"}[$__rate_interval]))",
|
||||
"instant": false,
|
||||
"legendFormat": "errors/s",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Connection Errors / s",
|
||||
"transformations": [],
|
||||
"type": "stat"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"mappings": [],
|
||||
"unit": "bytes"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 7
|
||||
},
|
||||
"id": 5,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(go_memory_used_bytes)",
|
||||
"legendFormat": "Used",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "go_memory_gc_goal_bytes",
|
||||
"legendFormat": "GC Goal",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Go Heap Usage vs GC Goal",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"decimals": 0,
|
||||
"mappings": [],
|
||||
"unit": "short"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 7
|
||||
},
|
||||
"id": 6,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(go_memory_allocations_total[$__rate_interval])",
|
||||
"legendFormat": "Allocations/s",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "rate(go_memory_allocated_bytes_total[$__rate_interval])",
|
||||
"legendFormat": "Allocated bytes/s",
|
||||
"refId": "B"
|
||||
}
|
||||
],
|
||||
"title": "Allocation Activity",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"mappings": [],
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 16
|
||||
},
|
||||
"id": 7,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
|
||||
"legendFormat": "p50",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.95, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
|
||||
"legendFormat": "p95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(http_server_request_duration_seconds_bucket[$__rate_interval])) by (le))",
|
||||
"legendFormat": "p99",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "HTTP Request Duration Quantiles",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"mappings": [],
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 16
|
||||
},
|
||||
"id": 8,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(http_server_request_duration_seconds_count[$__rate_interval])) by (http_response_status_code)",
|
||||
"legendFormat": "{{http_response_status_code}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "HTTP Requests by Status",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"mappings": [],
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 25
|
||||
},
|
||||
"id": 9,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(newt_connection_attempts_total{site_id=~\"$site_id\"}[$__rate_interval])) by (transport, result)",
|
||||
"legendFormat": "{{transport}} • {{result}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Connection Attempts by Transport/Result",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"mappings": [],
|
||||
"unit": "ops"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 25
|
||||
},
|
||||
"id": 10,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(newt_connection_errors_total{site_id=~\"$site_id\"}[$__rate_interval])) by (transport, error_type)",
|
||||
"legendFormat": "{{transport}} • {{error_type}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Connection Errors by Type",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"decimals": 3,
|
||||
"mappings": [],
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 0,
|
||||
"y": 34
|
||||
},
|
||||
"id": 11,
|
||||
"options": {
|
||||
"legend": {
|
||||
"calcs": [],
|
||||
"displayMode": "table",
|
||||
"placement": "right"
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "multi",
|
||||
"sort": "desc"
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.5, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
|
||||
"legendFormat": "p50",
|
||||
"refId": "A"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.95, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
|
||||
"legendFormat": "p95",
|
||||
"refId": "B"
|
||||
},
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "histogram_quantile(0.99, sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le))",
|
||||
"legendFormat": "p99",
|
||||
"refId": "C"
|
||||
}
|
||||
],
|
||||
"title": "Tunnel Latency Quantiles",
|
||||
"type": "timeseries"
|
||||
},
|
||||
{
|
||||
"cards": {},
|
||||
"color": {
|
||||
"cardColor": "#b4ff00",
|
||||
"colorScale": "sqrt",
|
||||
"colorScheme": "interpolateTurbo"
|
||||
},
|
||||
"dataFormat": "tsbuckets",
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"fieldConfig": {
|
||||
"defaults": {
|
||||
"custom": {},
|
||||
"mappings": [],
|
||||
"unit": "s"
|
||||
},
|
||||
"overrides": []
|
||||
},
|
||||
"gridPos": {
|
||||
"h": 9,
|
||||
"w": 12,
|
||||
"x": 12,
|
||||
"y": 34
|
||||
},
|
||||
"heatmap": {},
|
||||
"hideZeroBuckets": true,
|
||||
"id": 12,
|
||||
"legend": {
|
||||
"show": false
|
||||
},
|
||||
"options": {
|
||||
"calculate": true,
|
||||
"cellGap": 2,
|
||||
"cellSize": "auto",
|
||||
"color": {
|
||||
"exponent": 0.5
|
||||
},
|
||||
"exemplars": {
|
||||
"color": "rgba(255,255,255,0.7)"
|
||||
},
|
||||
"filterValues": {
|
||||
"le": 1e-9
|
||||
},
|
||||
"legend": {
|
||||
"show": false
|
||||
},
|
||||
"tooltip": {
|
||||
"mode": "single",
|
||||
"show": true
|
||||
},
|
||||
"xAxis": {
|
||||
"show": true
|
||||
},
|
||||
"yAxis": {
|
||||
"decimals": 3,
|
||||
"show": true
|
||||
}
|
||||
},
|
||||
"pluginVersion": "11.1.0",
|
||||
"targets": [
|
||||
{
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"editorMode": "code",
|
||||
"expr": "sum(rate(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\", tunnel_id=~\"$tunnel_id\"}[$__rate_interval])) by (le)",
|
||||
"format": "heatmap",
|
||||
"legendFormat": "{{le}}",
|
||||
"refId": "A"
|
||||
}
|
||||
],
|
||||
"title": "Tunnel Latency Bucket Rate",
|
||||
"type": "heatmap"
|
||||
}
|
||||
],
|
||||
"refresh": "30s",
|
||||
"schemaVersion": 39,
|
||||
"style": "dark",
|
||||
"tags": [
|
||||
"newt",
|
||||
"otel"
|
||||
],
|
||||
"templating": {
|
||||
"list": [
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "Prometheus",
|
||||
"value": "prometheus"
|
||||
},
|
||||
"hide": 0,
|
||||
"label": "Datasource",
|
||||
"name": "DS_PROMETHEUS",
|
||||
"options": [],
|
||||
"query": "prometheus",
|
||||
"refresh": 1,
|
||||
"type": "datasource"
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"definition": "label_values(target_info, site_id)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Site",
|
||||
"multi": true,
|
||||
"name": "site_id",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(target_info, site_id)",
|
||||
"refId": "SiteIdVar"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
},
|
||||
{
|
||||
"current": {
|
||||
"selected": false,
|
||||
"text": "All",
|
||||
"value": "$__all"
|
||||
},
|
||||
"datasource": {
|
||||
"type": "prometheus",
|
||||
"uid": "prometheus"
|
||||
},
|
||||
"definition": "label_values(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\"}, tunnel_id)",
|
||||
"hide": 0,
|
||||
"includeAll": true,
|
||||
"label": "Tunnel",
|
||||
"multi": true,
|
||||
"name": "tunnel_id",
|
||||
"options": [],
|
||||
"query": {
|
||||
"query": "label_values(newt_tunnel_latency_seconds_bucket{site_id=~\"$site_id\"}, tunnel_id)",
|
||||
"refId": "TunnelVar"
|
||||
},
|
||||
"refresh": 2,
|
||||
"regex": "",
|
||||
"skipUrlSync": false,
|
||||
"sort": 1,
|
||||
"tagValuesQuery": "",
|
||||
"tags": [],
|
||||
"tagsQuery": "",
|
||||
"type": "query",
|
||||
"useTags": false
|
||||
}
|
||||
]
|
||||
},
|
||||
"time": {
|
||||
"from": "now-6h",
|
||||
"to": "now"
|
||||
},
|
||||
"timepicker": {
|
||||
"refresh_intervals": [
|
||||
"10s",
|
||||
"30s",
|
||||
"1m",
|
||||
"5m",
|
||||
"15m",
|
||||
"30m",
|
||||
"1h",
|
||||
"2h",
|
||||
"1d"
|
||||
],
|
||||
"time_options": [
|
||||
"5m",
|
||||
"15m",
|
||||
"1h",
|
||||
"6h",
|
||||
"12h",
|
||||
"24h",
|
||||
"2d",
|
||||
"7d",
|
||||
"30d"
|
||||
]
|
||||
},
|
||||
"timezone": "browser",
|
||||
"title": "Newt Overview",
|
||||
"uid": "newt-overview",
|
||||
"version": 1,
|
||||
"weekStart": ""
|
||||
}
|
||||
9
examples/grafana/provisioning/dashboards/dashboard.yaml
Normal file
9
examples/grafana/provisioning/dashboards/dashboard.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
apiVersion: 1
|
||||
providers:
|
||||
- name: "newt"
|
||||
folder: "Newt"
|
||||
type: file
|
||||
disableDeletion: false
|
||||
allowUiUpdates: true
|
||||
options:
|
||||
path: /var/lib/grafana/dashboards
|
||||
@@ -0,0 +1,9 @@
|
||||
apiVersion: 1
|
||||
datasources:
|
||||
- name: Prometheus
|
||||
type: prometheus
|
||||
access: proxy
|
||||
url: http://prometheus:9090
|
||||
uid: prometheus
|
||||
isDefault: true
|
||||
editable: true
|
||||
61
examples/otel-collector.yaml
Normal file
61
examples/otel-collector.yaml
Normal file
@@ -0,0 +1,61 @@
|
||||
# Variant A: Direct scrape of Newt (/metrics) via Prometheus (no Collector needed)
|
||||
# Note: Newt already exposes labels like site_id, protocol, direction. Do not promote
|
||||
# resource attributes into labels when scraping Newt directly.
|
||||
#
|
||||
# Example Prometheus scrape config:
|
||||
# global:
|
||||
# scrape_interval: 15s
|
||||
# scrape_configs:
|
||||
# - job_name: newt
|
||||
# static_configs:
|
||||
# - targets: ["newt:2112"]
|
||||
#
|
||||
# Variant B: Use OTEL Collector (Newt -> OTLP -> Collector -> Prometheus)
|
||||
# Prometheus then scrapes the metrics from the Collector's Prometheus exporter (":8889" below).
|
||||
# Labels are already on datapoints; promotion from resource is OPTIONAL and typically NOT required.
|
||||
# If you enable transform/promote below, ensure you do not duplicate labels.
|
||||
|
||||
receivers:
|
||||
otlp:
|
||||
protocols:
|
||||
grpc:
|
||||
endpoint: ":4317"
|
||||
|
||||
processors:
|
||||
memory_limiter:
|
||||
check_interval: 5s
|
||||
limit_percentage: 80
|
||||
spike_limit_percentage: 25
|
||||
resourcedetection:
|
||||
detectors: [env, system]
|
||||
timeout: 5s
|
||||
batch: {}
|
||||
# OPTIONAL: Only enable if you need to promote resource attributes to labels.
|
||||
# WARNING: Newt already provides site_id as a label; avoid double-promotion.
|
||||
# transform/promote:
|
||||
# error_mode: ignore
|
||||
# metric_statements:
|
||||
# - context: datapoint
|
||||
# statements:
|
||||
# - set(attributes["service_instance_id"], resource.attributes["service.instance.id"]) where resource.attributes["service.instance.id"] != nil
|
||||
# - set(attributes["site_id"], resource.attributes["site_id"]) where resource.attributes["site_id"] != nil
|
||||
|
||||
exporters:
|
||||
prometheus:
|
||||
endpoint: ":8889"
|
||||
send_timestamps: true
|
||||
# prometheusremotewrite:
|
||||
# endpoint: http://mimir:9009/api/v1/push
|
||||
debug:
|
||||
verbosity: basic
|
||||
|
||||
service:
|
||||
pipelines:
|
||||
metrics:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, resourcedetection, batch] # add transform/promote if you really need it
|
||||
exporters: [prometheus]
|
||||
traces:
|
||||
receivers: [otlp]
|
||||
processors: [memory_limiter, resourcedetection, batch]
|
||||
exporters: [debug]
|
||||
16
examples/prometheus.with-collector.yml
Normal file
16
examples/prometheus.with-collector.yml
Normal file
@@ -0,0 +1,16 @@
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
# IMPORTANT: Do not scrape Newt directly; scrape only the Collector!
|
||||
- job_name: 'otel-collector'
|
||||
static_configs:
|
||||
- targets: ['otel-collector:8889']
|
||||
|
||||
# optional: limit metric cardinality
|
||||
metric_relabel_configs:
|
||||
- action: labeldrop
|
||||
regex: 'tunnel_id'
|
||||
# - action: keep
|
||||
# source_labels: [site_id]
|
||||
# regex: '(site-a|site-b)'
|
||||
21
examples/prometheus.yml
Normal file
21
examples/prometheus.yml
Normal file
@@ -0,0 +1,21 @@
|
||||
global:
|
||||
scrape_interval: 15s
|
||||
|
||||
scrape_configs:
|
||||
- job_name: 'newt'
|
||||
scrape_interval: 15s
|
||||
static_configs:
|
||||
- targets: ['newt:2112'] # /metrics
|
||||
metric_relabel_configs:
|
||||
# optional: drop tunnel_id
|
||||
- action: labeldrop
|
||||
regex: 'tunnel_id'
|
||||
# optional: allow only specific sites (delete this rule to keep all sites)
|
||||
- action: keep
|
||||
source_labels: [site_id]
|
||||
regex: '(site-a|site-b)'
|
||||
|
||||
# WARNING: Do not enable this together with the 'newt' job above or you will double-count.
|
||||
# - job_name: 'otel-collector'
|
||||
# static_configs:
|
||||
# - targets: ['otel-collector:8889']
|
||||
60
go.mod
60
go.mod
@@ -3,32 +3,49 @@ module github.com/fosrl/newt
|
||||
go 1.25
|
||||
|
||||
require (
|
||||
github.com/docker/docker v28.3.3+incompatible
|
||||
github.com/docker/docker v28.5.1+incompatible
|
||||
github.com/google/gopacket v1.1.19
|
||||
github.com/gorilla/websocket v1.5.3
|
||||
github.com/prometheus/client_golang v1.23.2
|
||||
github.com/vishvananda/netlink v1.3.1
|
||||
golang.org/x/crypto v0.41.0
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0
|
||||
go.opentelemetry.io/otel v1.38.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.60.0
|
||||
go.opentelemetry.io/otel/metric v1.38.0
|
||||
go.opentelemetry.io/otel/sdk v1.38.0
|
||||
go.opentelemetry.io/otel/sdk/metric v1.38.0
|
||||
golang.org/x/crypto v0.43.0
|
||||
golang.org/x/exp v0.0.0-20250718183923-645b1fa84792
|
||||
golang.org/x/net v0.43.0
|
||||
golang.org/x/net v0.46.0
|
||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10
|
||||
google.golang.org/grpc v1.76.0
|
||||
gopkg.in/yaml.v3 v3.0.1
|
||||
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c
|
||||
software.sslmate.com/src/go-pkcs12 v0.6.0
|
||||
)
|
||||
|
||||
require (
|
||||
github.com/Microsoft/go-winio v0.6.2 // indirect
|
||||
github.com/containerd/errdefs v1.0.0 // indirect
|
||||
github.com/Microsoft/go-winio v0.6.0 // indirect
|
||||
github.com/beorn7/perks v1.0.1 // indirect
|
||||
github.com/cenkalti/backoff/v5 v5.0.3 // indirect
|
||||
github.com/cespare/xxhash/v2 v2.3.0 // indirect
|
||||
github.com/containerd/errdefs v0.3.0 // indirect
|
||||
github.com/containerd/errdefs/pkg v0.3.0 // indirect
|
||||
github.com/distribution/reference v0.6.0 // indirect
|
||||
github.com/docker/go-connections v0.5.0 // indirect
|
||||
github.com/docker/go-units v0.5.0 // indirect
|
||||
github.com/docker/go-connections v0.6.0 // indirect
|
||||
github.com/docker/go-units v0.4.0 // indirect
|
||||
github.com/felixge/httpsnoop v1.0.4 // indirect
|
||||
github.com/go-logr/logr v1.4.3 // indirect
|
||||
github.com/go-logr/stdr v1.2.2 // indirect
|
||||
github.com/gogo/protobuf v1.3.2 // indirect
|
||||
github.com/google/btree v1.1.3 // indirect
|
||||
github.com/google/go-cmp v0.7.0 // indirect
|
||||
github.com/google/uuid v1.6.0 // indirect
|
||||
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc // indirect
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 // indirect
|
||||
github.com/josharian/native v1.1.0 // indirect
|
||||
github.com/mdlayher/genetlink v1.3.2 // indirect
|
||||
github.com/mdlayher/netlink v1.7.2 // indirect
|
||||
@@ -37,18 +54,29 @@ require (
|
||||
github.com/moby/sys/atomicwriter v0.1.0 // indirect
|
||||
github.com/moby/term v0.5.2 // indirect
|
||||
github.com/morikuni/aec v1.0.0 // indirect
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 // indirect
|
||||
github.com/opencontainers/go-digest v1.0.0 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.1 // indirect
|
||||
github.com/opencontainers/image-spec v1.1.0 // indirect
|
||||
github.com/pkg/errors v0.9.1 // indirect
|
||||
github.com/prometheus/client_model v0.6.2 // indirect
|
||||
github.com/prometheus/common v0.66.1 // indirect
|
||||
github.com/prometheus/otlptranslator v0.0.2 // indirect
|
||||
github.com/prometheus/procfs v0.17.0 // indirect
|
||||
github.com/vishvananda/netns v0.0.5 // indirect
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 // indirect
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 // indirect
|
||||
go.opentelemetry.io/otel v1.37.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0 // indirect
|
||||
go.opentelemetry.io/otel/metric v1.37.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.37.0 // indirect
|
||||
golang.org/x/sync v0.16.0 // indirect
|
||||
golang.org/x/sys v0.35.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 // indirect
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0 // indirect
|
||||
go.opentelemetry.io/otel/trace v1.38.0 // indirect
|
||||
go.opentelemetry.io/proto/otlp v1.7.1 // indirect
|
||||
go.yaml.in/yaml/v2 v2.4.2 // indirect
|
||||
golang.org/x/mod v0.28.0 // indirect
|
||||
golang.org/x/sync v0.17.0 // indirect
|
||||
golang.org/x/sys v0.37.0 // indirect
|
||||
golang.org/x/text v0.30.0 // indirect
|
||||
golang.org/x/time v0.12.0 // indirect
|
||||
golang.org/x/tools v0.37.0 // indirect
|
||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 // indirect
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 // indirect
|
||||
google.golang.org/protobuf v1.36.8 // indirect
|
||||
)
|
||||
|
||||
182
go.sum
182
go.sum
@@ -1,26 +1,23 @@
|
||||
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c h1:udKWzYgxTojEKWjV8V+WSxDXJ4NFATAsZjh8iIbsQIg=
|
||||
github.com/Azure/go-ansiterm v0.0.0-20250102033503-faa5f7b0171c/go.mod h1:xomTg63KZ2rFqZQzSB4Vz2SUXa1BpHTVz9L5PTmPC4E=
|
||||
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
|
||||
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
|
||||
github.com/cenkalti/backoff v2.2.1+incompatible h1:tNowT99t7UNflLxfYYSlKYsBpXdEet03Pg2g16Swow4=
|
||||
github.com/cenkalti/backoff/v5 v5.0.2 h1:rIfFVxEf1QsI7E1ZHfp/B4DF/6QBAUhmgkxc0H7Zss8=
|
||||
github.com/cenkalti/backoff/v5 v5.0.2/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
|
||||
github.com/containerd/errdefs v1.0.0 h1:tg5yIfIlQIrxYtu9ajqY42W3lpS19XqdxRQeEwYG8PI=
|
||||
github.com/containerd/errdefs v1.0.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
|
||||
github.com/Microsoft/go-winio v0.6.0 h1:slsWYD/zyx7lCXoZVlvQrj0hPTM1HI4+v1sIda2yDvg=
|
||||
github.com/Microsoft/go-winio v0.6.0/go.mod h1:cTAf44im0RAYeL23bpB+fzCyDH2MJiz2BO69KH/soAE=
|
||||
github.com/beorn7/perks v1.0.1 h1:VlbKKnNfV8bJzeqoa4cOKqO6bYr3WgKZxO8Z16+hsOM=
|
||||
github.com/beorn7/perks v1.0.1/go.mod h1:G2ZrVWU2WbWT9wwq4/hrbKbnv/1ERSJQ0ibhJ6rlkpw=
|
||||
github.com/cenkalti/backoff/v5 v5.0.3 h1:ZN+IMa753KfX5hd8vVaMixjnqRZ3y8CuJKRKj1xcsSM=
|
||||
github.com/cenkalti/backoff/v5 v5.0.3/go.mod h1:rkhZdG3JZukswDf7f0cwqPNk4K0sa+F97BxZthm/crw=
|
||||
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
|
||||
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
|
||||
github.com/containerd/errdefs v0.3.0 h1:FSZgGOeK4yuT/+DnF07/Olde/q4KBoMsaamhXxIMDp4=
|
||||
github.com/containerd/errdefs v0.3.0/go.mod h1:+YBYIdtsnF4Iw6nWZhJcqGSg/dwvV7tyJ/kCkyJ2k+M=
|
||||
github.com/containerd/errdefs/pkg v0.3.0 h1:9IKJ06FvyNlexW690DXuQNx2KA2cUJXx151Xdx3ZPPE=
|
||||
github.com/containerd/errdefs/pkg v0.3.0/go.mod h1:NJw6s9HwNuRhnjJhM7pylWwMyAkmCQvQ4GpJHEqRLVk=
|
||||
github.com/containerd/log v0.1.0 h1:TCJt7ioM2cr/tfR8GPbGf9/VRAX8D2B4PjzCpfX540I=
|
||||
github.com/containerd/log v0.1.0/go.mod h1:VRRf09a7mHDIRezVKTRCrOq78v577GXq3bSa3EhrzVo=
|
||||
github.com/davecgh/go-spew v1.1.1 h1:vj9j/u1bqnvCEfJOwUhtlOARqs3+rkHYY13jYWTU97c=
|
||||
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
|
||||
github.com/distribution/reference v0.6.0 h1:0IXCQ5g4/QMHHkarYzh5l+u8T3t73zM5QvfrDyIgxBk=
|
||||
github.com/distribution/reference v0.6.0/go.mod h1:BbU0aIcezP1/5jX/8MP0YiH4SdvB5Y4f/wlDRiLyi3E=
|
||||
github.com/docker/docker v28.3.3+incompatible h1:Dypm25kh4rmk49v1eiVbsAtpAsYURjYkaKubwuBdxEI=
|
||||
github.com/docker/docker v28.3.3+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
|
||||
github.com/docker/go-connections v0.5.0 h1:USnMq7hx7gwdVZq1L49hLXaFtUdTADjXGp+uj1Br63c=
|
||||
github.com/docker/go-connections v0.5.0/go.mod h1:ov60Kzw0kKElRwhNs9UlUHAE/F9Fe6GLaXnqyDdmEXc=
|
||||
github.com/docker/go-units v0.5.0 h1:69rxXcBk27SvSaaxTtLh/8llcHD8vYHT7WSdRZ/jvr4=
|
||||
github.com/docker/go-units v0.5.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||
github.com/docker/docker v28.5.1+incompatible h1:Bm8DchhSD2J6PsFzxC35TZo4TLGR2PdW/E69rU45NhM=
|
||||
github.com/docker/docker v28.5.1+incompatible/go.mod h1:eEKB0N0r5NX/I1kEveEz05bcu8tLC/8azJZsviup8Sk=
|
||||
github.com/docker/go-connections v0.6.0 h1:LlMG9azAe1TqfR7sO+NJttz1gy6KO7VJBh+pMmjSD94=
|
||||
github.com/docker/go-connections v0.6.0/go.mod h1:AahvXYshr6JgfUJGdDCs2b5EZG/vmaMAntpSFH5BFKE=
|
||||
github.com/docker/go-units v0.4.0 h1:3uh0PgVws3nIA0Q+MwDC8yjEPf9zjRfZZWXZYDct3Tw=
|
||||
github.com/docker/go-units v0.4.0/go.mod h1:fgPhTUdO+D/Jk86RDLlptpiXQzgHJF7gydDDbaIK4Dk=
|
||||
github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2Wg=
|
||||
github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U=
|
||||
github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A=
|
||||
@@ -28,8 +25,6 @@ github.com/go-logr/logr v1.4.3 h1:CjnDlHq8ikf6E492q6eKboGOC0T8CDaOvkHCIg8idEI=
|
||||
github.com/go-logr/logr v1.4.3/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
|
||||
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
|
||||
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
|
||||
github.com/gogo/protobuf v1.3.2 h1:Ov1cvc58UF3b5XjBnZv7+opcTcQFZebYjWzi34vdm4Q=
|
||||
github.com/gogo/protobuf v1.3.2/go.mod h1:P1XiOD3dCwIKUDQYPy72D8LYyHL2YPYrpS2s69NZV8Q=
|
||||
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
|
||||
github.com/google/btree v1.1.3/go.mod h1:qOPhT0dTNdNzV6Z/lhRX0YXUafgPLFUh+gZMl761Gm4=
|
||||
github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8=
|
||||
@@ -40,131 +35,122 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0=
|
||||
github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo=
|
||||
github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aNNg=
|
||||
github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3 h1:5ZPtiqj0JL5oKWmcsq4VMaAW5ukBEgSGXEN89zeH1Jo=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.26.3/go.mod h1:ndYquD05frm2vACXE1nsccT4oJzjhw2arTS2cpUD1PI=
|
||||
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc h1:GN2Lv3MGO7AS6PrRoT6yV5+wkrOpcszoIsO4+4ds248=
|
||||
github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc/go.mod h1:+JKpmjMGhpgPL+rXZ5nsZieVzvarn86asRlBg4uNGnk=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2 h1:8Tjv8EJ+pM1xP8mK6egEbD1OgnVTyacbefKhmbLhIhU=
|
||||
github.com/grpc-ecosystem/grpc-gateway/v2 v2.27.2/go.mod h1:pkJQ2tZHJ0aFOVEEot6oZmaVEZcRme73eIFmhiVuRWs=
|
||||
github.com/josharian/native v1.1.0 h1:uuaP0hAbW7Y4l0ZRQ6C9zfb7Mg1mbFKry/xzDAfmtLA=
|
||||
github.com/josharian/native v1.1.0/go.mod h1:7X/raswPFr05uY3HiLlYeyQntB6OO7E/d2Cu7qoaN2w=
|
||||
github.com/kisielk/errcheck v1.5.0/go.mod h1:pFxgyoBC7bSaBwPgfKdkLd5X25qrDl4LWUI2bnpBCr8=
|
||||
github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck=
|
||||
github.com/mdlayher/genetlink v1.3.2 h1:KdrNKe+CTu+IbZnm/GVUMXSqBBLqcGpRDa0xkQy56gw=
|
||||
github.com/mdlayher/genetlink v1.3.2/go.mod h1:tcC3pkCrPUGIKKsCsp0B3AdaaKuHtaxoJRz3cc+528o=
|
||||
github.com/mdlayher/netlink v1.7.2 h1:/UtM3ofJap7Vl4QWCPDGXY8d3GIY2UGSDbK+QWmY8/g=
|
||||
github.com/mdlayher/netlink v1.7.2/go.mod h1:xraEF7uJbxLhc5fpHL4cPe221LI2bdttWlU+ZGLfQSw=
|
||||
github.com/mdlayher/socket v0.5.1 h1:VZaqt6RkGkt2OE9l3GcC6nZkqD3xKeQLyfleW/uBcos=
|
||||
github.com/mdlayher/socket v0.5.1/go.mod h1:TjPLHI1UgwEv5J1B5q0zTZq12A/6H7nKmtTanQE37IQ=
|
||||
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721 h1:RlZweED6sbSArvlE924+mUcZuXKLBHA35U7LN621Bws=
|
||||
github.com/mikioh/ipaddr v0.0.0-20190404000644-d465c8ab6721/go.mod h1:Ickgr2WtCLZ2MDGd4Gr0geeCH5HybhRJbonOgQpvSxc=
|
||||
github.com/moby/docker-image-spec v1.3.1 h1:jMKff3w6PgbfSa69GfNg+zN/XLhfXJGnEx3Nl2EsFP0=
|
||||
github.com/moby/docker-image-spec v1.3.1/go.mod h1:eKmb5VW8vQEh/BAr2yvVNvuiJuY6UIocYsFu/DxxRpo=
|
||||
github.com/moby/sys/atomicwriter v0.1.0 h1:kw5D/EqkBwsBFi0ss9v1VG3wIkVhzGvLklJ+w3A14Sw=
|
||||
github.com/moby/sys/atomicwriter v0.1.0/go.mod h1:Ul8oqv2ZMNHOceF643P6FKPXeCmYtlQMvpizfsSoaWs=
|
||||
github.com/moby/sys/sequential v0.6.0 h1:qrx7XFUd/5DxtqcoH1h438hF5TmOvzC/lspjy7zgvCU=
|
||||
github.com/moby/sys/sequential v0.6.0/go.mod h1:uyv8EUTrca5PnDsdMGXhZe6CCe8U/UiTWd+lL+7b/Ko=
|
||||
github.com/moby/term v0.5.2 h1:6qk3FJAFDs6i/q3W/pQ97SX192qKfZgGjCQqfCJkgzQ=
|
||||
github.com/moby/term v0.5.2/go.mod h1:d3djjFCrjnB+fl8NJux+EJzu0msscUP+f8it8hPkFLc=
|
||||
github.com/morikuni/aec v1.0.0 h1:nP9CBfwrvYnBRgY6qfDQkygYDmYwOilePFkwzv4dU8A=
|
||||
github.com/morikuni/aec v1.0.0/go.mod h1:BbKIizmSmc5MMPqRYbxO4ZU0S0+P200+tUnFx7PXmsc=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822 h1:C3w9PqII01/Oq1c1nUAm88MOHcQC9l5mIlSMApZMrHA=
|
||||
github.com/munnerz/goautoneg v0.0.0-20191010083416-a7dc8b61c822/go.mod h1:+n7T8mK8HuQTcFwEeznm/DIxMOiR9yIdICNftLE1DvQ=
|
||||
github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8Oi/yOhh5U=
|
||||
github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM=
|
||||
github.com/opencontainers/image-spec v1.1.1 h1:y0fUlFfIZhPF1W537XOLg0/fcx6zcHCJwooC2xJA040=
|
||||
github.com/opencontainers/image-spec v1.1.1/go.mod h1:qpqAh3Dmcf36wStyyWU+kCeDgrGnAve2nCC8+7h8Q0M=
|
||||
github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug=
|
||||
github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM=
|
||||
github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4=
|
||||
github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0=
|
||||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
|
||||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
|
||||
github.com/sirupsen/logrus v1.9.3 h1:dueUQJ1C2q9oE3F7wvmSGAaVtTmUizReu6fjN8uqzbQ=
|
||||
github.com/sirupsen/logrus v1.9.3/go.mod h1:naHLuLoDiP4jHNo9R0sCBMtWGeIprob74mVsIT4qYEQ=
|
||||
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
|
||||
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
|
||||
github.com/prometheus/client_golang v1.23.2 h1:Je96obch5RDVy3FDMndoUsjAhG5Edi49h0RJWRi/o0o=
|
||||
github.com/prometheus/client_golang v1.23.2/go.mod h1:Tb1a6LWHB3/SPIzCoaDXI4I8UHKeFTEQ1YCr+0Gyqmg=
|
||||
github.com/prometheus/client_model v0.6.2 h1:oBsgwpGs7iVziMvrGhE53c/GrLUsZdHnqNwqPLxwZyk=
|
||||
github.com/prometheus/client_model v0.6.2/go.mod h1:y3m2F6Gdpfy6Ut/GBsUqTWZqCUvMVzSfMLjcu6wAwpE=
|
||||
github.com/prometheus/common v0.66.1 h1:h5E0h5/Y8niHc5DlaLlWLArTQI7tMrsfQjHV+d9ZoGs=
|
||||
github.com/prometheus/common v0.66.1/go.mod h1:gcaUsgf3KfRSwHY4dIMXLPV0K/Wg1oZ8+SbZk/HH/dA=
|
||||
github.com/prometheus/otlptranslator v0.0.2 h1:+1CdeLVrRQ6Psmhnobldo0kTp96Rj80DRXRd5OSnMEQ=
|
||||
github.com/prometheus/otlptranslator v0.0.2/go.mod h1:P8AwMgdD7XEr6QRUJ2QWLpiAZTgTE2UYgjlu3svompI=
|
||||
github.com/prometheus/procfs v0.17.0 h1:FuLQ+05u4ZI+SS/w9+BWEM2TXiHKsUQ9TADiRH7DuK0=
|
||||
github.com/prometheus/procfs v0.17.0/go.mod h1:oPQLaDAMRbA+u8H5Pbfq+dl3VDAvHxMUOVhe0wYB2zw=
|
||||
github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW6bV0=
|
||||
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
|
||||
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
|
||||
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
|
||||
github.com/yuin/goldmark v1.1.27/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0 h1:cH53jehLUN6UFLY71z+NDOiNJqDdPRaXzTel0sJySYA=
|
||||
go.opentelemetry.io/auto/sdk v1.1.0/go.mod h1:3wSPjt5PWp2RhlCcmmOial7AvC4DQqZb7a7wCow3W8A=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0 h1:Hf9xI/XLML9ElpiHVDNwvqI0hIFlzV8dgIr35kV1kRU=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.62.0/go.mod h1:NfchwuyNoMcZ5MLHwPrODwUF1HWCXWrL31s8gSAdIKY=
|
||||
go.opentelemetry.io/otel v1.37.0 h1:9zhNfelUvx0KBfu/gb+ZgeAfAgtWrfHJZcAqFC228wQ=
|
||||
go.opentelemetry.io/otel v1.37.0/go.mod h1:ehE/umFRLnuLa/vSccNq9oS1ErUlkkK71gMcN34UG8I=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0 h1:dNzwXjZKpMpE2JhmO+9HsPl42NIXFIFSUSSs0fiqra0=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.36.0/go.mod h1:90PoxvaEB5n6AOdZvi+yWJQoE95U8Dhhw2bSyRqnTD0=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0 h1:nRVXXvf78e00EwY6Wp0YII8ww2JVWshZ20HfTlE11AM=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.36.0/go.mod h1:r49hO7CgrxY9Voaj3Xe8pANWtr0Oq916d0XAmOoCZAQ=
|
||||
go.opentelemetry.io/otel/metric v1.37.0 h1:mvwbQS5m0tbmqML4NqK+e3aDiO02vsf/WgbsdpcPoZE=
|
||||
go.opentelemetry.io/otel/metric v1.37.0/go.mod h1:04wGrZurHYKOc+RKeye86GwKiTb9FKm1WHtO+4EVr2E=
|
||||
go.opentelemetry.io/otel/sdk v1.37.0 h1:ItB0QUqnjesGRvNcmAcU0LyvkVyGJ2xftD29bWdDvKI=
|
||||
go.opentelemetry.io/otel/sdk v1.37.0/go.mod h1:VredYzxUvuo2q3WRcDnKDjbdvmO0sCzOvVAiY+yUkAg=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.37.0 h1:90lI228XrB9jCMuSdA0673aubgRobVZFhbjxHHspCPc=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.37.0/go.mod h1:cNen4ZWfiD37l5NhS+Keb5RXVWZWpRE+9WyVCpbo5ps=
|
||||
go.opentelemetry.io/otel/trace v1.37.0 h1:HLdcFNbRQBE2imdSEgm/kwqmQj1Or1l/7bW6mxVK7z4=
|
||||
go.opentelemetry.io/otel/trace v1.37.0/go.mod h1:TlgrlQ+PtQO5XFerSPUYG0JSgGyryXewPGyayAWSBS0=
|
||||
go.opentelemetry.io/proto/otlp v1.6.0 h1:jQjP+AQyTf+Fe7OKj/MfkDrmK4MNVtw2NpXsf9fefDI=
|
||||
go.opentelemetry.io/proto/otlp v1.6.0/go.mod h1:cicgGehlFuNdgZkcALOCh3VE6K/u2tAjzlRhDwmVpZc=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0 h1:RbKq8BG0FI8OiXhBfcRtqqHcZcka+gU3cskNuf05R18=
|
||||
go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.63.0/go.mod h1:h06DGIukJOevXaj/xrNjhi/2098RZzcLTbc0jDAUbsg=
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0 h1:PeBoRj6af6xMI7qCupwFvTbbnd49V7n5YpG6pg8iDYQ=
|
||||
go.opentelemetry.io/contrib/instrumentation/runtime v0.63.0/go.mod h1:ingqBCtMCe8I4vpz/UVzCW6sxoqgZB37nao91mLQ3Bw=
|
||||
go.opentelemetry.io/otel v1.38.0 h1:RkfdswUDRimDg0m2Az18RKOsnI8UDzppJAtj01/Ymk8=
|
||||
go.opentelemetry.io/otel v1.38.0/go.mod h1:zcmtmQ1+YmQM9wrNsTGV/q/uyusom3P8RxwExxkZhjM=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0 h1:vl9obrcoWVKp/lwl8tRE33853I8Xru9HFbw/skNeLs8=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc v1.38.0/go.mod h1:GAXRxmLJcVM3u22IjTg74zWBrRCKq8BnOqUVLodpcpw=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0 h1:GqRJVj7UmLjCVyVJ3ZFLdPRmhDUp2zFmQe3RHIOsw24=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace v1.38.0/go.mod h1:ri3aaHSmCTVYu2AWv44YMauwAQc0aqI9gHKIcSbI1pU=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0 h1:lwI4Dc5leUqENgGuQImwLo4WnuXFPetmPpkLi2IrX54=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc v1.38.0/go.mod h1:Kz/oCE7z5wuyhPxsXDuaPteSWqjSBD5YaSdbxZYGbGk=
|
||||
go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracehttp v1.38.0/go.mod h1:kldtb7jDTeol0l3ewcmd8SDvx3EmIE7lyvqbasU3QC4=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.60.0 h1:cGtQxGvZbnrWdC2GyjZi0PDKVSLWP/Jocix3QWfXtbo=
|
||||
go.opentelemetry.io/otel/exporters/prometheus v0.60.0/go.mod h1:hkd1EekxNo69PTV4OWFGZcKQiIqg0RfuWExcPKFvepk=
|
||||
go.opentelemetry.io/otel/metric v1.38.0 h1:Kl6lzIYGAh5M159u9NgiRkmoMKjvbsKtYRwgfrA6WpA=
|
||||
go.opentelemetry.io/otel/metric v1.38.0/go.mod h1:kB5n/QoRM8YwmUahxvI3bO34eVtQf2i4utNVLr9gEmI=
|
||||
go.opentelemetry.io/otel/sdk v1.38.0 h1:l48sr5YbNf2hpCUj/FoGhW9yDkl+Ma+LrVl8qaM5b+E=
|
||||
go.opentelemetry.io/otel/sdk v1.38.0/go.mod h1:ghmNdGlVemJI3+ZB5iDEuk4bWA3GkTpW+DOoZMYBVVg=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.38.0 h1:aSH66iL0aZqo//xXzQLYozmWrXxyFkBJ6qT5wthqPoM=
|
||||
go.opentelemetry.io/otel/sdk/metric v1.38.0/go.mod h1:dg9PBnW9XdQ1Hd6ZnRz689CbtrUp0wMMs9iPcgT9EZA=
|
||||
go.opentelemetry.io/otel/trace v1.38.0 h1:Fxk5bKrDZJUH+AMyyIXGcFAPah0oRcT+LuNtJrmcNLE=
|
||||
go.opentelemetry.io/otel/trace v1.38.0/go.mod h1:j1P9ivuFsTceSWe1oY+EeW3sc+Pp42sO++GHkg4wwhs=
|
||||
go.opentelemetry.io/proto/otlp v1.7.1 h1:gTOMpGDb0WTBOP8JaO72iL3auEZhVmAQg4ipjOVAtj4=
|
||||
go.opentelemetry.io/proto/otlp v1.7.1/go.mod h1:b2rVh6rfI/s2pHWNlB7ILJcRALpcNDzKhACevjI+ZnE=
|
||||
go.yaml.in/yaml/v2 v2.4.2 h1:DzmwEr2rDGHl7lsFgAHxmNz/1NlQ7xLIrlN2h5d1eGI=
|
||||
go.yaml.in/yaml/v2 v2.4.2/go.mod h1:081UH+NErpNdqlCXm3TtEran0rJZGxAYx9hb/ELlsPU=
|
||||
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
|
||||
golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI=
|
||||
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
|
||||
golang.org/x/crypto v0.41.0 h1:WKYxWedPGCTVVl5+WHSSrOBT0O8lx32+zxmHxijgXp4=
|
||||
golang.org/x/crypto v0.41.0/go.mod h1:pO5AFd7FA68rFak7rOAGVuygIISepHftHnr8dr6+sUc=
|
||||
golang.org/x/exp v0.0.0-20250718183923-645b1fa84792 h1:R9PFI6EUdfVKgwKjZef7QIwGcBKu86OEFpJ9nUEP2l4=
|
||||
golang.org/x/crypto v0.43.0 h1:dduJYIi3A3KOfdGOHX8AVZ/jGiyPa3IbBozJ5kNuE04=
|
||||
golang.org/x/crypto v0.43.0/go.mod h1:BFbav4mRNlXJL4wNeejLpWxB7wMbc79PdRGhWKncxR0=
|
||||
golang.org/x/exp v0.0.0-20250718183923-645b1fa84792/go.mod h1:A+z0yzpGtvnG90cToK5n2tu8UJVP2XUATh+r+sfOOOc=
|
||||
golang.org/x/lint v0.0.0-20200302205851-738671d3881b/go.mod h1:3xt1FjdF8hUf6vQPIChWIBhFzV8gjjsPE/fR3IyQdNY=
|
||||
golang.org/x/mod v0.1.1-0.20191105210325-c90efee705ee/go.mod h1:QqPTAvyqsEbceGzBzNggFXnrqF1CaUcvgkdR5Ot7KZg=
|
||||
golang.org/x/mod v0.2.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA=
|
||||
golang.org/x/mod v0.28.0 h1:gQBtGhjxykdjY9YhZpSlZIsbnaE2+PgjfLWUQTnoZ1U=
|
||||
golang.org/x/mod v0.28.0/go.mod h1:yfB/L0NOf/kmEbXjzCPOx1iK1fRutOydrCMsqRhEBxI=
|
||||
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
|
||||
golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20200226121028-0de0cce0169b/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
|
||||
golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU=
|
||||
golang.org/x/net v0.43.0 h1:lat02VYK2j4aLzMzecihNvTlJNQUq316m2Mr9rnM6YE=
|
||||
golang.org/x/net v0.43.0/go.mod h1:vhO1fvI4dGsIjh73sWfUVjj3N7CA9WkKJNQm2svM6Jg=
|
||||
golang.org/x/net v0.46.0 h1:giFlY12I07fugqwPuWJi68oOnpfqFnJIJzaIIm2JVV4=
|
||||
golang.org/x/net v0.46.0/go.mod h1:Q9BGdFy1y4nkUwiLvT5qtyhAnEHgnQ/zd8PfU6nc210=
|
||||
golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20190911185100-cd5d95a43a6e/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM=
|
||||
golang.org/x/sync v0.16.0 h1:ycBJEhp9p4vXvUZNszeOq0kGTPghopOL8q0fq3vstxw=
|
||||
golang.org/x/sync v0.16.0/go.mod h1:1dzgHSNfp02xaA81J2MS99Qcpr2w7fw1gpm99rleRqA=
|
||||
golang.org/x/sync v0.17.0 h1:l60nONMj9l5drqw6jlhIELNv9I0A4OFgRsG9k2oT9Ug=
|
||||
golang.org/x/sync v0.17.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
|
||||
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
|
||||
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
|
||||
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
|
||||
golang.org/x/sys v0.35.0 h1:vz1N37gP5bs89s7He8XuIYXpyY0+QlsKmzipCbUtyxI=
|
||||
golang.org/x/sys v0.35.0/go.mod h1:BJP2sWEmIv4KK5OTEluFJCKSidICx8ciO85XgH3Ak8k=
|
||||
golang.org/x/sys v0.37.0 h1:fdNQudmxPjkdUTPnLn5mdQv7Zwvbvpaxqs831goi9kQ=
|
||||
golang.org/x/sys v0.37.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
|
||||
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
|
||||
golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ=
|
||||
golang.org/x/text v0.28.0 h1:rhazDwis8INMIwQ4tpjLDzUhx6RlXqZNPEM0huQojng=
|
||||
golang.org/x/text v0.28.0/go.mod h1:U8nCwOR8jO/marOQ0QbDiOngZVEBB7MAiitBuMjXiNU=
|
||||
golang.org/x/text v0.30.0 h1:yznKA/E9zq54KzlzBEAWn1NXSQ8DIp/NYMy88xJjl4k=
|
||||
golang.org/x/text v0.30.0/go.mod h1:yDdHFIX9t+tORqspjENWgzaCVXgk0yYnYuSZ8UzzBVM=
|
||||
golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
|
||||
golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
|
||||
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
|
||||
golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo=
|
||||
golang.org/x/tools v0.0.0-20200130002326-2f3ba24bd6e7/go.mod h1:TB2adYChydJhpapKDTa4BR/hXlZSLoq2Wpct/0txZ28=
|
||||
golang.org/x/tools v0.0.0-20200619180055-7c47624df98f/go.mod h1:EkVYQZoAsY45+roYkvgYkIh4xh/qjgUK9TdY2XT94GE=
|
||||
golang.org/x/tools v0.0.0-20210106214847-113979e3529a/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA=
|
||||
golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/tools v0.37.0 h1:DVSRzp7FwePZW356yEAChSdNcQo6Nsp+fex1SUW09lE=
|
||||
golang.org/x/tools v0.37.0/go.mod h1:MBN5QPQtLMHVdvsbtarmTNukZDdgwdwlO5qGacAzF0w=
|
||||
golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
|
||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
|
||||
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
|
||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+ZbWg+4sHnLp52d5yiIPUxMBSt4X9A=
|
||||
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb/go.mod h1:rpwXGsirqLqN2L0JDJQlwOboGHmptD5ZD6T2VmcqhTw=
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10 h1:3GDAcqdIg1ozBNLgPy4SLT84nfcBjr6rhGtXYtrkWLU=
|
||||
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10/go.mod h1:T97yPqesLiNrOYxkwmhMI0ZIlJDm+p0PMR8eRVeR5tQ=
|
||||
google.golang.org/genproto v0.0.0-20230920204549-e6e6cdab5c13 h1:vlzZttNJGVqTsRFU9AmdnrcO1Znh8Ew9kCD//yjigk0=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237 h1:Kog3KlB4xevJlAcbbbzPfRG0+X9fdoGM+UBRKVz6Wr0=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250519155744-55703ea1f237/go.mod h1:ezi0AVyMKDWy5xAncvjLWH7UcLBB5n7y2fQ8MzjJcto=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237 h1:cJfm9zPbe1e873mHJzmQ1nwVEeRDU/T1wXDK2kUSU34=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250519155744-55703ea1f237/go.mod h1:qQ0YXyHHx3XkvlzUtpXDkS29lDSafHMZBAZDc03LQ3A=
|
||||
google.golang.org/grpc v1.72.1 h1:HR03wO6eyZ7lknl75XlxABNVLLFc2PAb6mHlYh756mA=
|
||||
google.golang.org/grpc v1.72.1/go.mod h1:wH5Aktxcg25y1I3w7H69nHfXdOG3UiadoBtjh3izSDM=
|
||||
google.golang.org/protobuf v1.36.6 h1:z1NpPI8ku2WgiWnf+t9wTPsn6eP1L7ksHUlkfLvd9xY=
|
||||
google.golang.org/protobuf v1.36.6/go.mod h1:jduwjTPXsFjZGTmRluh+L6NjiWu7pchiJ2/5YcXBHnY=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5 h1:BIRfGDEjiHRrk0QKZe3Xv2ieMhtgRGeLcZQ0mIVn4EY=
|
||||
google.golang.org/genproto/googleapis/api v0.0.0-20250825161204-c5933d9347a5/go.mod h1:j3QtIyytwqGr1JUDtYXwtMXWPKsEa5LtzIFN1Wn5WvE=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5 h1:eaY8u2EuxbRv7c3NiGK0/NedzVsCcV6hDuU5qPX5EGE=
|
||||
google.golang.org/genproto/googleapis/rpc v0.0.0-20250825161204-c5933d9347a5/go.mod h1:M4/wBTSeyLxupu3W3tJtOgB14jILAS/XWPSSa3TAlJc=
|
||||
google.golang.org/grpc v1.76.0 h1:UnVkv1+uMLYXoIz6o7chp59WfQUYA2ex/BXQ9rHZu7A=
|
||||
google.golang.org/grpc v1.76.0/go.mod h1:Ju12QI8M6iQJtbcsV+awF5a4hfJMLi4X0JLo94ULZ6c=
|
||||
google.golang.org/protobuf v1.36.8 h1:xHScyCOEuuwZEc6UtSOvPbAT4zRh0xcNRYekJwfqyMc=
|
||||
google.golang.org/protobuf v1.36.8/go.mod h1:fuxRtAxBytpl4zzqUh6/eyUujkJdNiuEkXntxiD/uRU=
|
||||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
|
||||
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
|
||||
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
|
||||
gotest.tools/v3 v3.4.0 h1:ZazjZUfuVeZGLAmlKKuyv3IKP5orXcwtOwDQH6YVr6o=
|
||||
gotest.tools/v3 v3.4.0/go.mod h1:CtbdzLSsqVhDgMtKsx03ird5YTGB3ar27v0u/yKBW5g=
|
||||
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c h1:m/r7OM+Y2Ty1sgBQ7Qb27VgIMBW8ZZhT4gLnUyDIhzI=
|
||||
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c/go.mod h1:3r5CMtNQMKIvBlrmM9xWUNamjKBYPOWyXOjmg5Kts3g=
|
||||
software.sslmate.com/src/go-pkcs12 v0.6.0 h1:f3sQittAeF+pao32Vb+mkli+ZyT+VwKaD014qFGq6oU=
|
||||
|
||||
@@ -76,7 +76,7 @@ type Monitor struct {
|
||||
|
||||
// NewMonitor creates a new health check monitor
|
||||
func NewMonitor(callback StatusChangeCallback, enforceCert bool) *Monitor {
|
||||
logger.Info("Creating new health check monitor with certificate enforcement: %t", enforceCert)
|
||||
logger.Debug("Creating new health check monitor with certificate enforcement: %t", enforceCert)
|
||||
|
||||
// Configure TLS settings based on certificate enforcement
|
||||
transport := &http.Transport{
|
||||
|
||||
80
internal/state/telemetry_view.go
Normal file
80
internal/state/telemetry_view.go
Normal file
@@ -0,0 +1,80 @@
|
||||
package state
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/fosrl/newt/internal/telemetry"
|
||||
)
|
||||
|
||||
// TelemetryView is a small, concurrency-safe state holder that feeds the
// observable gauges. One Newt process represents one site, so a single logical
// site is exposed; site_id is a resource attribute and no per-site labels are
// emitted from here.
type TelemetryView struct {
	// online is the value last passed to SetOnline.
	online atomic.Bool
	// lastHBUnix is the last heartbeat in unix seconds; 0 means none recorded.
	lastHBUnix atomic.Int64

	// sessMu guards the sessions map.
	sessMu sync.RWMutex
	// sessions holds one session counter per tunnel ID.
	sessions map[string]*atomic.Int64
}
|
||||
|
||||
var (
|
||||
globalView atomic.Pointer[TelemetryView]
|
||||
)
|
||||
|
||||
// Global returns a singleton TelemetryView.
|
||||
func Global() *TelemetryView {
|
||||
if v := globalView.Load(); v != nil { return v }
|
||||
v := &TelemetryView{ sessions: make(map[string]*atomic.Int64) }
|
||||
globalView.Store(v)
|
||||
telemetry.RegisterStateView(v)
|
||||
return v
|
||||
}
|
||||
|
||||
// Instrumentation helpers
|
||||
func (v *TelemetryView) IncSessions(tunnelID string) {
|
||||
v.sessMu.Lock(); defer v.sessMu.Unlock()
|
||||
c := v.sessions[tunnelID]
|
||||
if c == nil { c = &atomic.Int64{}; v.sessions[tunnelID] = c }
|
||||
c.Add(1)
|
||||
}
|
||||
func (v *TelemetryView) DecSessions(tunnelID string) {
|
||||
v.sessMu.Lock(); defer v.sessMu.Unlock()
|
||||
if c := v.sessions[tunnelID]; c != nil {
|
||||
c.Add(-1)
|
||||
if c.Load() <= 0 { delete(v.sessions, tunnelID) }
|
||||
}
|
||||
}
|
||||
func (v *TelemetryView) ClearTunnel(tunnelID string) {
|
||||
v.sessMu.Lock(); defer v.sessMu.Unlock()
|
||||
delete(v.sessions, tunnelID)
|
||||
}
|
||||
// SetOnline records whether the site is currently online.
func (v *TelemetryView) SetOnline(b bool) {
	v.online.Store(b)
}
|
||||
// TouchHeartbeat records the current time (unix seconds) as the last heartbeat.
func (v *TelemetryView) TouchHeartbeat() {
	now := time.Now().Unix()
	v.lastHBUnix.Store(now)
}
|
||||
|
||||
// --- telemetry.StateView interface ---
|
||||
|
||||
// ListSites returns the single logical site exposed by this process, "self".
func (v *TelemetryView) ListSites() []string {
	sites := []string{"self"}
	return sites
}
|
||||
// Online reports the stored online flag. The site ID is ignored and the
// second result is always true.
func (v *TelemetryView) Online(_ string) (bool, bool) {
	isOnline := v.online.Load()
	return isOnline, true
}
|
||||
func (v *TelemetryView) LastHeartbeat(_ string) (time.Time, bool) {
|
||||
sec := v.lastHBUnix.Load()
|
||||
if sec == 0 { return time.Time{}, false }
|
||||
return time.Unix(sec, 0), true
|
||||
}
|
||||
func (v *TelemetryView) ActiveSessions(_ string) (int64, bool) {
|
||||
// aggregated sessions (not used for per-tunnel gauge)
|
||||
v.sessMu.RLock(); defer v.sessMu.RUnlock()
|
||||
var sum int64
|
||||
for _, c := range v.sessions { if c != nil { sum += c.Load() } }
|
||||
return sum, true
|
||||
}
|
||||
|
||||
// Extended accessor used by telemetry callback to publish per-tunnel samples.
|
||||
func (v *TelemetryView) SessionsByTunnel() map[string]int64 {
|
||||
v.sessMu.RLock(); defer v.sessMu.RUnlock()
|
||||
out := make(map[string]int64, len(v.sessions))
|
||||
for id, c := range v.sessions { if c != nil && c.Load() > 0 { out[id] = c.Load() } }
|
||||
return out
|
||||
}
|
||||
|
||||
19
internal/telemetry/constants.go
Normal file
19
internal/telemetry/constants.go
Normal file
@@ -0,0 +1,19 @@
|
||||
package telemetry
|
||||
|
||||
// Protocol label values (kept low-cardinality).
const (
	// ProtocolTCP is the label value for TCP.
	ProtocolTCP = "tcp"
	// ProtocolUDP is the label value for UDP.
	ProtocolUDP = "udp"
)
|
||||
|
||||
// Reconnect reason label values: a fixed, low-cardinality set of bins.
const (
	ReasonServerRequest  = "server_request"
	ReasonTimeout        = "timeout"
	ReasonPeerClose      = "peer_close"
	ReasonNetworkChange  = "network_change"
	ReasonAuthError      = "auth_error"
	ReasonHandshakeError = "handshake_error"
	ReasonConfigChange   = "config_change"
	// ReasonError is the generic bin.
	ReasonError = "error"
)
|
||||
32
internal/telemetry/constants_test.go
Normal file
32
internal/telemetry/constants_test.go
Normal file
@@ -0,0 +1,32 @@
|
||||
package telemetry
|
||||
|
||||
import "testing"
|
||||
|
||||
func TestAllowedConstants(t *testing.T) {
|
||||
allowedReasons := map[string]struct{}{
|
||||
ReasonServerRequest: {},
|
||||
ReasonTimeout: {},
|
||||
ReasonPeerClose: {},
|
||||
ReasonNetworkChange: {},
|
||||
ReasonAuthError: {},
|
||||
ReasonHandshakeError: {},
|
||||
ReasonConfigChange: {},
|
||||
ReasonError: {},
|
||||
}
|
||||
for k := range allowedReasons {
|
||||
if k == "" {
|
||||
t.Fatalf("empty reason constant")
|
||||
}
|
||||
}
|
||||
|
||||
allowedProtocols := map[string]struct{}{
|
||||
ProtocolTCP: {},
|
||||
ProtocolUDP: {},
|
||||
}
|
||||
for k := range allowedProtocols {
|
||||
if k == "" {
|
||||
t.Fatalf("empty protocol constant")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
542
internal/telemetry/metrics.go
Normal file
542
internal/telemetry/metrics.go
Normal file
@@ -0,0 +1,542 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
)
|
||||
|
||||
// Instruments and helpers for Newt metrics following the naming, units, and
|
||||
// low-cardinality label guidance from the issue description.
|
||||
//
|
||||
// Counters end with _total, durations are in seconds, sizes in bytes.
|
||||
// Only low-cardinality stable labels are supported: tunnel_id,
|
||||
// transport, direction, result, reason, error_type.
|
||||
var (
|
||||
initOnce sync.Once
|
||||
|
||||
meter metric.Meter
|
||||
|
||||
// Site / Registration
|
||||
mSiteRegistrations metric.Int64Counter
|
||||
mSiteOnline metric.Int64ObservableGauge
|
||||
mSiteLastHeartbeat metric.Float64ObservableGauge
|
||||
|
||||
// Tunnel / Sessions
|
||||
mTunnelSessions metric.Int64ObservableGauge
|
||||
mTunnelBytes metric.Int64Counter
|
||||
mTunnelLatency metric.Float64Histogram
|
||||
mReconnects metric.Int64Counter
|
||||
|
||||
// Connection / NAT
|
||||
mConnAttempts metric.Int64Counter
|
||||
mConnErrors metric.Int64Counter
|
||||
|
||||
// Config/Restart
|
||||
mConfigReloads metric.Int64Counter
|
||||
mConfigApply metric.Float64Histogram
|
||||
mCertRotationTotal metric.Int64Counter
|
||||
mProcessStartTime metric.Float64ObservableGauge
|
||||
|
||||
// Build info
|
||||
mBuildInfo metric.Int64ObservableGauge
|
||||
|
||||
// WebSocket
|
||||
mWSConnectLatency metric.Float64Histogram
|
||||
mWSMessages metric.Int64Counter
|
||||
mWSDisconnects metric.Int64Counter
|
||||
mWSKeepaliveFailure metric.Int64Counter
|
||||
mWSSessionDuration metric.Float64Histogram
|
||||
mWSConnected metric.Int64ObservableGauge
|
||||
mWSReconnects metric.Int64Counter
|
||||
|
||||
// Proxy
|
||||
mProxyActiveConns metric.Int64ObservableGauge
|
||||
mProxyBufferBytes metric.Int64ObservableGauge
|
||||
mProxyAsyncBacklogByte metric.Int64ObservableGauge
|
||||
mProxyDropsTotal metric.Int64Counter
|
||||
mProxyAcceptsTotal metric.Int64Counter
|
||||
mProxyConnDuration metric.Float64Histogram
|
||||
mProxyConnectionsTotal metric.Int64Counter
|
||||
|
||||
buildVersion string
|
||||
buildCommit string
|
||||
processStartUnix = float64(time.Now().UnixNano()) / 1e9
|
||||
wsConnectedState atomic.Int64
|
||||
)
|
||||
|
||||
// Event label values for proxy connection lifecycle events.
const (
	// ProxyConnectionOpened marks a connection being opened.
	ProxyConnectionOpened = "opened"
	// ProxyConnectionClosed marks a connection being closed.
	ProxyConnectionClosed = "closed"
)
|
||||
|
||||
// attrsWithSite appends site/region labels only when explicitly enabled to keep
|
||||
// label cardinality low by default.
|
||||
func attrsWithSite(extra ...attribute.KeyValue) []attribute.KeyValue {
|
||||
attrs := make([]attribute.KeyValue, len(extra))
|
||||
copy(attrs, extra)
|
||||
if ShouldIncludeSiteLabels() {
|
||||
attrs = append(attrs, siteAttrs()...)
|
||||
}
|
||||
return attrs
|
||||
}
|
||||
|
||||
func registerInstruments() error {
|
||||
var err error
|
||||
initOnce.Do(func() {
|
||||
meter = otel.Meter("newt")
|
||||
if e := registerSiteInstruments(); e != nil {
|
||||
err = e
|
||||
return
|
||||
}
|
||||
if e := registerTunnelInstruments(); e != nil {
|
||||
err = e
|
||||
return
|
||||
}
|
||||
if e := registerConnInstruments(); e != nil {
|
||||
err = e
|
||||
return
|
||||
}
|
||||
if e := registerConfigInstruments(); e != nil {
|
||||
err = e
|
||||
return
|
||||
}
|
||||
if e := registerBuildWSProxyInstruments(); e != nil {
|
||||
err = e
|
||||
return
|
||||
}
|
||||
})
|
||||
return err
|
||||
}
|
||||
|
||||
func registerSiteInstruments() error {
|
||||
var err error
|
||||
mSiteRegistrations, err = meter.Int64Counter("newt_site_registrations_total",
|
||||
metric.WithDescription("Total site registration attempts"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mSiteOnline, err = meter.Int64ObservableGauge("newt_site_online",
|
||||
metric.WithDescription("Site online (0/1)"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mSiteLastHeartbeat, err = meter.Float64ObservableGauge("newt_site_last_heartbeat_timestamp_seconds",
|
||||
metric.WithDescription("Unix timestamp of the last site heartbeat"),
|
||||
metric.WithUnit("s"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func registerTunnelInstruments() error {
|
||||
var err error
|
||||
mTunnelSessions, err = meter.Int64ObservableGauge("newt_tunnel_sessions",
|
||||
metric.WithDescription("Active tunnel sessions"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mTunnelBytes, err = meter.Int64Counter("newt_tunnel_bytes_total",
|
||||
metric.WithDescription("Tunnel bytes ingress/egress"),
|
||||
metric.WithUnit("By"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mTunnelLatency, err = meter.Float64Histogram("newt_tunnel_latency_seconds",
|
||||
metric.WithDescription("Per-tunnel latency in seconds"),
|
||||
metric.WithUnit("s"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mReconnects, err = meter.Int64Counter("newt_tunnel_reconnects_total",
|
||||
metric.WithDescription("Tunnel reconnect events"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func registerConnInstruments() error {
|
||||
var err error
|
||||
mConnAttempts, err = meter.Int64Counter("newt_connection_attempts_total",
|
||||
metric.WithDescription("Connection attempts"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
mConnErrors, err = meter.Int64Counter("newt_connection_errors_total",
|
||||
metric.WithDescription("Connection errors by type"))
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func registerConfigInstruments() error {
|
||||
mConfigReloads, _ = meter.Int64Counter("newt_config_reloads_total",
|
||||
metric.WithDescription("Configuration reloads"))
|
||||
mConfigApply, _ = meter.Float64Histogram("newt_config_apply_seconds",
|
||||
metric.WithDescription("Configuration apply duration in seconds"),
|
||||
metric.WithUnit("s"))
|
||||
mCertRotationTotal, _ = meter.Int64Counter("newt_cert_rotation_total",
|
||||
metric.WithDescription("Certificate rotation events (success/failure)"))
|
||||
mProcessStartTime, _ = meter.Float64ObservableGauge("process_start_time_seconds",
|
||||
metric.WithDescription("Unix timestamp of the process start time"),
|
||||
metric.WithUnit("s"))
|
||||
if mProcessStartTime != nil {
|
||||
if _, err := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
|
||||
o.ObserveFloat64(mProcessStartTime, processStartUnix)
|
||||
return nil
|
||||
}, mProcessStartTime); err != nil {
|
||||
otel.Handle(err)
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
func registerBuildWSProxyInstruments() error {
|
||||
// Build info gauge (value 1 with version/commit attributes)
|
||||
mBuildInfo, _ = meter.Int64ObservableGauge("newt_build_info",
|
||||
metric.WithDescription("Newt build information (value is always 1)"))
|
||||
// WebSocket
|
||||
mWSConnectLatency, _ = meter.Float64Histogram("newt_websocket_connect_latency_seconds",
|
||||
metric.WithDescription("WebSocket connect latency in seconds"),
|
||||
metric.WithUnit("s"))
|
||||
mWSMessages, _ = meter.Int64Counter("newt_websocket_messages_total",
|
||||
metric.WithDescription("WebSocket messages by direction and type"))
|
||||
mWSDisconnects, _ = meter.Int64Counter("newt_websocket_disconnects_total",
|
||||
metric.WithDescription("WebSocket disconnects by reason/result"))
|
||||
mWSKeepaliveFailure, _ = meter.Int64Counter("newt_websocket_keepalive_failures_total",
|
||||
metric.WithDescription("WebSocket keepalive (ping/pong) failures"))
|
||||
mWSSessionDuration, _ = meter.Float64Histogram("newt_websocket_session_duration_seconds",
|
||||
metric.WithDescription("Duration of established WebSocket sessions"),
|
||||
metric.WithUnit("s"))
|
||||
mWSConnected, _ = meter.Int64ObservableGauge("newt_websocket_connected",
|
||||
metric.WithDescription("WebSocket connection state (1=connected, 0=disconnected)"))
|
||||
mWSReconnects, _ = meter.Int64Counter("newt_websocket_reconnects_total",
|
||||
metric.WithDescription("WebSocket reconnect attempts by reason"))
|
||||
// Proxy
|
||||
mProxyActiveConns, _ = meter.Int64ObservableGauge("newt_proxy_active_connections",
|
||||
metric.WithDescription("Proxy active connections per tunnel and protocol"))
|
||||
mProxyBufferBytes, _ = meter.Int64ObservableGauge("newt_proxy_buffer_bytes",
|
||||
metric.WithDescription("Proxy buffer bytes (may approximate async backlog)"),
|
||||
metric.WithUnit("By"))
|
||||
mProxyAsyncBacklogByte, _ = meter.Int64ObservableGauge("newt_proxy_async_backlog_bytes",
|
||||
metric.WithDescription("Unflushed async byte backlog per tunnel and protocol"),
|
||||
metric.WithUnit("By"))
|
||||
mProxyDropsTotal, _ = meter.Int64Counter("newt_proxy_drops_total",
|
||||
metric.WithDescription("Proxy drops due to write errors"))
|
||||
mProxyAcceptsTotal, _ = meter.Int64Counter("newt_proxy_accept_total",
|
||||
metric.WithDescription("Proxy connection accepts by protocol and result"))
|
||||
mProxyConnDuration, _ = meter.Float64Histogram("newt_proxy_connection_duration_seconds",
|
||||
metric.WithDescription("Duration of completed proxy connections"),
|
||||
metric.WithUnit("s"))
|
||||
mProxyConnectionsTotal, _ = meter.Int64Counter("newt_proxy_connections_total",
|
||||
metric.WithDescription("Proxy connection lifecycle events by protocol"))
|
||||
// Register a default callback for build info if version/commit set
|
||||
reg, e := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
|
||||
if buildVersion == "" && buildCommit == "" {
|
||||
return nil
|
||||
}
|
||||
attrs := []attribute.KeyValue{}
|
||||
if buildVersion != "" {
|
||||
attrs = append(attrs, attribute.String("version", buildVersion))
|
||||
}
|
||||
if buildCommit != "" {
|
||||
attrs = append(attrs, attribute.String("commit", buildCommit))
|
||||
}
|
||||
if ShouldIncludeSiteLabels() {
|
||||
attrs = append(attrs, siteAttrs()...)
|
||||
}
|
||||
o.ObserveInt64(mBuildInfo, 1, metric.WithAttributes(attrs...))
|
||||
return nil
|
||||
}, mBuildInfo)
|
||||
if e != nil {
|
||||
otel.Handle(e)
|
||||
} else {
|
||||
// Provide a functional stopper that unregisters the callback
|
||||
obsStopper = func() { _ = reg.Unregister() }
|
||||
}
|
||||
if mWSConnected != nil {
|
||||
if regConn, err := meter.RegisterCallback(func(ctx context.Context, o metric.Observer) error {
|
||||
val := wsConnectedState.Load()
|
||||
o.ObserveInt64(mWSConnected, val, metric.WithAttributes(attrsWithSite()...))
|
||||
return nil
|
||||
}, mWSConnected); err != nil {
|
||||
otel.Handle(err)
|
||||
} else {
|
||||
wsConnStopper = func() { _ = regConn.Unregister() }
|
||||
}
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// State for observable-gauge callbacks. Newt registers a callback via
// SetObservableCallback to report online status, last heartbeat and active
// sessions, and via SetProxyObservableCallback to report the proxy gauges.
var (
	// obsOnce limits SetObservableCallback to a single registration.
	obsOnce sync.Once
	// obsStopper, when set, unregisters a callback.
	obsStopper func()

	// proxyObsOnce limits SetProxyObservableCallback to a single registration.
	proxyObsOnce sync.Once
	// proxyStopper, when set, unregisters the proxy callback.
	proxyStopper func()

	// wsConnStopper, when set, unregisters the WebSocket connected callback.
	wsConnStopper func()
)
|
||||
|
||||
// SetObservableCallback registers a single callback that will be invoked
|
||||
// on collection. Use the provided observer to emit values for the observable
|
||||
// gauges defined here.
|
||||
//
|
||||
// Example inside your code (where you have access to current state):
|
||||
//
|
||||
// telemetry.SetObservableCallback(func(ctx context.Context, o metric.Observer) error {
|
||||
// o.ObserveInt64(mSiteOnline, 1)
|
||||
// o.ObserveFloat64(mSiteLastHeartbeat, float64(lastHB.Unix()))
|
||||
// o.ObserveInt64(mTunnelSessions, int64(len(activeSessions)))
|
||||
// return nil
|
||||
// })
|
||||
func SetObservableCallback(cb func(context.Context, metric.Observer) error) {
|
||||
obsOnce.Do(func() {
|
||||
reg, e := meter.RegisterCallback(cb, mSiteOnline, mSiteLastHeartbeat, mTunnelSessions)
|
||||
if e != nil {
|
||||
otel.Handle(e)
|
||||
obsStopper = func() {
|
||||
// no-op: registration failed; keep stopper callable
|
||||
}
|
||||
return
|
||||
}
|
||||
// Provide a functional stopper mirroring proxy/build-info behavior
|
||||
obsStopper = func() { _ = reg.Unregister() }
|
||||
})
|
||||
}
|
||||
|
||||
// SetProxyObservableCallback registers a callback to observe proxy gauges.
|
||||
func SetProxyObservableCallback(cb func(context.Context, metric.Observer) error) {
|
||||
proxyObsOnce.Do(func() {
|
||||
reg, e := meter.RegisterCallback(cb, mProxyActiveConns, mProxyBufferBytes, mProxyAsyncBacklogByte)
|
||||
if e != nil {
|
||||
otel.Handle(e)
|
||||
proxyStopper = func() {
|
||||
// no-op: registration failed; keep stopper callable
|
||||
}
|
||||
return
|
||||
}
|
||||
// Provide a functional stopper to unregister later if needed
|
||||
proxyStopper = func() { _ = reg.Unregister() }
|
||||
})
|
||||
}
|
||||
|
||||
// Build info registration
|
||||
func RegisterBuildInfo(version, commit string) {
|
||||
buildVersion = version
|
||||
buildCommit = commit
|
||||
}
|
||||
|
||||
// Config reloads
|
||||
func IncConfigReload(ctx context.Context, result string) {
|
||||
mConfigReloads.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("result", result),
|
||||
)...))
|
||||
}
|
||||
|
||||
// Helpers for counters/histograms
|
||||
|
||||
func IncSiteRegistration(ctx context.Context, result string) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("result", result),
|
||||
}
|
||||
mSiteRegistrations.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
func AddTunnelBytes(ctx context.Context, tunnelID, direction string, n int64) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("direction", direction),
|
||||
}
|
||||
if ShouldIncludeTunnelID() && tunnelID != "" {
|
||||
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
|
||||
}
|
||||
mTunnelBytes.Add(ctx, n, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
// AddTunnelBytesSet adds bytes using a pre-built attribute.Set to avoid per-call allocations.
|
||||
func AddTunnelBytesSet(ctx context.Context, n int64, attrs attribute.Set) {
|
||||
mTunnelBytes.Add(ctx, n, metric.WithAttributeSet(attrs))
|
||||
}
|
||||
|
||||
// --- WebSocket helpers ---
|
||||
|
||||
func ObserveWSConnectLatency(ctx context.Context, seconds float64, result, errorType string) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("transport", "websocket"),
|
||||
attribute.String("result", result),
|
||||
}
|
||||
if errorType != "" {
|
||||
attrs = append(attrs, attribute.String("error_type", errorType))
|
||||
}
|
||||
mWSConnectLatency.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
func IncWSMessage(ctx context.Context, direction, msgType string) {
|
||||
mWSMessages.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("direction", direction),
|
||||
attribute.String("msg_type", msgType),
|
||||
)...))
|
||||
}
|
||||
|
||||
func IncWSDisconnect(ctx context.Context, reason, result string) {
|
||||
mWSDisconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("reason", reason),
|
||||
attribute.String("result", result),
|
||||
)...))
|
||||
}
|
||||
|
||||
func IncWSKeepaliveFailure(ctx context.Context, reason string) {
|
||||
mWSKeepaliveFailure.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("reason", reason),
|
||||
)...))
|
||||
}
|
||||
|
||||
// SetWSConnectionState updates the backing gauge for the WebSocket connected state.
|
||||
func SetWSConnectionState(connected bool) {
|
||||
if connected {
|
||||
wsConnectedState.Store(1)
|
||||
} else {
|
||||
wsConnectedState.Store(0)
|
||||
}
|
||||
}
|
||||
|
||||
// IncWSReconnect increments the WebSocket reconnect counter with a bounded reason label.
|
||||
func IncWSReconnect(ctx context.Context, reason string) {
|
||||
if reason == "" {
|
||||
reason = "unknown"
|
||||
}
|
||||
mWSReconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("reason", reason),
|
||||
)...))
|
||||
}
|
||||
|
||||
func ObserveWSSessionDuration(ctx context.Context, seconds float64, result string) {
|
||||
mWSSessionDuration.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("result", result),
|
||||
)...))
|
||||
}
|
||||
|
||||
// --- Proxy helpers ---
|
||||
|
||||
func ObserveProxyActiveConnsObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
|
||||
o.ObserveInt64(mProxyActiveConns, value, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
func ObserveProxyBufferBytesObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
|
||||
o.ObserveInt64(mProxyBufferBytes, value, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
func ObserveProxyAsyncBacklogObs(o metric.Observer, value int64, attrs []attribute.KeyValue) {
|
||||
o.ObserveInt64(mProxyAsyncBacklogByte, value, metric.WithAttributes(attrs...))
|
||||
}
|
||||
|
||||
func IncProxyDrops(ctx context.Context, tunnelID, protocol string) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("protocol", protocol),
|
||||
}
|
||||
if ShouldIncludeTunnelID() && tunnelID != "" {
|
||||
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
|
||||
}
|
||||
mProxyDropsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
func IncProxyAccept(ctx context.Context, tunnelID, protocol, result, reason string) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("protocol", protocol),
|
||||
attribute.String("result", result),
|
||||
}
|
||||
if reason != "" {
|
||||
attrs = append(attrs, attribute.String("reason", reason))
|
||||
}
|
||||
if ShouldIncludeTunnelID() && tunnelID != "" {
|
||||
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
|
||||
}
|
||||
mProxyAcceptsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
func ObserveProxyConnectionDuration(ctx context.Context, tunnelID, protocol, result string, seconds float64) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("protocol", protocol),
|
||||
attribute.String("result", result),
|
||||
}
|
||||
if ShouldIncludeTunnelID() && tunnelID != "" {
|
||||
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
|
||||
}
|
||||
mProxyConnDuration.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
// IncProxyConnectionEvent records proxy connection lifecycle events (opened/closed).
|
||||
func IncProxyConnectionEvent(ctx context.Context, tunnelID, protocol, event string) {
|
||||
if event == "" {
|
||||
event = "unknown"
|
||||
}
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("protocol", protocol),
|
||||
attribute.String("event", event),
|
||||
}
|
||||
if ShouldIncludeTunnelID() && tunnelID != "" {
|
||||
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
|
||||
}
|
||||
mProxyConnectionsTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
// --- Config/PKI helpers ---
|
||||
|
||||
func ObserveConfigApply(ctx context.Context, phase, result string, seconds float64) {
|
||||
mConfigApply.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("phase", phase),
|
||||
attribute.String("result", result),
|
||||
)...))
|
||||
}
|
||||
|
||||
func IncCertRotation(ctx context.Context, result string) {
|
||||
mCertRotationTotal.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("result", result),
|
||||
)...))
|
||||
}
|
||||
|
||||
func ObserveTunnelLatency(ctx context.Context, tunnelID, transport string, seconds float64) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("transport", transport),
|
||||
}
|
||||
if ShouldIncludeTunnelID() && tunnelID != "" {
|
||||
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
|
||||
}
|
||||
mTunnelLatency.Record(ctx, seconds, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
func IncReconnect(ctx context.Context, tunnelID, initiator, reason string) {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("initiator", initiator),
|
||||
attribute.String("reason", reason),
|
||||
}
|
||||
if ShouldIncludeTunnelID() && tunnelID != "" {
|
||||
attrs = append(attrs, attribute.String("tunnel_id", tunnelID))
|
||||
}
|
||||
mReconnects.Add(ctx, 1, metric.WithAttributes(attrsWithSite(attrs...)...))
|
||||
}
|
||||
|
||||
func IncConnAttempt(ctx context.Context, transport, result string) {
|
||||
mConnAttempts.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("transport", transport),
|
||||
attribute.String("result", result),
|
||||
)...))
|
||||
}
|
||||
|
||||
func IncConnError(ctx context.Context, transport, typ string) {
|
||||
mConnErrors.Add(ctx, 1, metric.WithAttributes(attrsWithSite(
|
||||
attribute.String("transport", transport),
|
||||
attribute.String("error_type", typ),
|
||||
)...))
|
||||
}
|
||||
59
internal/telemetry/metrics_test_helper.go
Normal file
59
internal/telemetry/metrics_test_helper.go
Normal file
@@ -0,0 +1,59 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"sync"
|
||||
"time"
|
||||
)
|
||||
|
||||
func resetMetricsForTest() {
|
||||
initOnce = sync.Once{}
|
||||
obsOnce = sync.Once{}
|
||||
proxyObsOnce = sync.Once{}
|
||||
obsStopper = nil
|
||||
proxyStopper = nil
|
||||
if wsConnStopper != nil {
|
||||
wsConnStopper()
|
||||
}
|
||||
wsConnStopper = nil
|
||||
meter = nil
|
||||
|
||||
mSiteRegistrations = nil
|
||||
mSiteOnline = nil
|
||||
mSiteLastHeartbeat = nil
|
||||
|
||||
mTunnelSessions = nil
|
||||
mTunnelBytes = nil
|
||||
mTunnelLatency = nil
|
||||
mReconnects = nil
|
||||
|
||||
mConnAttempts = nil
|
||||
mConnErrors = nil
|
||||
|
||||
mConfigReloads = nil
|
||||
mConfigApply = nil
|
||||
mCertRotationTotal = nil
|
||||
mProcessStartTime = nil
|
||||
|
||||
mBuildInfo = nil
|
||||
|
||||
mWSConnectLatency = nil
|
||||
mWSMessages = nil
|
||||
mWSDisconnects = nil
|
||||
mWSKeepaliveFailure = nil
|
||||
mWSSessionDuration = nil
|
||||
mWSConnected = nil
|
||||
mWSReconnects = nil
|
||||
|
||||
mProxyActiveConns = nil
|
||||
mProxyBufferBytes = nil
|
||||
mProxyAsyncBacklogByte = nil
|
||||
mProxyDropsTotal = nil
|
||||
mProxyAcceptsTotal = nil
|
||||
mProxyConnDuration = nil
|
||||
mProxyConnectionsTotal = nil
|
||||
|
||||
processStartUnix = float64(time.Now().UnixNano()) / 1e9
|
||||
wsConnectedState.Store(0)
|
||||
includeTunnelIDVal.Store(false)
|
||||
includeSiteLabelVal.Store(false)
|
||||
}
|
||||
106
internal/telemetry/state_view.go
Normal file
106
internal/telemetry/state_view.go
Normal file
@@ -0,0 +1,106 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
)
|
||||
|
||||
// StateView is the read-only view that the observable gauges are fed from.
//
// Implementations must be safe for concurrent use and must not block; every
// method should return quickly, taking read locks where locking is needed.
type StateView interface {
	// ListSites returns the stable, low-cardinality list of site IDs to expose.
	ListSites() []string
	// Online reports whether the given site is online.
	Online(siteID string) (online bool, ok bool)
	// LastHeartbeat reports the time of the site's last heartbeat.
	LastHeartbeat(siteID string) (t time.Time, ok bool)
	// ActiveSessions reports the current number of active sessions for the
	// site (across tunnels), or scoped to the site if the model is site-scoped.
	ActiveSessions(siteID string) (n int64, ok bool)
}
|
||||
|
||||
// stateView holds the registered StateView (stored as a StateView value).
var stateView atomic.Value
|
||||
|
||||
// RegisterStateView sets the global StateView used by the default observable callback.
|
||||
func RegisterStateView(v StateView) {
|
||||
stateView.Store(v)
|
||||
// If instruments are registered, ensure a callback exists.
|
||||
if v != nil {
|
||||
SetObservableCallback(func(ctx context.Context, o metric.Observer) error {
|
||||
if any := stateView.Load(); any != nil {
|
||||
if sv, ok := any.(StateView); ok {
|
||||
for _, siteID := range sv.ListSites() {
|
||||
observeSiteOnlineFor(o, sv, siteID)
|
||||
observeLastHeartbeatFor(o, sv, siteID)
|
||||
observeSessionsFor(o, siteID, sv)
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
func observeSiteOnlineFor(o metric.Observer, sv StateView, siteID string) {
|
||||
if online, ok := sv.Online(siteID); ok {
|
||||
val := int64(0)
|
||||
if online {
|
||||
val = 1
|
||||
}
|
||||
o.ObserveInt64(mSiteOnline, val, metric.WithAttributes(
|
||||
attribute.String("site_id", siteID),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
func observeLastHeartbeatFor(o metric.Observer, sv StateView, siteID string) {
|
||||
if t, ok := sv.LastHeartbeat(siteID); ok {
|
||||
ts := float64(t.UnixNano()) / 1e9
|
||||
o.ObserveFloat64(mSiteLastHeartbeat, ts, metric.WithAttributes(
|
||||
attribute.String("site_id", siteID),
|
||||
))
|
||||
}
|
||||
}
|
||||
|
||||
func observeSessionsFor(o metric.Observer, siteID string, any interface{}) {
|
||||
if tm, ok := any.(interface{ SessionsByTunnel() map[string]int64 }); ok {
|
||||
sessions := tm.SessionsByTunnel()
|
||||
// If tunnel_id labels are enabled, preserve existing per-tunnel observations
|
||||
if ShouldIncludeTunnelID() {
|
||||
for tid, n := range sessions {
|
||||
attrs := []attribute.KeyValue{
|
||||
attribute.String("site_id", siteID),
|
||||
}
|
||||
if tid != "" {
|
||||
attrs = append(attrs, attribute.String("tunnel_id", tid))
|
||||
}
|
||||
o.ObserveInt64(mTunnelSessions, n, metric.WithAttributes(attrs...))
|
||||
}
|
||||
return
|
||||
}
|
||||
// When tunnel_id is disabled, collapse per-tunnel counts into a single site-level value
|
||||
var total int64
|
||||
for _, n := range sessions {
|
||||
total += n
|
||||
}
|
||||
// If there are no per-tunnel entries, fall back to ActiveSessions() if available
|
||||
if total == 0 {
|
||||
if svAny := stateView.Load(); svAny != nil {
|
||||
if sv, ok := svAny.(StateView); ok {
|
||||
if n, ok2 := sv.ActiveSessions(siteID); ok2 {
|
||||
total = n
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
o.ObserveInt64(mTunnelSessions, total, metric.WithAttributes(attribute.String("site_id", siteID)))
|
||||
return
|
||||
}
|
||||
}
|
||||
384
internal/telemetry/telemetry.go
Normal file
384
internal/telemetry/telemetry.go
Normal file
@@ -0,0 +1,384 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"net/http"
|
||||
"os"
|
||||
"strings"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
promclient "github.com/prometheus/client_golang/prometheus"
|
||||
"github.com/prometheus/client_golang/prometheus/promhttp"
|
||||
"go.opentelemetry.io/contrib/instrumentation/runtime"
|
||||
"go.opentelemetry.io/otel"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlpmetric/otlpmetricgrpc"
|
||||
"go.opentelemetry.io/otel/exporters/otlp/otlptrace/otlptracegrpc"
|
||||
"go.opentelemetry.io/otel/exporters/prometheus"
|
||||
"go.opentelemetry.io/otel/sdk/metric"
|
||||
"go.opentelemetry.io/otel/sdk/resource"
|
||||
"go.opentelemetry.io/otel/sdk/trace"
|
||||
semconv "go.opentelemetry.io/otel/semconv/v1.26.0"
|
||||
"google.golang.org/grpc/credentials"
|
||||
)
|
||||
|
||||
// Config controls telemetry initialization. FromEnv fills it from environment
// variables; callers may override individual fields before passing it to Init.
//
// Defaults align with the issue requirements:
//   - Prometheus exporter enabled by default (/metrics)
//   - OTLP exporter disabled by default
//   - Durations in seconds, bytes in raw bytes
//   - Admin HTTP server address configurable (for mounting /metrics)
type Config struct {
	// ServiceName and ServiceVersion become the service.name and
	// service.version resource attributes.
	ServiceName    string
	ServiceVersion string

	// Optional resource attributes; empty values are not attached.
	SiteID string
	Region string

	// PromEnabled turns on the Prometheus exporter; OTLPEnabled turns on the
	// OTLP metric exporter and tracing.
	PromEnabled bool
	OTLPEnabled bool

	OTLPEndpoint string // host:port
	OTLPInsecure bool   // when true the OTLP gRPC connection is opened without TLS

	// MetricExportInterval is the interval of the periodic OTLP metric reader.
	MetricExportInterval time.Duration
	AdminAddr            string // e.g.: ":2112"

	// Optional build info for newt_build_info metric; registered by Init when
	// at least one of the two is non-empty.
	BuildVersion string
	BuildCommit  string
}
|
||||
|
||||
// FromEnv reads configuration from environment variables.
|
||||
//
|
||||
// NEWT_METRICS_PROMETHEUS_ENABLED (default: true)
|
||||
// NEWT_METRICS_OTLP_ENABLED (default: false)
|
||||
// OTEL_EXPORTER_OTLP_ENDPOINT (default: "localhost:4317")
|
||||
// OTEL_EXPORTER_OTLP_INSECURE (default: true)
|
||||
// OTEL_METRIC_EXPORT_INTERVAL (default: 15s)
|
||||
// OTEL_SERVICE_NAME (default: "newt")
|
||||
// OTEL_SERVICE_VERSION (default: "")
|
||||
// NEWT_ADMIN_ADDR (default: ":2112")
|
||||
func FromEnv() Config {
|
||||
// Prefer explicit NEWT_* env vars, then fall back to OTEL_RESOURCE_ATTRIBUTES
|
||||
site := getenv("NEWT_SITE_ID", "")
|
||||
if site == "" {
|
||||
site = getenv("NEWT_ID", "")
|
||||
}
|
||||
region := os.Getenv("NEWT_REGION")
|
||||
if site == "" || region == "" {
|
||||
if ra := os.Getenv("OTEL_RESOURCE_ATTRIBUTES"); ra != "" {
|
||||
m := parseResourceAttributes(ra)
|
||||
if site == "" {
|
||||
site = m["site_id"]
|
||||
}
|
||||
if region == "" {
|
||||
region = m["region"]
|
||||
}
|
||||
}
|
||||
}
|
||||
return Config{
|
||||
ServiceName: getenv("OTEL_SERVICE_NAME", "newt"),
|
||||
ServiceVersion: os.Getenv("OTEL_SERVICE_VERSION"),
|
||||
SiteID: site,
|
||||
Region: region,
|
||||
PromEnabled: getenv("NEWT_METRICS_PROMETHEUS_ENABLED", "true") == "true",
|
||||
OTLPEnabled: getenv("NEWT_METRICS_OTLP_ENABLED", "false") == "true",
|
||||
OTLPEndpoint: getenv("OTEL_EXPORTER_OTLP_ENDPOINT", "localhost:4317"),
|
||||
OTLPInsecure: getenv("OTEL_EXPORTER_OTLP_INSECURE", "true") == "true",
|
||||
MetricExportInterval: getdur("OTEL_METRIC_EXPORT_INTERVAL", 15*time.Second),
|
||||
AdminAddr: getenv("NEWT_ADMIN_ADDR", ":2112"),
|
||||
}
|
||||
}
|
||||
|
||||
// Setup holds initialized telemetry providers and (optionally) a /metrics handler.
// Call Shutdown when the process terminates to flush exporters.
type Setup struct {
	// MeterProvider is the provider Init installed as the global one.
	MeterProvider *metric.MeterProvider
	// TracerProvider is set only when OTLP is enabled and the trace exporter
	// could be created; otherwise it stays nil.
	TracerProvider *trace.TracerProvider

	PrometheusHandler http.Handler // nil if Prometheus exporter disabled

	// shutdowns are the cleanup functions that Shutdown runs in reverse order.
	shutdowns []func(context.Context) error
}
|
||||
|
||||
// Init configures OpenTelemetry metrics and (optionally) tracing.
|
||||
//
|
||||
// It sets a global MeterProvider and TracerProvider, registers runtime instrumentation,
|
||||
// installs recommended histogram views for *_latency_seconds, and returns a Setup with
|
||||
// a Shutdown method to flush exporters.
|
||||
func Init(ctx context.Context, cfg Config) (*Setup, error) {
|
||||
// Configure tunnel_id label inclusion from env (default true)
|
||||
if getenv("NEWT_METRICS_INCLUDE_TUNNEL_ID", "true") == "true" {
|
||||
includeTunnelIDVal.Store(true)
|
||||
} else {
|
||||
includeTunnelIDVal.Store(false)
|
||||
}
|
||||
if getenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true") == "true" {
|
||||
includeSiteLabelVal.Store(true)
|
||||
} else {
|
||||
includeSiteLabelVal.Store(false)
|
||||
}
|
||||
res := buildResource(ctx, cfg)
|
||||
UpdateSiteInfo(cfg.SiteID, cfg.Region)
|
||||
|
||||
s := &Setup{}
|
||||
readers, promHandler, shutdowns, err := setupMetricExport(ctx, cfg, res)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
s.PrometheusHandler = promHandler
|
||||
// Build provider
|
||||
mp := buildMeterProvider(res, readers)
|
||||
otel.SetMeterProvider(mp)
|
||||
s.MeterProvider = mp
|
||||
s.shutdowns = append(s.shutdowns, mp.Shutdown)
|
||||
// Optional tracing
|
||||
if cfg.OTLPEnabled {
|
||||
if tp, shutdown := setupTracing(ctx, cfg, res); tp != nil {
|
||||
otel.SetTracerProvider(tp)
|
||||
s.TracerProvider = tp
|
||||
s.shutdowns = append(s.shutdowns, func(c context.Context) error {
|
||||
return errors.Join(shutdown(c), tp.Shutdown(c))
|
||||
})
|
||||
}
|
||||
}
|
||||
// Add metric exporter shutdowns
|
||||
s.shutdowns = append(s.shutdowns, shutdowns...)
|
||||
// Runtime metrics
|
||||
_ = runtime.Start(runtime.WithMeterProvider(mp))
|
||||
// Instruments
|
||||
if err := registerInstruments(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if cfg.BuildVersion != "" || cfg.BuildCommit != "" {
|
||||
RegisterBuildInfo(cfg.BuildVersion, cfg.BuildCommit)
|
||||
}
|
||||
return s, nil
|
||||
}
|
||||
|
||||
func buildResource(ctx context.Context, cfg Config) *resource.Resource {
|
||||
attrs := []attribute.KeyValue{
|
||||
semconv.ServiceName(cfg.ServiceName),
|
||||
semconv.ServiceVersion(cfg.ServiceVersion),
|
||||
}
|
||||
if cfg.SiteID != "" {
|
||||
attrs = append(attrs, attribute.String("site_id", cfg.SiteID))
|
||||
}
|
||||
if cfg.Region != "" {
|
||||
attrs = append(attrs, attribute.String("region", cfg.Region))
|
||||
}
|
||||
res, _ := resource.New(ctx, resource.WithFromEnv(), resource.WithHost(), resource.WithAttributes(attrs...))
|
||||
return res
|
||||
}
|
||||
|
||||
func setupMetricExport(ctx context.Context, cfg Config, _ *resource.Resource) ([]metric.Reader, http.Handler, []func(context.Context) error, error) {
|
||||
var readers []metric.Reader
|
||||
var shutdowns []func(context.Context) error
|
||||
var promHandler http.Handler
|
||||
if cfg.PromEnabled {
|
||||
reg := promclient.NewRegistry()
|
||||
exp, err := prometheus.New(prometheus.WithRegisterer(reg))
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
readers = append(readers, exp)
|
||||
promHandler = promhttp.HandlerFor(reg, promhttp.HandlerOpts{})
|
||||
}
|
||||
if cfg.OTLPEnabled {
|
||||
mopts := []otlpmetricgrpc.Option{otlpmetricgrpc.WithEndpoint(cfg.OTLPEndpoint)}
|
||||
if hdrs := parseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")); len(hdrs) > 0 {
|
||||
mopts = append(mopts, otlpmetricgrpc.WithHeaders(hdrs))
|
||||
}
|
||||
if cfg.OTLPInsecure {
|
||||
mopts = append(mopts, otlpmetricgrpc.WithInsecure())
|
||||
} else if certFile := os.Getenv("OTEL_EXPORTER_OTLP_CERTIFICATE"); certFile != "" {
|
||||
if creds, cerr := credentials.NewClientTLSFromFile(certFile, ""); cerr == nil {
|
||||
mopts = append(mopts, otlpmetricgrpc.WithTLSCredentials(creds))
|
||||
}
|
||||
}
|
||||
mexp, err := otlpmetricgrpc.New(ctx, mopts...)
|
||||
if err != nil {
|
||||
return nil, nil, nil, err
|
||||
}
|
||||
readers = append(readers, metric.NewPeriodicReader(mexp, metric.WithInterval(cfg.MetricExportInterval)))
|
||||
shutdowns = append(shutdowns, mexp.Shutdown)
|
||||
}
|
||||
return readers, promHandler, shutdowns, nil
|
||||
}
|
||||
|
||||
func buildMeterProvider(res *resource.Resource, readers []metric.Reader) *metric.MeterProvider {
|
||||
var mpOpts []metric.Option
|
||||
mpOpts = append(mpOpts, metric.WithResource(res))
|
||||
for _, r := range readers {
|
||||
mpOpts = append(mpOpts, metric.WithReader(r))
|
||||
}
|
||||
mpOpts = append(mpOpts, metric.WithView(metric.NewView(
|
||||
metric.Instrument{Name: "newt_*_latency_seconds"},
|
||||
metric.Stream{Aggregation: metric.AggregationExplicitBucketHistogram{Boundaries: []float64{0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10, 30}}},
|
||||
)))
|
||||
mpOpts = append(mpOpts, metric.WithView(metric.NewView(
|
||||
metric.Instrument{Name: "newt_*"},
|
||||
metric.Stream{AttributeFilter: func(kv attribute.KeyValue) bool {
|
||||
k := string(kv.Key)
|
||||
switch k {
|
||||
case "tunnel_id", "transport", "direction", "protocol", "result", "reason", "initiator", "error_type", "msg_type", "phase", "version", "commit", "site_id", "region":
|
||||
return true
|
||||
default:
|
||||
return false
|
||||
}
|
||||
}},
|
||||
)))
|
||||
return metric.NewMeterProvider(mpOpts...)
|
||||
}
|
||||
|
||||
func setupTracing(ctx context.Context, cfg Config, res *resource.Resource) (*trace.TracerProvider, func(context.Context) error) {
|
||||
topts := []otlptracegrpc.Option{otlptracegrpc.WithEndpoint(cfg.OTLPEndpoint)}
|
||||
if hdrs := parseOTLPHeaders(os.Getenv("OTEL_EXPORTER_OTLP_HEADERS")); len(hdrs) > 0 {
|
||||
topts = append(topts, otlptracegrpc.WithHeaders(hdrs))
|
||||
}
|
||||
if cfg.OTLPInsecure {
|
||||
topts = append(topts, otlptracegrpc.WithInsecure())
|
||||
} else if certFile := os.Getenv("OTEL_EXPORTER_OTLP_CERTIFICATE"); certFile != "" {
|
||||
if creds, cerr := credentials.NewClientTLSFromFile(certFile, ""); cerr == nil {
|
||||
topts = append(topts, otlptracegrpc.WithTLSCredentials(creds))
|
||||
}
|
||||
}
|
||||
exp, err := otlptracegrpc.New(ctx, topts...)
|
||||
if err != nil {
|
||||
return nil, nil
|
||||
}
|
||||
tp := trace.NewTracerProvider(trace.WithBatcher(exp), trace.WithResource(res))
|
||||
return tp, exp.Shutdown
|
||||
}
|
||||
|
||||
// Shutdown flushes exporters and providers in reverse init order.
|
||||
func (s *Setup) Shutdown(ctx context.Context) error {
|
||||
var err error
|
||||
for i := len(s.shutdowns) - 1; i >= 0; i-- {
|
||||
err = errors.Join(err, s.shutdowns[i](ctx))
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// parseOTLPHeaders parses a comma-separated list of key=value pairs, as used
// by OTEL_EXPORTER_OTLP_HEADERS, into a map. Keys and values are trimmed of
// surrounding whitespace, only the first "=" of an entry separates key from
// value, and entries without "=" are ignored. The returned map is never nil.
func parseOTLPHeaders(h string) map[string]string {
	headers := make(map[string]string)
	for _, entry := range strings.Split(h, ",") {
		key, value, found := strings.Cut(strings.TrimSpace(entry), "=")
		if !found {
			continue
		}
		headers[strings.TrimSpace(key)] = strings.TrimSpace(value)
	}
	return headers
}
|
||||
|
||||
// parseResourceAttributes parses an OTEL_RESOURCE_ATTRIBUTES style string
// ("k=v,k2=v2") into a map. Whitespace around keys and values is trimmed, the
// value is everything after the first "=", and items lacking "=" are dropped.
// The returned map is never nil.
func parseResourceAttributes(s string) map[string]string {
	attrs := make(map[string]string)
	for _, item := range strings.Split(s, ",") {
		item = strings.TrimSpace(item)
		eq := strings.Index(item, "=")
		if eq < 0 {
			continue
		}
		attrs[strings.TrimSpace(item[:eq])] = strings.TrimSpace(item[eq+1:])
	}
	return attrs
}
|
||||
|
||||
// Global site/region used to enrich metric labels. Both hold a string once
// UpdateSiteInfo has stored a non-empty value; until then Load returns nil and
// the getters report "".
var siteIDVal atomic.Value
var regionVal atomic.Value
var (
	// bool; ShouldIncludeTunnelID reports true while nothing is stored.
	// Init stores true unless NEWT_METRICS_INCLUDE_TUNNEL_ID is set to a
	// value other than "true".
	includeTunnelIDVal atomic.Value
	// bool; ShouldIncludeSiteLabels reports false while nothing is stored
	// (i.e. before Init). Init stores true unless
	// NEWT_METRICS_INCLUDE_SITE_LABELS is set to a value other than "true".
	includeSiteLabelVal atomic.Value
)
|
||||
|
||||
// UpdateSiteInfo updates the global site_id and region used for metric labels.
|
||||
// Thread-safe via atomic.Value: subsequent metric emissions will include
|
||||
// the new labels, prior emissions remain unchanged.
|
||||
func UpdateSiteInfo(siteID, region string) {
|
||||
if siteID != "" {
|
||||
siteIDVal.Store(siteID)
|
||||
}
|
||||
if region != "" {
|
||||
regionVal.Store(region)
|
||||
}
|
||||
}
|
||||
|
||||
func getSiteID() string {
|
||||
if v, ok := siteIDVal.Load().(string); ok {
|
||||
return v
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
func getRegion() string {
|
||||
if v, ok := regionVal.Load().(string); ok {
|
||||
return v
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// siteAttrs returns label KVs for site_id and region (if set).
|
||||
func siteAttrs() []attribute.KeyValue {
|
||||
var out []attribute.KeyValue
|
||||
if s := getSiteID(); s != "" {
|
||||
out = append(out, attribute.String("site_id", s))
|
||||
}
|
||||
if r := getRegion(); r != "" {
|
||||
out = append(out, attribute.String("region", r))
|
||||
}
|
||||
return out
|
||||
}
|
||||
|
||||
// SiteLabelKVs exposes site label KVs for other packages (e.g., proxy manager).
|
||||
func SiteLabelKVs() []attribute.KeyValue {
|
||||
if !ShouldIncludeSiteLabels() {
|
||||
return nil
|
||||
}
|
||||
return siteAttrs()
|
||||
}
|
||||
|
||||
// ShouldIncludeTunnelID returns whether tunnel_id labels should be emitted.
|
||||
func ShouldIncludeTunnelID() bool {
|
||||
if v, ok := includeTunnelIDVal.Load().(bool); ok {
|
||||
return v
|
||||
}
|
||||
return true
|
||||
}
|
||||
|
||||
// ShouldIncludeSiteLabels returns whether site_id/region should be emitted as
|
||||
// metric labels in addition to resource attributes.
|
||||
func ShouldIncludeSiteLabels() bool {
|
||||
if v, ok := includeSiteLabelVal.Load().(bool); ok {
|
||||
return v
|
||||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// getenv returns the value of environment variable k, or d when the variable
// is unset or empty.
func getenv(k, d string) string {
	value := os.Getenv(k)
	if value == "" {
		return d
	}
	return value
}
|
||||
|
||||
// getdur returns environment variable k parsed with time.ParseDuration. It
// returns d when the variable is unset, empty, or not a valid duration string.
func getdur(k string, d time.Duration) time.Duration {
	raw := os.Getenv(k)
	if raw == "" {
		return d
	}
	parsed, err := time.ParseDuration(raw)
	if err != nil {
		return d
	}
	return parsed
}
|
||||
53
internal/telemetry/telemetry_attrfilter_test.go
Normal file
53
internal/telemetry/telemetry_attrfilter_test.go
Normal file
@@ -0,0 +1,53 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
)
|
||||
|
||||
// Test that disallowed attributes are filtered from the exposition.
|
||||
func TestAttributeFilterDropsUnknownKeys(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
resetMetricsForTest()
|
||||
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
|
||||
cfg := Config{ServiceName: "newt", PromEnabled: true, AdminAddr: "127.0.0.1:0"}
|
||||
tel, err := Init(ctx, cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("init: %v", err)
|
||||
}
|
||||
defer func() { _ = tel.Shutdown(context.Background()) }()
|
||||
|
||||
if tel.PrometheusHandler == nil {
|
||||
t.Fatalf("prom handler nil")
|
||||
}
|
||||
ts := httptest.NewServer(tel.PrometheusHandler)
|
||||
defer ts.Close()
|
||||
|
||||
// Add samples with disallowed attribute keys
|
||||
for _, k := range []string{"forbidden", "site_id", "host"} {
|
||||
set := attribute.NewSet(attribute.String(k, "x"))
|
||||
AddTunnelBytesSet(ctx, 123, set)
|
||||
}
|
||||
time.Sleep(50 * time.Millisecond)
|
||||
|
||||
resp, err := http.Get(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("GET: %v", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
b, _ := io.ReadAll(resp.Body)
|
||||
body := string(b)
|
||||
if strings.Contains(body, "forbidden=") {
|
||||
t.Fatalf("unexpected forbidden attribute leaked into metrics: %s", body)
|
||||
}
|
||||
if !strings.Contains(body, "site_id=\"x\"") {
|
||||
t.Fatalf("expected allowed attribute site_id to be present in metrics, got: %s", body)
|
||||
}
|
||||
}
|
||||
76
internal/telemetry/telemetry_golden_test.go
Normal file
76
internal/telemetry/telemetry_golden_test.go
Normal file
@@ -0,0 +1,76 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Golden test that /metrics contains expected metric names.
|
||||
func TestMetricsGoldenContains(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
resetMetricsForTest()
|
||||
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
|
||||
cfg := Config{ServiceName: "newt", PromEnabled: true, AdminAddr: "127.0.0.1:0", BuildVersion: "test"}
|
||||
tel, err := Init(ctx, cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("telemetry init error: %v", err)
|
||||
}
|
||||
defer func() { _ = tel.Shutdown(context.Background()) }()
|
||||
|
||||
if tel.PrometheusHandler == nil {
|
||||
t.Fatalf("prom handler nil")
|
||||
}
|
||||
ts := httptest.NewServer(tel.PrometheusHandler)
|
||||
defer ts.Close()
|
||||
|
||||
// Trigger counters to ensure they appear in the scrape
|
||||
IncConnAttempt(ctx, "websocket", "success")
|
||||
IncWSReconnect(ctx, "io_error")
|
||||
IncProxyConnectionEvent(ctx, "", "tcp", ProxyConnectionOpened)
|
||||
if tel.MeterProvider != nil {
|
||||
_ = tel.MeterProvider.ForceFlush(ctx)
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
var body string
|
||||
for i := 0; i < 5; i++ {
|
||||
resp, err := http.Get(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("GET metrics failed: %v", err)
|
||||
}
|
||||
b, _ := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
body = string(b)
|
||||
if strings.Contains(body, "newt_connection_attempts_total") {
|
||||
break
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
|
||||
f, err := os.Open(filepath.Join("testdata", "expected_contains.golden"))
|
||||
if err != nil {
|
||||
t.Fatalf("read golden: %v", err)
|
||||
}
|
||||
defer f.Close()
|
||||
s := bufio.NewScanner(f)
|
||||
for s.Scan() {
|
||||
needle := strings.TrimSpace(s.Text())
|
||||
if needle == "" {
|
||||
continue
|
||||
}
|
||||
if !strings.Contains(body, needle) {
|
||||
t.Fatalf("expected metrics body to contain %q. body=\n%s", needle, body)
|
||||
}
|
||||
}
|
||||
if err := s.Err(); err != nil {
|
||||
t.Fatalf("scan golden: %v", err)
|
||||
}
|
||||
}
|
||||
65
internal/telemetry/telemetry_smoke_test.go
Normal file
65
internal/telemetry/telemetry_smoke_test.go
Normal file
@@ -0,0 +1,65 @@
|
||||
package telemetry
|
||||
|
||||
import (
|
||||
"context"
|
||||
"io"
|
||||
"net/http"
|
||||
"net/http/httptest"
|
||||
"strings"
|
||||
"testing"
|
||||
"time"
|
||||
)
|
||||
|
||||
// Smoke test that /metrics contains at least one newt_* metric when Prom exporter is enabled.
|
||||
func TestMetricsSmoke(t *testing.T) {
|
||||
ctx := context.Background()
|
||||
resetMetricsForTest()
|
||||
t.Setenv("NEWT_METRICS_INCLUDE_SITE_LABELS", "true")
|
||||
cfg := Config{
|
||||
ServiceName: "newt",
|
||||
PromEnabled: true,
|
||||
OTLPEnabled: false,
|
||||
AdminAddr: "127.0.0.1:0",
|
||||
BuildVersion: "test",
|
||||
BuildCommit: "deadbeef",
|
||||
MetricExportInterval: 5 * time.Second,
|
||||
}
|
||||
tel, err := Init(ctx, cfg)
|
||||
if err != nil {
|
||||
t.Fatalf("telemetry init error: %v", err)
|
||||
}
|
||||
defer func() { _ = tel.Shutdown(context.Background()) }()
|
||||
|
||||
// Serve the Prom handler on a test server
|
||||
if tel.PrometheusHandler == nil {
|
||||
t.Fatalf("Prometheus handler nil; PromEnabled should enable it")
|
||||
}
|
||||
ts := httptest.NewServer(tel.PrometheusHandler)
|
||||
defer ts.Close()
|
||||
|
||||
// Record a simple metric and then fetch /metrics
|
||||
IncConnAttempt(ctx, "websocket", "success")
|
||||
if tel.MeterProvider != nil {
|
||||
_ = tel.MeterProvider.ForceFlush(ctx)
|
||||
}
|
||||
// Give the exporter a tick to collect
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
|
||||
var body string
|
||||
for i := 0; i < 5; i++ {
|
||||
resp, err := http.Get(ts.URL)
|
||||
if err != nil {
|
||||
t.Fatalf("GET /metrics failed: %v", err)
|
||||
}
|
||||
b, _ := io.ReadAll(resp.Body)
|
||||
_ = resp.Body.Close()
|
||||
body = string(b)
|
||||
if strings.Contains(body, "newt_connection_attempts_total") {
|
||||
break
|
||||
}
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
}
|
||||
if !strings.Contains(body, "newt_connection_attempts_total") {
|
||||
t.Fatalf("expected newt_connection_attempts_total in metrics, got:\n%s", body)
|
||||
}
|
||||
}
|
||||
3
internal/telemetry/testdata/expected_contains.golden
vendored
Normal file
3
internal/telemetry/testdata/expected_contains.golden
vendored
Normal file
@@ -0,0 +1,3 @@
|
||||
newt_connection_attempts_total
|
||||
newt_websocket_reconnects_total
|
||||
newt_proxy_connections_total
|
||||
315
main.go
315
main.go
@@ -1,7 +1,9 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"net"
|
||||
@@ -22,6 +24,9 @@ import (
|
||||
"github.com/fosrl/newt/updates"
|
||||
"github.com/fosrl/newt/websocket"
|
||||
|
||||
"github.com/fosrl/newt/internal/state"
|
||||
"github.com/fosrl/newt/internal/telemetry"
|
||||
"go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp"
|
||||
"golang.zx2c4.com/wireguard/conn"
|
||||
"golang.zx2c4.com/wireguard/device"
|
||||
"golang.zx2c4.com/wireguard/tun"
|
||||
@@ -74,6 +79,11 @@ type ExitNodePingResult struct {
|
||||
WasPreviouslyConnected bool `json:"wasPreviouslyConnected"`
|
||||
}
|
||||
|
||||
// BlueprintResult holds a success flag and an optional message. In JSON the
// fields are named "success" and "message"; an empty message is omitted.
type BlueprintResult struct {
	Success bool   `json:"success"`
	Message string `json:"message,omitempty"`
}
|
||||
|
||||
// Custom flag type for multiple CA files
|
||||
type stringSlice []string
|
||||
|
||||
@@ -86,6 +96,14 @@ func (s *stringSlice) Set(value string) error {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Shared string constants. The fmt* values are format strings that take one
// argument and are passed to the logger.
const (
	fmtErrMarshaling        = "Error marshaling data: %v"
	fmtReceivedMsg          = "Received: %+v"
	topicWGRegister         = "newt/wg/register" // presumably a websocket message type — confirm at the call sites
	msgNoTunnelOrProxy      = "No tunnel IP or proxy manager available"
	fmtErrParsingTargetData = "Error parsing target data: %v"
)
|
||||
|
||||
var (
|
||||
endpoint string
|
||||
id string
|
||||
@@ -115,6 +133,17 @@ var (
|
||||
preferEndpoint string
|
||||
healthMonitor *healthcheck.Monitor
|
||||
enforceHealthcheckCert bool
|
||||
// Build/version (can be overridden via -ldflags "-X main.newtVersion=...")
|
||||
newtVersion = "version_replaceme"
|
||||
|
||||
// Observability/metrics flags
|
||||
metricsEnabled bool
|
||||
otlpEnabled bool
|
||||
adminAddr string
|
||||
region string
|
||||
metricsAsyncBytes bool
|
||||
blueprintFile string
|
||||
noCloud bool
|
||||
|
||||
// New mTLS configuration variables
|
||||
tlsClientCert string
|
||||
@@ -126,6 +155,10 @@ var (
|
||||
)
|
||||
|
||||
func main() {
|
||||
// Prepare context for graceful shutdown and signal handling
|
||||
ctx, stop := signal.NotifyContext(context.Background(), os.Interrupt, syscall.SIGTERM)
|
||||
defer stop()
|
||||
|
||||
// if PANGOLIN_ENDPOINT, NEWT_ID, and NEWT_SECRET are set as environment variables, they will be used as default values
|
||||
endpoint = os.Getenv("PANGOLIN_ENDPOINT")
|
||||
id = os.Getenv("NEWT_ID")
|
||||
@@ -136,16 +169,22 @@ func main() {
|
||||
updownScript = os.Getenv("UPDOWN_SCRIPT")
|
||||
interfaceName = os.Getenv("INTERFACE")
|
||||
generateAndSaveKeyTo = os.Getenv("GENERATE_AND_SAVE_KEY_TO")
|
||||
|
||||
// Metrics/observability env mirrors
|
||||
metricsEnabledEnv := os.Getenv("NEWT_METRICS_PROMETHEUS_ENABLED")
|
||||
otlpEnabledEnv := os.Getenv("NEWT_METRICS_OTLP_ENABLED")
|
||||
adminAddrEnv := os.Getenv("NEWT_ADMIN_ADDR")
|
||||
regionEnv := os.Getenv("NEWT_REGION")
|
||||
asyncBytesEnv := os.Getenv("NEWT_METRICS_ASYNC_BYTES")
|
||||
|
||||
keepInterfaceEnv := os.Getenv("KEEP_INTERFACE")
|
||||
acceptClientsEnv := os.Getenv("ACCEPT_CLIENTS")
|
||||
useNativeInterfaceEnv := os.Getenv("USE_NATIVE_INTERFACE")
|
||||
enforceHealthcheckCertEnv := os.Getenv("ENFORCE_HC_CERT")
|
||||
|
||||
keepInterface = keepInterfaceEnv == "true"
|
||||
acceptClientsEnv := os.Getenv("ACCEPT_CLIENTS")
|
||||
acceptClients = acceptClientsEnv == "true"
|
||||
useNativeInterfaceEnv := os.Getenv("USE_NATIVE_INTERFACE")
|
||||
useNativeInterface = useNativeInterfaceEnv == "true"
|
||||
enforceHealthcheckCertEnv := os.Getenv("ENFORCE_HC_CERT")
|
||||
enforceHealthcheckCert = enforceHealthcheckCertEnv == "true"
|
||||
|
||||
dockerSocket = os.Getenv("DOCKER_SOCKET")
|
||||
pingIntervalStr := os.Getenv("PING_INTERVAL")
|
||||
pingTimeoutStr := os.Getenv("PING_TIMEOUT")
|
||||
@@ -169,9 +208,12 @@ func main() {
|
||||
// Legacy PKCS12 support (deprecated)
|
||||
tlsPrivateKey = os.Getenv("TLS_CLIENT_CERT_PKCS12")
|
||||
// Keep backward compatibility with old environment variable name
|
||||
if tlsPrivateKey == "" {
|
||||
if tlsPrivateKey == "" && tlsClientKey == "" && len(tlsClientCAs) == 0 {
|
||||
tlsPrivateKey = os.Getenv("TLS_CLIENT_CERT")
|
||||
}
|
||||
blueprintFile = os.Getenv("BLUEPRINT_FILE")
|
||||
noCloudEnv := os.Getenv("NO_CLOUD")
|
||||
noCloud = noCloudEnv == "true"
|
||||
|
||||
if endpoint == "" {
|
||||
flag.StringVar(&endpoint, "endpoint", "", "Endpoint of your pangolin server")
|
||||
@@ -186,7 +228,7 @@ func main() {
|
||||
flag.StringVar(&mtu, "mtu", "1280", "MTU to use")
|
||||
}
|
||||
if dns == "" {
|
||||
flag.StringVar(&dns, "dns", "8.8.8.8", "DNS server to use")
|
||||
flag.StringVar(&dns, "dns", "9.9.9.9", "DNS server to use")
|
||||
}
|
||||
if logLevel == "" {
|
||||
flag.StringVar(&logLevel, "log-level", "INFO", "Log level (DEBUG, INFO, WARN, ERROR, FATAL)")
|
||||
@@ -271,6 +313,49 @@ func main() {
|
||||
if healthFile == "" {
|
||||
flag.StringVar(&healthFile, "health-file", "", "Path to health file (if unset, health file won't be written)")
|
||||
}
|
||||
if blueprintFile == "" {
|
||||
flag.StringVar(&blueprintFile, "blueprint-file", "", "Path to blueprint file (if unset, no blueprint will be applied)")
|
||||
}
|
||||
if noCloudEnv == "" {
|
||||
flag.BoolVar(&noCloud, "no-cloud", false, "Disable cloud failover")
|
||||
}
|
||||
|
||||
// Metrics/observability flags (mirror ENV if unset)
|
||||
if metricsEnabledEnv == "" {
|
||||
flag.BoolVar(&metricsEnabled, "metrics", true, "Enable Prometheus /metrics exporter")
|
||||
} else {
|
||||
if v, err := strconv.ParseBool(metricsEnabledEnv); err == nil {
|
||||
metricsEnabled = v
|
||||
} else {
|
||||
metricsEnabled = true
|
||||
}
|
||||
}
|
||||
if otlpEnabledEnv == "" {
|
||||
flag.BoolVar(&otlpEnabled, "otlp", false, "Enable OTLP exporters (metrics/traces) to OTEL_EXPORTER_OTLP_ENDPOINT")
|
||||
} else {
|
||||
if v, err := strconv.ParseBool(otlpEnabledEnv); err == nil {
|
||||
otlpEnabled = v
|
||||
}
|
||||
}
|
||||
if adminAddrEnv == "" {
|
||||
flag.StringVar(&adminAddr, "metrics-admin-addr", "127.0.0.1:2112", "Admin/metrics bind address")
|
||||
} else {
|
||||
adminAddr = adminAddrEnv
|
||||
}
|
||||
// Async bytes toggle
|
||||
if asyncBytesEnv == "" {
|
||||
flag.BoolVar(&metricsAsyncBytes, "metrics-async-bytes", false, "Enable async bytes counting (background flush; lower hot path overhead)")
|
||||
} else {
|
||||
if v, err := strconv.ParseBool(asyncBytesEnv); err == nil {
|
||||
metricsAsyncBytes = v
|
||||
}
|
||||
}
|
||||
// Optional region flag (resource attribute)
|
||||
if regionEnv == "" {
|
||||
flag.StringVar(&region, "region", "", "Optional region resource attribute (also NEWT_REGION)")
|
||||
} else {
|
||||
region = regionEnv
|
||||
}
|
||||
|
||||
// do a --version check
|
||||
version := flag.Bool("version", false, "Print the version")
|
||||
@@ -286,12 +371,58 @@ func main() {
|
||||
loggerLevel := parseLogLevel(logLevel)
|
||||
logger.GetLogger().SetLevel(parseLogLevel(logLevel))
|
||||
|
||||
newtVersion := "version_replaceme"
|
||||
// Initialize telemetry after flags are parsed (so flags override env)
|
||||
tcfg := telemetry.FromEnv()
|
||||
tcfg.PromEnabled = metricsEnabled
|
||||
tcfg.OTLPEnabled = otlpEnabled
|
||||
if adminAddr != "" {
|
||||
tcfg.AdminAddr = adminAddr
|
||||
}
|
||||
// Resource attributes (if available)
|
||||
tcfg.SiteID = id
|
||||
tcfg.Region = region
|
||||
// Build info
|
||||
tcfg.BuildVersion = newtVersion
|
||||
tcfg.BuildCommit = os.Getenv("NEWT_COMMIT")
|
||||
|
||||
tel, telErr := telemetry.Init(ctx, tcfg)
|
||||
if telErr != nil {
|
||||
logger.Warn("Telemetry init failed: %v", telErr)
|
||||
}
|
||||
if tel != nil {
|
||||
// Admin HTTP server (exposes /metrics when Prometheus exporter is enabled)
|
||||
logger.Info("Starting metrics server on %s", tcfg.AdminAddr)
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/healthz", func(w http.ResponseWriter, r *http.Request) { w.WriteHeader(200) })
|
||||
if tel.PrometheusHandler != nil {
|
||||
mux.Handle("/metrics", tel.PrometheusHandler)
|
||||
}
|
||||
admin := &http.Server{
|
||||
Addr: tcfg.AdminAddr,
|
||||
Handler: otelhttp.NewHandler(mux, "newt-admin"),
|
||||
ReadTimeout: 5 * time.Second,
|
||||
WriteTimeout: 10 * time.Second,
|
||||
ReadHeaderTimeout: 5 * time.Second,
|
||||
IdleTimeout: 30 * time.Second,
|
||||
}
|
||||
go func() {
|
||||
if err := admin.ListenAndServe(); err != nil && !errors.Is(err, http.ErrServerClosed) {
|
||||
logger.Warn("admin http error: %v", err)
|
||||
}
|
||||
}()
|
||||
defer func() {
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
|
||||
defer cancel()
|
||||
_ = admin.Shutdown(ctx)
|
||||
}()
|
||||
defer func() { _ = tel.Shutdown(context.Background()) }()
|
||||
}
|
||||
|
||||
if *version {
|
||||
fmt.Println("Newt version " + newtVersion)
|
||||
os.Exit(0)
|
||||
} else {
|
||||
logger.Info("Newt version " + newtVersion)
|
||||
logger.Info("Newt version %s", newtVersion)
|
||||
}
|
||||
|
||||
if err := updates.CheckForUpdate("fosrl", "newt", newtVersion); err != nil {
|
||||
@@ -362,6 +493,8 @@ func main() {
|
||||
}
|
||||
endpoint = client.GetConfig().Endpoint // Update endpoint from config
|
||||
id = client.GetConfig().ID // Update ID from config
|
||||
// Update site labels for metrics with the resolved ID
|
||||
telemetry.UpdateSiteInfo(id, region)
|
||||
|
||||
// output env var values if set
|
||||
logger.Debug("Endpoint: %v", endpoint)
|
||||
@@ -403,6 +536,7 @@ func main() {
|
||||
var pm *proxy.ProxyManager
|
||||
var connected bool
|
||||
var wgData WgData
|
||||
var dockerEventMonitor *docker.EventMonitor
|
||||
|
||||
if acceptClients {
|
||||
setupClients(client)
|
||||
@@ -468,7 +602,11 @@ func main() {
|
||||
|
||||
// Register handlers for different message types
|
||||
client.RegisterHandler("newt/wg/connect", func(msg websocket.WSMessage) {
|
||||
logger.Info("Received registration message")
|
||||
logger.Debug("Received registration message")
|
||||
regResult := "success"
|
||||
defer func() {
|
||||
telemetry.IncSiteRegistration(ctx, regResult)
|
||||
}()
|
||||
if stopFunc != nil {
|
||||
stopFunc() // stop the ws from sending more requests
|
||||
stopFunc = nil // reset stopFunc to nil to avoid double stopping
|
||||
@@ -487,22 +625,25 @@ func main() {
|
||||
|
||||
jsonData, err := json.Marshal(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error marshaling data: %v", err)
|
||||
logger.Info(fmtErrMarshaling, err)
|
||||
regResult = "failure"
|
||||
return
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(jsonData, &wgData); err != nil {
|
||||
logger.Info("Error unmarshaling target data: %v", err)
|
||||
regResult = "failure"
|
||||
return
|
||||
}
|
||||
|
||||
logger.Debug("Received: %+v", msg)
|
||||
logger.Debug(fmtReceivedMsg, msg)
|
||||
tun, tnet, err = netstack.CreateNetTUN(
|
||||
[]netip.Addr{netip.MustParseAddr(wgData.TunnelIP)},
|
||||
[]netip.Addr{netip.MustParseAddr(dns)},
|
||||
mtuInt)
|
||||
if err != nil {
|
||||
logger.Error("Failed to create TUN device: %v", err)
|
||||
regResult = "failure"
|
||||
}
|
||||
|
||||
setDownstreamTNetstack(tnet)
|
||||
@@ -516,6 +657,7 @@ func main() {
|
||||
host, _, err := net.SplitHostPort(wgData.Endpoint)
|
||||
if err != nil {
|
||||
logger.Error("Failed to split endpoint: %v", err)
|
||||
regResult = "failure"
|
||||
return
|
||||
}
|
||||
|
||||
@@ -524,6 +666,7 @@ func main() {
|
||||
endpoint, err := resolveDomain(wgData.Endpoint)
|
||||
if err != nil {
|
||||
logger.Error("Failed to resolve endpoint: %v", err)
|
||||
regResult = "failure"
|
||||
return
|
||||
}
|
||||
|
||||
@@ -539,12 +682,14 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
err = dev.IpcSet(config)
|
||||
if err != nil {
|
||||
logger.Error("Failed to configure WireGuard device: %v", err)
|
||||
regResult = "failure"
|
||||
}
|
||||
|
||||
// Bring up the device
|
||||
err = dev.Up()
|
||||
if err != nil {
|
||||
logger.Error("Failed to bring up WireGuard device: %v", err)
|
||||
regResult = "failure"
|
||||
}
|
||||
|
||||
logger.Debug("WireGuard device created. Lets ping the server now...")
|
||||
@@ -557,11 +702,15 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
}
|
||||
// Use reliable ping for initial connection test
|
||||
logger.Debug("Testing initial connection with reliable ping...")
|
||||
_, err = reliablePing(tnet, wgData.ServerIP, pingTimeout, 5)
|
||||
lat, err := reliablePing(tnet, wgData.ServerIP, pingTimeout, 5)
|
||||
if err == nil && wgData.PublicKey != "" {
|
||||
telemetry.ObserveTunnelLatency(ctx, wgData.PublicKey, "wireguard", lat.Seconds())
|
||||
}
|
||||
if err != nil {
|
||||
logger.Warn("Initial reliable ping failed, but continuing: %v", err)
|
||||
regResult = "failure"
|
||||
} else {
|
||||
logger.Info("Initial connection test successful")
|
||||
logger.Debug("Initial connection test successful")
|
||||
}
|
||||
|
||||
pingWithRetryStopChan, _ = pingWithRetry(tnet, wgData.ServerIP, pingTimeout)
|
||||
@@ -570,11 +719,14 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
// as the pings will continue in the background
|
||||
if !connected {
|
||||
logger.Debug("Starting ping check")
|
||||
pingStopChan = startPingCheck(tnet, wgData.ServerIP, client)
|
||||
pingStopChan = startPingCheck(tnet, wgData.ServerIP, client, wgData.PublicKey)
|
||||
}
|
||||
|
||||
// Create proxy manager
|
||||
pm = proxy.NewProxyManager(tnet)
|
||||
pm.SetAsyncBytes(metricsAsyncBytes)
|
||||
// Set tunnel_id for metrics (WireGuard peer public key)
|
||||
pm.SetTunnelID(wgData.PublicKey)
|
||||
|
||||
connected = true
|
||||
|
||||
@@ -600,7 +752,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
if err := healthMonitor.AddTargets(wgData.HealthCheckTargets); err != nil {
|
||||
logger.Error("Failed to bulk add health check targets: %v", err)
|
||||
} else {
|
||||
logger.Info("Successfully added %d health check targets", len(wgData.HealthCheckTargets))
|
||||
logger.Debug("Successfully added %d health check targets", len(wgData.HealthCheckTargets))
|
||||
}
|
||||
|
||||
err = pm.Start()
|
||||
@@ -611,10 +763,19 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
|
||||
client.RegisterHandler("newt/wg/reconnect", func(msg websocket.WSMessage) {
|
||||
logger.Info("Received reconnect message")
|
||||
if wgData.PublicKey != "" {
|
||||
telemetry.IncReconnect(ctx, wgData.PublicKey, "server", telemetry.ReasonServerRequest)
|
||||
}
|
||||
|
||||
// Close the WireGuard device and TUN
|
||||
closeWgTunnel()
|
||||
|
||||
// Clear metrics attrs and sessions for the tunnel
|
||||
if pm != nil {
|
||||
pm.ClearTunnelID()
|
||||
state.Global().ClearTunnel(wgData.PublicKey)
|
||||
}
|
||||
|
||||
// Mark as disconnected
|
||||
connected = false
|
||||
|
||||
@@ -624,13 +785,18 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
}
|
||||
|
||||
// Request exit nodes from the server
|
||||
stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{}, 3*time.Second)
|
||||
stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{
|
||||
"noCloud": noCloud,
|
||||
}, 3*time.Second)
|
||||
|
||||
logger.Info("Tunnel destroyed, ready for reconnection")
|
||||
})
|
||||
|
||||
client.RegisterHandler("newt/wg/terminate", func(msg websocket.WSMessage) {
|
||||
logger.Info("Received termination message")
|
||||
if wgData.PublicKey != "" {
|
||||
telemetry.IncReconnect(ctx, wgData.PublicKey, "server", telemetry.ReasonServerRequest)
|
||||
}
|
||||
|
||||
// Close the WireGuard device and TUN
|
||||
closeWgTunnel()
|
||||
@@ -647,7 +813,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
})
|
||||
|
||||
client.RegisterHandler("newt/ping/exitNodes", func(msg websocket.WSMessage) {
|
||||
logger.Info("Received ping message")
|
||||
logger.Debug("Received ping message")
|
||||
if stopFunc != nil {
|
||||
stopFunc() // stop the ws from sending more requests
|
||||
stopFunc = nil // reset stopFunc to nil to avoid double stopping
|
||||
@@ -658,7 +824,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
|
||||
jsonData, err := json.Marshal(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error marshaling data: %v", err)
|
||||
logger.Info(fmtErrMarshaling, err)
|
||||
return
|
||||
}
|
||||
if err := json.Unmarshal(jsonData, &exitNodeData); err != nil {
|
||||
@@ -699,7 +865,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
},
|
||||
}
|
||||
|
||||
stopFunc = client.SendMessageInterval("newt/wg/register", map[string]interface{}{
|
||||
stopFunc = client.SendMessageInterval(topicWGRegister, map[string]interface{}{
|
||||
"publicKey": publicKey.String(),
|
||||
"pingResults": pingResults,
|
||||
"newtVersion": newtVersion,
|
||||
@@ -802,7 +968,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
}
|
||||
|
||||
// Send the ping results to the cloud for selection
|
||||
stopFunc = client.SendMessageInterval("newt/wg/register", map[string]interface{}{
|
||||
stopFunc = client.SendMessageInterval(topicWGRegister, map[string]interface{}{
|
||||
"publicKey": publicKey.String(),
|
||||
"pingResults": pingResults,
|
||||
"newtVersion": newtVersion,
|
||||
@@ -812,17 +978,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
})
|
||||
|
||||
client.RegisterHandler("newt/tcp/add", func(msg websocket.WSMessage) {
|
||||
logger.Debug("Received: %+v", msg)
|
||||
logger.Debug(fmtReceivedMsg, msg)
|
||||
|
||||
// if there is no wgData or pm, we can't add targets
|
||||
if wgData.TunnelIP == "" || pm == nil {
|
||||
logger.Info("No tunnel IP or proxy manager available")
|
||||
logger.Info(msgNoTunnelOrProxy)
|
||||
return
|
||||
}
|
||||
|
||||
targetData, err := parseTargetData(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error parsing target data: %v", err)
|
||||
logger.Info(fmtErrParsingTargetData, err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -837,17 +1003,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
})
|
||||
|
||||
client.RegisterHandler("newt/udp/add", func(msg websocket.WSMessage) {
|
||||
logger.Info("Received: %+v", msg)
|
||||
logger.Info(fmtReceivedMsg, msg)
|
||||
|
||||
// if there is no wgData or pm, we can't add targets
|
||||
if wgData.TunnelIP == "" || pm == nil {
|
||||
logger.Info("No tunnel IP or proxy manager available")
|
||||
logger.Info(msgNoTunnelOrProxy)
|
||||
return
|
||||
}
|
||||
|
||||
targetData, err := parseTargetData(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error parsing target data: %v", err)
|
||||
logger.Info(fmtErrParsingTargetData, err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -862,17 +1028,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
})
|
||||
|
||||
client.RegisterHandler("newt/udp/remove", func(msg websocket.WSMessage) {
|
||||
logger.Info("Received: %+v", msg)
|
||||
logger.Info(fmtReceivedMsg, msg)
|
||||
|
||||
// if there is no wgData or pm, we can't add targets
|
||||
if wgData.TunnelIP == "" || pm == nil {
|
||||
logger.Info("No tunnel IP or proxy manager available")
|
||||
logger.Info(msgNoTunnelOrProxy)
|
||||
return
|
||||
}
|
||||
|
||||
targetData, err := parseTargetData(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error parsing target data: %v", err)
|
||||
logger.Info(fmtErrParsingTargetData, err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -887,17 +1053,17 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
})
|
||||
|
||||
client.RegisterHandler("newt/tcp/remove", func(msg websocket.WSMessage) {
|
||||
logger.Info("Received: %+v", msg)
|
||||
logger.Info(fmtReceivedMsg, msg)
|
||||
|
||||
// if there is no wgData or pm, we can't add targets
|
||||
if wgData.TunnelIP == "" || pm == nil {
|
||||
logger.Info("No tunnel IP or proxy manager available")
|
||||
logger.Info(msgNoTunnelOrProxy)
|
||||
return
|
||||
}
|
||||
|
||||
targetData, err := parseTargetData(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error parsing target data: %v", err)
|
||||
logger.Info(fmtErrParsingTargetData, err)
|
||||
return
|
||||
}
|
||||
|
||||
@@ -938,7 +1104,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
if err != nil {
|
||||
logger.Error("Failed to send Docker socket check response: %v", err)
|
||||
} else {
|
||||
logger.Info("Docker socket check response sent: available=%t", isAvailable)
|
||||
logger.Debug("Docker socket check response sent: available=%t", isAvailable)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -969,7 +1135,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
if err != nil {
|
||||
logger.Error("Failed to send Docker container list: %v", err)
|
||||
} else {
|
||||
logger.Info("Docker container list sent, count: %d", len(containers))
|
||||
logger.Debug("Docker container list sent, count: %d", len(containers))
|
||||
}
|
||||
})
|
||||
|
||||
@@ -981,7 +1147,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
|
||||
jsonData, err := json.Marshal(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error marshaling data: %v", err)
|
||||
logger.Info(fmtErrMarshaling, err)
|
||||
return
|
||||
}
|
||||
if err := json.Unmarshal(jsonData, &sshPublicKeyData); err != nil {
|
||||
@@ -1085,7 +1251,7 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
if err := healthMonitor.AddTargets(config.Targets); err != nil {
|
||||
logger.Error("Failed to add health check targets: %v", err)
|
||||
} else {
|
||||
logger.Info("Added %d health check targets", len(config.Targets))
|
||||
logger.Debug("Added %d health check targets", len(config.Targets))
|
||||
}
|
||||
|
||||
logger.Debug("Health check targets added: %+v", config.Targets)
|
||||
@@ -1138,9 +1304,9 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
}
|
||||
|
||||
if err := healthMonitor.EnableTarget(requestData.ID); err != nil {
|
||||
logger.Error("Failed to enable health check target %s: %v", requestData.ID, err)
|
||||
logger.Error("Failed to enable health check target %d: %v", requestData.ID, err)
|
||||
} else {
|
||||
logger.Info("Enabled health check target: %s", requestData.ID)
|
||||
logger.Info("Enabled health check target: %d", requestData.ID)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -1163,9 +1329,9 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
}
|
||||
|
||||
if err := healthMonitor.DisableTarget(requestData.ID); err != nil {
|
||||
logger.Error("Failed to disable health check target %s: %v", requestData.ID, err)
|
||||
logger.Error("Failed to disable health check target %d: %v", requestData.ID, err)
|
||||
} else {
|
||||
logger.Info("Disabled health check target: %s", requestData.ID)
|
||||
logger.Info("Disabled health check target: %d", requestData.ID)
|
||||
}
|
||||
})
|
||||
|
||||
@@ -1193,6 +1359,29 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
}
|
||||
})
|
||||
|
||||
// Register handler for getting health check status
|
||||
client.RegisterHandler("newt/blueprint/results", func(msg websocket.WSMessage) {
|
||||
logger.Debug("Received blueprint results message")
|
||||
|
||||
var blueprintResult BlueprintResult
|
||||
|
||||
jsonData, err := json.Marshal(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error marshaling data: %v", err)
|
||||
return
|
||||
}
|
||||
if err := json.Unmarshal(jsonData, &blueprintResult); err != nil {
|
||||
logger.Info("Error unmarshaling config results data: %v", err)
|
||||
return
|
||||
}
|
||||
|
||||
if blueprintResult.Success {
|
||||
logger.Debug("Blueprint applied successfully!")
|
||||
} else {
|
||||
logger.Warn("Blueprint application failed: %s", blueprintResult.Message)
|
||||
}
|
||||
})
|
||||
|
||||
client.OnConnect(func() error {
|
||||
publicKey = privateKey.PublicKey()
|
||||
logger.Debug("Public key: %s", publicKey)
|
||||
@@ -1203,19 +1392,23 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
if stopFunc != nil {
|
||||
stopFunc()
|
||||
}
|
||||
// request from the server the list of nodes to ping at newt/ping/request
|
||||
stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{}, 3*time.Second)
|
||||
logger.Info("Requesting exit nodes from server")
|
||||
// request from the server the list of nodes to ping
|
||||
stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{
|
||||
"noCloud": noCloud,
|
||||
}, 3*time.Second)
|
||||
logger.Debug("Requesting exit nodes from server")
|
||||
clientsOnConnect()
|
||||
}
|
||||
|
||||
// Send registration message to the server for backward compatibility
|
||||
err := client.SendMessage("newt/wg/register", map[string]interface{}{
|
||||
err := client.SendMessage(topicWGRegister, map[string]interface{}{
|
||||
"publicKey": publicKey.String(),
|
||||
"newtVersion": newtVersion,
|
||||
"backwardsCompatible": true,
|
||||
})
|
||||
|
||||
sendBlueprint(client)
|
||||
|
||||
if err != nil {
|
||||
logger.Error("Failed to send registration message: %v", err)
|
||||
return err
|
||||
@@ -1230,6 +1423,34 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
}
|
||||
defer client.Close()
|
||||
|
||||
// Initialize Docker event monitoring if Docker socket is available and monitoring is enabled
|
||||
if dockerSocket != "" {
|
||||
logger.Debug("Initializing Docker event monitoring")
|
||||
dockerEventMonitor, err = docker.NewEventMonitor(dockerSocket, dockerEnforceNetworkValidationBool, func(containers []docker.Container) {
|
||||
// Send updated container list via websocket when Docker events occur
|
||||
logger.Debug("Docker event detected, sending updated container list (%d containers)", len(containers))
|
||||
err := client.SendMessage("newt/socket/containers", map[string]interface{}{
|
||||
"containers": containers,
|
||||
})
|
||||
if err != nil {
|
||||
logger.Error("Failed to send updated container list after Docker event: %v", err)
|
||||
} else {
|
||||
logger.Debug("Updated container list sent successfully")
|
||||
}
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
logger.Error("Failed to create Docker event monitor: %v", err)
|
||||
} else {
|
||||
err = dockerEventMonitor.Start()
|
||||
if err != nil {
|
||||
logger.Error("Failed to start Docker event monitoring: %v", err)
|
||||
} else {
|
||||
logger.Debug("Docker event monitoring started successfully")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Wait for interrupt signal
|
||||
sigCh := make(chan os.Signal, 1)
|
||||
signal.Notify(sigCh, syscall.SIGINT, syscall.SIGTERM)
|
||||
@@ -1238,6 +1459,10 @@ persistent_keepalive_interval=5`, fixKey(privateKey.String()), fixKey(wgData.Pub
|
||||
// Close clients first (including WGTester)
|
||||
closeClients()
|
||||
|
||||
if dockerEventMonitor != nil {
|
||||
dockerEventMonitor.Stop()
|
||||
}
|
||||
|
||||
if healthMonitor != nil {
|
||||
healthMonitor.Stop()
|
||||
}
|
||||
|
||||
379
proxy/manager.go
379
proxy/manager.go
@@ -1,18 +1,28 @@
|
||||
package proxy
|
||||
|
||||
import (
|
||||
"context"
|
||||
"errors"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"os"
|
||||
"strings"
|
||||
"sync"
|
||||
"sync/atomic"
|
||||
"time"
|
||||
|
||||
"github.com/fosrl/newt/internal/state"
|
||||
"github.com/fosrl/newt/internal/telemetry"
|
||||
"github.com/fosrl/newt/logger"
|
||||
"go.opentelemetry.io/otel/attribute"
|
||||
"go.opentelemetry.io/otel/metric"
|
||||
"golang.zx2c4.com/wireguard/tun/netstack"
|
||||
"gvisor.dev/gvisor/pkg/tcpip/adapters/gonet"
|
||||
)
|
||||
|
||||
const errUnsupportedProtoFmt = "unsupported protocol: %s"
|
||||
|
||||
// Target represents a proxy target with its address and port
|
||||
type Target struct {
|
||||
Address string
|
||||
@@ -28,6 +38,90 @@ type ProxyManager struct {
|
||||
udpConns []*gonet.UDPConn
|
||||
running bool
|
||||
mutex sync.RWMutex
|
||||
|
||||
// telemetry (multi-tunnel)
|
||||
currentTunnelID string
|
||||
tunnels map[string]*tunnelEntry
|
||||
asyncBytes bool
|
||||
flushStop chan struct{}
|
||||
}
|
||||
|
||||
// tunnelEntry holds per-tunnel attributes and (optional) async counters.
|
||||
type tunnelEntry struct {
|
||||
attrInTCP attribute.Set
|
||||
attrOutTCP attribute.Set
|
||||
attrInUDP attribute.Set
|
||||
attrOutUDP attribute.Set
|
||||
|
||||
bytesInTCP atomic.Uint64
|
||||
bytesOutTCP atomic.Uint64
|
||||
bytesInUDP atomic.Uint64
|
||||
bytesOutUDP atomic.Uint64
|
||||
|
||||
activeTCP atomic.Int64
|
||||
activeUDP atomic.Int64
|
||||
}
|
||||
|
||||
// countingWriter wraps an io.Writer and adds bytes to OTel counter using a pre-built attribute set.
|
||||
type countingWriter struct {
|
||||
ctx context.Context
|
||||
w io.Writer
|
||||
set attribute.Set
|
||||
pm *ProxyManager
|
||||
ent *tunnelEntry
|
||||
out bool // false=in, true=out
|
||||
proto string // "tcp" or "udp"
|
||||
}
|
||||
|
||||
func (cw *countingWriter) Write(p []byte) (int, error) {
|
||||
n, err := cw.w.Write(p)
|
||||
if n > 0 {
|
||||
if cw.pm != nil && cw.pm.asyncBytes && cw.ent != nil {
|
||||
switch cw.proto {
|
||||
case "tcp":
|
||||
if cw.out {
|
||||
cw.ent.bytesOutTCP.Add(uint64(n))
|
||||
} else {
|
||||
cw.ent.bytesInTCP.Add(uint64(n))
|
||||
}
|
||||
case "udp":
|
||||
if cw.out {
|
||||
cw.ent.bytesOutUDP.Add(uint64(n))
|
||||
} else {
|
||||
cw.ent.bytesInUDP.Add(uint64(n))
|
||||
}
|
||||
}
|
||||
} else {
|
||||
telemetry.AddTunnelBytesSet(cw.ctx, int64(n), cw.set)
|
||||
}
|
||||
}
|
||||
return n, err
|
||||
}
|
||||
|
||||
func classifyProxyError(err error) string {
|
||||
if err == nil {
|
||||
return ""
|
||||
}
|
||||
if errors.Is(err, net.ErrClosed) {
|
||||
return "closed"
|
||||
}
|
||||
if ne, ok := err.(net.Error); ok {
|
||||
if ne.Timeout() {
|
||||
return "timeout"
|
||||
}
|
||||
if ne.Temporary() {
|
||||
return "temporary"
|
||||
}
|
||||
}
|
||||
msg := strings.ToLower(err.Error())
|
||||
switch {
|
||||
case strings.Contains(msg, "refused"):
|
||||
return "refused"
|
||||
case strings.Contains(msg, "reset"):
|
||||
return "reset"
|
||||
default:
|
||||
return "io_error"
|
||||
}
|
||||
}
|
||||
|
||||
// NewProxyManager creates a new proxy manager instance
|
||||
@@ -38,9 +132,77 @@ func NewProxyManager(tnet *netstack.Net) *ProxyManager {
|
||||
udpTargets: make(map[string]map[int]string),
|
||||
listeners: make([]*gonet.TCPListener, 0),
|
||||
udpConns: make([]*gonet.UDPConn, 0),
|
||||
tunnels: make(map[string]*tunnelEntry),
|
||||
}
|
||||
}
|
||||
|
||||
// SetTunnelID sets the WireGuard peer public key used as tunnel_id label.
|
||||
func (pm *ProxyManager) SetTunnelID(id string) {
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
pm.currentTunnelID = id
|
||||
if _, ok := pm.tunnels[id]; !ok {
|
||||
pm.tunnels[id] = &tunnelEntry{}
|
||||
}
|
||||
e := pm.tunnels[id]
|
||||
// include site labels if available
|
||||
site := telemetry.SiteLabelKVs()
|
||||
build := func(base []attribute.KeyValue) attribute.Set {
|
||||
if telemetry.ShouldIncludeTunnelID() {
|
||||
base = append([]attribute.KeyValue{attribute.String("tunnel_id", id)}, base...)
|
||||
}
|
||||
base = append(site, base...)
|
||||
return attribute.NewSet(base...)
|
||||
}
|
||||
e.attrInTCP = build([]attribute.KeyValue{
|
||||
attribute.String("direction", "ingress"),
|
||||
attribute.String("protocol", "tcp"),
|
||||
})
|
||||
e.attrOutTCP = build([]attribute.KeyValue{
|
||||
attribute.String("direction", "egress"),
|
||||
attribute.String("protocol", "tcp"),
|
||||
})
|
||||
e.attrInUDP = build([]attribute.KeyValue{
|
||||
attribute.String("direction", "ingress"),
|
||||
attribute.String("protocol", "udp"),
|
||||
})
|
||||
e.attrOutUDP = build([]attribute.KeyValue{
|
||||
attribute.String("direction", "egress"),
|
||||
attribute.String("protocol", "udp"),
|
||||
})
|
||||
}
|
||||
|
||||
// ClearTunnelID clears cached attribute sets for the current tunnel.
|
||||
func (pm *ProxyManager) ClearTunnelID() {
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
id := pm.currentTunnelID
|
||||
if id == "" {
|
||||
return
|
||||
}
|
||||
if e, ok := pm.tunnels[id]; ok {
|
||||
// final flush for this tunnel
|
||||
inTCP := e.bytesInTCP.Swap(0)
|
||||
outTCP := e.bytesOutTCP.Swap(0)
|
||||
inUDP := e.bytesInUDP.Swap(0)
|
||||
outUDP := e.bytesOutUDP.Swap(0)
|
||||
if inTCP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
|
||||
}
|
||||
if outTCP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
|
||||
}
|
||||
if inUDP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
|
||||
}
|
||||
if outUDP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
|
||||
}
|
||||
delete(pm.tunnels, id)
|
||||
}
|
||||
pm.currentTunnelID = ""
|
||||
}
|
||||
|
||||
// init function without tnet
|
||||
func NewProxyManagerWithoutTNet() *ProxyManager {
|
||||
return &ProxyManager{
|
||||
@@ -75,7 +237,7 @@ func (pm *ProxyManager) AddTarget(proto, listenIP string, port int, targetAddr s
|
||||
}
|
||||
pm.udpTargets[listenIP][port] = targetAddr
|
||||
default:
|
||||
return fmt.Errorf("unsupported protocol: %s", proto)
|
||||
return fmt.Errorf(errUnsupportedProtoFmt, proto)
|
||||
}
|
||||
|
||||
if pm.running {
|
||||
@@ -124,13 +286,28 @@ func (pm *ProxyManager) RemoveTarget(proto, listenIP string, port int) error {
|
||||
return fmt.Errorf("target not found: %s:%d", listenIP, port)
|
||||
}
|
||||
default:
|
||||
return fmt.Errorf("unsupported protocol: %s", proto)
|
||||
return fmt.Errorf(errUnsupportedProtoFmt, proto)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
// Start begins listening for all configured proxy targets
|
||||
func (pm *ProxyManager) Start() error {
|
||||
// Register proxy observables once per process
|
||||
telemetry.SetProxyObservableCallback(func(ctx context.Context, o metric.Observer) error {
|
||||
pm.mutex.RLock()
|
||||
defer pm.mutex.RUnlock()
|
||||
for _, e := range pm.tunnels {
|
||||
// active connections
|
||||
telemetry.ObserveProxyActiveConnsObs(o, e.activeTCP.Load(), e.attrOutTCP.ToSlice())
|
||||
telemetry.ObserveProxyActiveConnsObs(o, e.activeUDP.Load(), e.attrOutUDP.ToSlice())
|
||||
// backlog bytes (sum of unflushed counters)
|
||||
b := int64(e.bytesInTCP.Load() + e.bytesOutTCP.Load() + e.bytesInUDP.Load() + e.bytesOutUDP.Load())
|
||||
telemetry.ObserveProxyAsyncBacklogObs(o, b, e.attrOutTCP.ToSlice())
|
||||
telemetry.ObserveProxyBufferBytesObs(o, b, e.attrOutTCP.ToSlice())
|
||||
}
|
||||
return nil
|
||||
})
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
|
||||
@@ -160,6 +337,75 @@ func (pm *ProxyManager) Start() error {
|
||||
return nil
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) SetAsyncBytes(b bool) {
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
pm.asyncBytes = b
|
||||
if b && pm.flushStop == nil {
|
||||
pm.flushStop = make(chan struct{})
|
||||
go pm.flushLoop()
|
||||
}
|
||||
}
|
||||
func (pm *ProxyManager) flushLoop() {
|
||||
flushInterval := 2 * time.Second
|
||||
if v := os.Getenv("OTEL_METRIC_EXPORT_INTERVAL"); v != "" {
|
||||
if d, err := time.ParseDuration(v); err == nil && d > 0 {
|
||||
if d/2 < flushInterval {
|
||||
flushInterval = d / 2
|
||||
}
|
||||
}
|
||||
}
|
||||
ticker := time.NewTicker(flushInterval)
|
||||
defer ticker.Stop()
|
||||
for {
|
||||
select {
|
||||
case <-ticker.C:
|
||||
pm.mutex.RLock()
|
||||
for _, e := range pm.tunnels {
|
||||
inTCP := e.bytesInTCP.Swap(0)
|
||||
outTCP := e.bytesOutTCP.Swap(0)
|
||||
inUDP := e.bytesInUDP.Swap(0)
|
||||
outUDP := e.bytesOutUDP.Swap(0)
|
||||
if inTCP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
|
||||
}
|
||||
if outTCP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
|
||||
}
|
||||
if inUDP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
|
||||
}
|
||||
if outUDP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
|
||||
}
|
||||
}
|
||||
pm.mutex.RUnlock()
|
||||
case <-pm.flushStop:
|
||||
pm.mutex.RLock()
|
||||
for _, e := range pm.tunnels {
|
||||
inTCP := e.bytesInTCP.Swap(0)
|
||||
outTCP := e.bytesOutTCP.Swap(0)
|
||||
inUDP := e.bytesInUDP.Swap(0)
|
||||
outUDP := e.bytesOutUDP.Swap(0)
|
||||
if inTCP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(inTCP), e.attrInTCP)
|
||||
}
|
||||
if outTCP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(outTCP), e.attrOutTCP)
|
||||
}
|
||||
if inUDP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(inUDP), e.attrInUDP)
|
||||
}
|
||||
if outUDP > 0 {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(outUDP), e.attrOutUDP)
|
||||
}
|
||||
}
|
||||
pm.mutex.RUnlock()
|
||||
return
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) Stop() error {
|
||||
pm.mutex.Lock()
|
||||
defer pm.mutex.Unlock()
|
||||
@@ -227,7 +473,7 @@ func (pm *ProxyManager) startTarget(proto, listenIP string, port int, targetAddr
|
||||
go pm.handleUDPProxy(conn, targetAddr)
|
||||
|
||||
default:
|
||||
return fmt.Errorf("unsupported protocol: %s", proto)
|
||||
return fmt.Errorf(errUnsupportedProtoFmt, proto)
|
||||
}
|
||||
|
||||
logger.Info("Started %s proxy to %s", proto, targetAddr)
|
||||
@@ -236,54 +482,84 @@ func (pm *ProxyManager) startTarget(proto, listenIP string, port int, targetAddr
|
||||
return nil
|
||||
}
|
||||
|
||||
// getEntry returns per-tunnel entry or nil.
|
||||
func (pm *ProxyManager) getEntry(id string) *tunnelEntry {
|
||||
pm.mutex.RLock()
|
||||
e := pm.tunnels[id]
|
||||
pm.mutex.RUnlock()
|
||||
return e
|
||||
}
|
||||
|
||||
func (pm *ProxyManager) handleTCPProxy(listener net.Listener, targetAddr string) {
|
||||
for {
|
||||
conn, err := listener.Accept()
|
||||
if err != nil {
|
||||
// Check if we're shutting down or the listener was closed
|
||||
telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "tcp", "failure", classifyProxyError(err))
|
||||
if !pm.running {
|
||||
return
|
||||
}
|
||||
|
||||
// Check for specific network errors that indicate the listener is closed
|
||||
if ne, ok := err.(net.Error); ok && !ne.Temporary() {
|
||||
logger.Info("TCP listener closed, stopping proxy handler for %v", listener.Addr())
|
||||
return
|
||||
}
|
||||
|
||||
logger.Error("Error accepting TCP connection: %v", err)
|
||||
// Don't hammer the CPU if we hit a temporary error
|
||||
time.Sleep(100 * time.Millisecond)
|
||||
continue
|
||||
}
|
||||
|
||||
go func() {
|
||||
tunnelID := pm.currentTunnelID
|
||||
telemetry.IncProxyAccept(context.Background(), tunnelID, "tcp", "success", "")
|
||||
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionOpened)
|
||||
if tunnelID != "" {
|
||||
state.Global().IncSessions(tunnelID)
|
||||
if e := pm.getEntry(tunnelID); e != nil {
|
||||
e.activeTCP.Add(1)
|
||||
}
|
||||
}
|
||||
|
||||
go func(tunnelID string, accepted net.Conn) {
|
||||
connStart := time.Now()
|
||||
target, err := net.Dial("tcp", targetAddr)
|
||||
if err != nil {
|
||||
logger.Error("Error connecting to target: %v", err)
|
||||
conn.Close()
|
||||
accepted.Close()
|
||||
telemetry.IncProxyAccept(context.Background(), tunnelID, "tcp", "failure", classifyProxyError(err))
|
||||
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionClosed)
|
||||
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "tcp", "failure", time.Since(connStart).Seconds())
|
||||
return
|
||||
}
|
||||
|
||||
// Create a WaitGroup to ensure both copy operations complete
|
||||
entry := pm.getEntry(tunnelID)
|
||||
if entry == nil {
|
||||
entry = &tunnelEntry{}
|
||||
}
|
||||
var wg sync.WaitGroup
|
||||
wg.Add(2)
|
||||
|
||||
go func() {
|
||||
go func(ent *tunnelEntry) {
|
||||
defer wg.Done()
|
||||
io.Copy(target, conn)
|
||||
target.Close()
|
||||
}()
|
||||
cw := &countingWriter{ctx: context.Background(), w: target, set: ent.attrInTCP, pm: pm, ent: ent, out: false, proto: "tcp"}
|
||||
_, _ = io.Copy(cw, accepted)
|
||||
_ = target.Close()
|
||||
}(entry)
|
||||
|
||||
go func() {
|
||||
go func(ent *tunnelEntry) {
|
||||
defer wg.Done()
|
||||
io.Copy(conn, target)
|
||||
conn.Close()
|
||||
}()
|
||||
cw := &countingWriter{ctx: context.Background(), w: accepted, set: ent.attrOutTCP, pm: pm, ent: ent, out: true, proto: "tcp"}
|
||||
_, _ = io.Copy(cw, target)
|
||||
_ = accepted.Close()
|
||||
}(entry)
|
||||
|
||||
// Wait for both copies to complete
|
||||
wg.Wait()
|
||||
}()
|
||||
if tunnelID != "" {
|
||||
state.Global().DecSessions(tunnelID)
|
||||
if e := pm.getEntry(tunnelID); e != nil {
|
||||
e.activeTCP.Add(-1)
|
||||
}
|
||||
}
|
||||
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "tcp", "success", time.Since(connStart).Seconds())
|
||||
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "tcp", telemetry.ProxyConnectionClosed)
|
||||
}(tunnelID, conn)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -326,6 +602,18 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
|
||||
}
|
||||
|
||||
clientKey := remoteAddr.String()
|
||||
// bytes from client -> target (direction=in)
|
||||
if pm.currentTunnelID != "" && n > 0 {
|
||||
if pm.asyncBytes {
|
||||
if e := pm.getEntry(pm.currentTunnelID); e != nil {
|
||||
e.bytesInUDP.Add(uint64(n))
|
||||
}
|
||||
} else {
|
||||
if e := pm.getEntry(pm.currentTunnelID); e != nil {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(n), e.attrInUDP)
|
||||
}
|
||||
}
|
||||
}
|
||||
clientsMutex.RLock()
|
||||
targetConn, exists := clientConns[clientKey]
|
||||
clientsMutex.RUnlock()
|
||||
@@ -334,28 +622,44 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
|
||||
targetUDPAddr, err := net.ResolveUDPAddr("udp", targetAddr)
|
||||
if err != nil {
|
||||
logger.Error("Error resolving target address: %v", err)
|
||||
telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "udp", "failure", "resolve")
|
||||
continue
|
||||
}
|
||||
|
||||
targetConn, err = net.DialUDP("udp", nil, targetUDPAddr)
|
||||
if err != nil {
|
||||
logger.Error("Error connecting to target: %v", err)
|
||||
telemetry.IncProxyAccept(context.Background(), pm.currentTunnelID, "udp", "failure", classifyProxyError(err))
|
||||
continue
|
||||
}
|
||||
tunnelID := pm.currentTunnelID
|
||||
telemetry.IncProxyAccept(context.Background(), tunnelID, "udp", "success", "")
|
||||
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "udp", telemetry.ProxyConnectionOpened)
|
||||
// Only increment activeUDP after a successful DialUDP
|
||||
if e := pm.getEntry(tunnelID); e != nil {
|
||||
e.activeUDP.Add(1)
|
||||
}
|
||||
|
||||
clientsMutex.Lock()
|
||||
clientConns[clientKey] = targetConn
|
||||
clientsMutex.Unlock()
|
||||
|
||||
go func(clientKey string, targetConn *net.UDPConn, remoteAddr net.Addr) {
|
||||
go func(clientKey string, targetConn *net.UDPConn, remoteAddr net.Addr, tunnelID string) {
|
||||
start := time.Now()
|
||||
result := "success"
|
||||
defer func() {
|
||||
// Always clean up when this goroutine exits
|
||||
clientsMutex.Lock()
|
||||
if storedConn, exists := clientConns[clientKey]; exists && storedConn == targetConn {
|
||||
delete(clientConns, clientKey)
|
||||
targetConn.Close()
|
||||
if e := pm.getEntry(tunnelID); e != nil {
|
||||
e.activeUDP.Add(-1)
|
||||
}
|
||||
}
|
||||
clientsMutex.Unlock()
|
||||
telemetry.ObserveProxyConnectionDuration(context.Background(), tunnelID, "udp", result, time.Since(start).Seconds())
|
||||
telemetry.IncProxyConnectionEvent(context.Background(), tunnelID, "udp", telemetry.ProxyConnectionClosed)
|
||||
}()
|
||||
|
||||
buffer := make([]byte, 65507)
|
||||
@@ -363,25 +667,52 @@ func (pm *ProxyManager) handleUDPProxy(conn *gonet.UDPConn, targetAddr string) {
|
||||
n, _, err := targetConn.ReadFromUDP(buffer)
|
||||
if err != nil {
|
||||
logger.Error("Error reading from target: %v", err)
|
||||
result = "failure"
|
||||
return // defer will handle cleanup
|
||||
}
|
||||
|
||||
// bytes from target -> client (direction=out)
|
||||
if pm.currentTunnelID != "" && n > 0 {
|
||||
if pm.asyncBytes {
|
||||
if e := pm.getEntry(pm.currentTunnelID); e != nil {
|
||||
e.bytesOutUDP.Add(uint64(n))
|
||||
}
|
||||
} else {
|
||||
if e := pm.getEntry(pm.currentTunnelID); e != nil {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(n), e.attrOutUDP)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
_, err = conn.WriteTo(buffer[:n], remoteAddr)
|
||||
if err != nil {
|
||||
logger.Error("Error writing to client: %v", err)
|
||||
telemetry.IncProxyDrops(context.Background(), pm.currentTunnelID, "udp")
|
||||
result = "failure"
|
||||
return // defer will handle cleanup
|
||||
}
|
||||
}
|
||||
}(clientKey, targetConn, remoteAddr)
|
||||
}(clientKey, targetConn, remoteAddr, tunnelID)
|
||||
}
|
||||
|
||||
_, err = targetConn.Write(buffer[:n])
|
||||
written, err := targetConn.Write(buffer[:n])
|
||||
if err != nil {
|
||||
logger.Error("Error writing to target: %v", err)
|
||||
telemetry.IncProxyDrops(context.Background(), pm.currentTunnelID, "udp")
|
||||
targetConn.Close()
|
||||
clientsMutex.Lock()
|
||||
delete(clientConns, clientKey)
|
||||
clientsMutex.Unlock()
|
||||
} else if pm.currentTunnelID != "" && written > 0 {
|
||||
if pm.asyncBytes {
|
||||
if e := pm.getEntry(pm.currentTunnelID); e != nil {
|
||||
e.bytesInUDP.Add(uint64(written))
|
||||
}
|
||||
} else {
|
||||
if e := pm.getEntry(pm.currentTunnelID); e != nil {
|
||||
telemetry.AddTunnelBytesSet(context.Background(), int64(written), e.attrInUDP)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
12
stub.go
12
stub.go
@@ -8,25 +8,27 @@ import (
|
||||
)
|
||||
|
||||
func setupClientsNative(client *websocket.Client, host string) {
|
||||
return // This function is not implemented for non-Linux systems.
|
||||
_ = client
|
||||
_ = host
|
||||
// No-op for non-Linux systems
|
||||
}
|
||||
|
||||
// closeWgServiceNative is the non-Linux stub; it does nothing.
func closeWgServiceNative() {
	// Intentionally empty: no-op for non-Linux systems.
}
|
||||
|
||||
// clientsOnConnectNative is the non-Linux stub; it does nothing.
func clientsOnConnectNative() {
	// Intentionally empty: no-op for non-Linux systems.
}
|
||||
|
||||
// clientsHandleNewtConnectionNative is the non-Linux stub; it discards both
// arguments and does nothing.
func clientsHandleNewtConnectionNative(publicKey, endpoint string) {
	// No-op for non-Linux systems; the arguments are deliberately unused.
	_, _ = publicKey, endpoint
}
|
||||
|
||||
func clientsAddProxyTargetNative(pm *proxy.ProxyManager, tunnelIp string) {
|
||||
_ = pm
|
||||
_ = tunnelIp
|
||||
// No-op for non-Linux systems
|
||||
return
|
||||
}
|
||||
|
||||
73
util.go
73
util.go
@@ -2,6 +2,7 @@ package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"context"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
"encoding/json"
|
||||
@@ -14,6 +15,7 @@ import (
|
||||
|
||||
"math/rand"
|
||||
|
||||
"github.com/fosrl/newt/internal/telemetry"
|
||||
"github.com/fosrl/newt/logger"
|
||||
"github.com/fosrl/newt/proxy"
|
||||
"github.com/fosrl/newt/websocket"
|
||||
@@ -21,8 +23,11 @@ import (
|
||||
"golang.org/x/net/ipv4"
|
||||
"golang.zx2c4.com/wireguard/device"
|
||||
"golang.zx2c4.com/wireguard/tun/netstack"
|
||||
"gopkg.in/yaml.v3"
|
||||
)
|
||||
|
||||
const msgHealthFileWriteFailed = "Failed to write health file: %v"
|
||||
|
||||
func fixKey(key string) string {
|
||||
// Remove any whitespace
|
||||
key = strings.TrimSpace(key)
|
||||
@@ -175,7 +180,7 @@ func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopC
|
||||
if healthFile != "" {
|
||||
err := os.WriteFile(healthFile, []byte("ok"), 0644)
|
||||
if err != nil {
|
||||
logger.Warn("Failed to write health file: %v", err)
|
||||
logger.Warn(msgHealthFileWriteFailed, err)
|
||||
}
|
||||
}
|
||||
return stopChan, nil
|
||||
@@ -216,11 +221,13 @@ func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopC
|
||||
if healthFile != "" {
|
||||
err := os.WriteFile(healthFile, []byte("ok"), 0644)
|
||||
if err != nil {
|
||||
logger.Warn("Failed to write health file: %v", err)
|
||||
logger.Warn(msgHealthFileWriteFailed, err)
|
||||
}
|
||||
}
|
||||
return
|
||||
}
|
||||
case <-pingStopChan:
|
||||
// Stop the goroutine when signaled
|
||||
return
|
||||
}
|
||||
}
|
||||
}()
|
||||
@@ -229,7 +236,7 @@ func pingWithRetry(tnet *netstack.Net, dst string, timeout time.Duration) (stopC
|
||||
return stopChan, fmt.Errorf("initial ping attempts failed, continuing in background")
|
||||
}
|
||||
|
||||
func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Client) chan struct{} {
|
||||
func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Client, tunnelID string) chan struct{} {
|
||||
maxInterval := 6 * time.Second
|
||||
currentInterval := pingInterval
|
||||
consecutiveFailures := 0
|
||||
@@ -292,6 +299,9 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
|
||||
if !connectionLost {
|
||||
connectionLost = true
|
||||
logger.Warn("Connection to server lost after %d failures. Continuous reconnection attempts will be made.", consecutiveFailures)
|
||||
if tunnelID != "" {
|
||||
telemetry.IncReconnect(context.Background(), tunnelID, "client", telemetry.ReasonTimeout)
|
||||
}
|
||||
stopFunc = client.SendMessageInterval("newt/ping/request", map[string]interface{}{}, 3*time.Second)
|
||||
// Send registration message to the server for backward compatibility
|
||||
err := client.SendMessage("newt/wg/register", map[string]interface{}{
|
||||
@@ -318,6 +328,10 @@ func startPingCheck(tnet *netstack.Net, serverIP string, client *websocket.Clien
|
||||
} else {
|
||||
// Track recent latencies
|
||||
recentLatencies = append(recentLatencies, latency)
|
||||
// Record tunnel latency (limit sampling to this periodic check)
|
||||
if tunnelID != "" {
|
||||
telemetry.ObserveTunnelLatency(context.Background(), tunnelID, "wireguard", latency.Seconds())
|
||||
}
|
||||
if len(recentLatencies) > 10 {
|
||||
recentLatencies = recentLatencies[1:]
|
||||
}
|
||||
@@ -467,7 +481,8 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
|
||||
continue
|
||||
}
|
||||
|
||||
if action == "add" {
|
||||
switch action {
|
||||
case "add":
|
||||
target := parts[1] + ":" + parts[2]
|
||||
|
||||
// Call updown script if provided
|
||||
@@ -493,7 +508,7 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
|
||||
// Add the new target
|
||||
pm.AddTarget(proto, tunnelIP, port, processedTarget)
|
||||
|
||||
} else if action == "remove" {
|
||||
case "remove":
|
||||
logger.Info("Removing target with port %d", port)
|
||||
|
||||
target := parts[1] + ":" + parts[2]
|
||||
@@ -511,6 +526,8 @@ func updateTargets(pm *proxy.ProxyManager, action string, tunnelIP string, proto
|
||||
logger.Error("Failed to remove target: %v", err)
|
||||
return err
|
||||
}
|
||||
default:
|
||||
logger.Info("Unknown action: %s", action)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -558,3 +575,47 @@ func executeUpdownScript(action, proto, target string) (string, error) {
|
||||
|
||||
return target, nil
|
||||
}
|
||||
|
||||
func sendBlueprint(client *websocket.Client) error {
|
||||
if blueprintFile == "" {
|
||||
return nil
|
||||
}
|
||||
// try to read the blueprint file
|
||||
blueprintData, err := os.ReadFile(blueprintFile)
|
||||
if err != nil {
|
||||
logger.Error("Failed to read blueprint file: %v", err)
|
||||
} else {
|
||||
// first we should convert the yaml to json and error if the yaml is bad
|
||||
var yamlObj interface{}
|
||||
var blueprintJsonData string
|
||||
|
||||
err = yaml.Unmarshal(blueprintData, &yamlObj)
|
||||
if err != nil {
|
||||
logger.Error("Failed to parse blueprint YAML: %v", err)
|
||||
} else {
|
||||
// convert to json
|
||||
jsonBytes, err := json.Marshal(yamlObj)
|
||||
if err != nil {
|
||||
logger.Error("Failed to convert blueprint to JSON: %v", err)
|
||||
} else {
|
||||
blueprintJsonData = string(jsonBytes)
|
||||
logger.Debug("Converted blueprint to JSON: %s", blueprintJsonData)
|
||||
}
|
||||
}
|
||||
|
||||
// if we have valid json data, we can send it to the server
|
||||
if blueprintJsonData == "" {
|
||||
logger.Error("No valid blueprint JSON data to send to server")
|
||||
return nil
|
||||
}
|
||||
|
||||
logger.Info("Sending blueprint to server for application")
|
||||
|
||||
// send the blueprint data to the server
|
||||
err = client.SendMessage("newt/blueprint/apply", map[string]interface{}{
|
||||
"blueprint": blueprintJsonData,
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -7,6 +7,7 @@ import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"io"
|
||||
"net"
|
||||
"net/http"
|
||||
"net/url"
|
||||
"os"
|
||||
@@ -18,6 +19,11 @@ import (
|
||||
|
||||
"github.com/fosrl/newt/logger"
|
||||
"github.com/gorilla/websocket"
|
||||
|
||||
"context"
|
||||
|
||||
"github.com/fosrl/newt/internal/telemetry"
|
||||
"go.opentelemetry.io/otel"
|
||||
)
|
||||
|
||||
type Client struct {
|
||||
@@ -37,6 +43,9 @@ type Client struct {
|
||||
writeMux sync.Mutex
|
||||
clientType string // Type of client (e.g., "newt", "olm")
|
||||
tlsConfig TLSConfig
|
||||
metricsCtxMu sync.RWMutex
|
||||
metricsCtx context.Context
|
||||
configNeedsSave bool // Flag to track if config needs to be saved
|
||||
}
|
||||
|
||||
type ClientOption func(*Client)
|
||||
@@ -80,6 +89,26 @@ func (c *Client) OnTokenUpdate(callback func(token string)) {
|
||||
c.onTokenUpdate = callback
|
||||
}
|
||||
|
||||
func (c *Client) metricsContext() context.Context {
|
||||
c.metricsCtxMu.RLock()
|
||||
defer c.metricsCtxMu.RUnlock()
|
||||
if c.metricsCtx != nil {
|
||||
return c.metricsCtx
|
||||
}
|
||||
return context.Background()
|
||||
}
|
||||
|
||||
func (c *Client) setMetricsContext(ctx context.Context) {
|
||||
c.metricsCtxMu.Lock()
|
||||
c.metricsCtx = ctx
|
||||
c.metricsCtxMu.Unlock()
|
||||
}
|
||||
|
||||
// MetricsContext exposes the context used for telemetry emission when a connection is active.
|
||||
func (c *Client) MetricsContext() context.Context {
|
||||
return c.metricsContext()
|
||||
}
|
||||
|
||||
// NewClient creates a new websocket client
|
||||
func NewClient(clientType string, ID, secret string, endpoint string, pingInterval time.Duration, pingTimeout time.Duration, opts ...ClientOption) (*Client, error) {
|
||||
config := &Config{
|
||||
@@ -139,6 +168,7 @@ func (c *Client) Close() error {
|
||||
|
||||
// Set connection status to false
|
||||
c.setConnected(false)
|
||||
telemetry.SetWSConnectionState(false)
|
||||
|
||||
// Close the WebSocket connection gracefully
|
||||
if c.conn != nil {
|
||||
@@ -169,7 +199,11 @@ func (c *Client) SendMessage(messageType string, data interface{}) error {
|
||||
|
||||
c.writeMux.Lock()
|
||||
defer c.writeMux.Unlock()
|
||||
return c.conn.WriteJSON(msg)
|
||||
if err := c.conn.WriteJSON(msg); err != nil {
|
||||
return err
|
||||
}
|
||||
telemetry.IncWSMessage(c.metricsContext(), "out", "text")
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) SendMessageInterval(messageType string, data interface{}, interval time.Duration) (stop func()) {
|
||||
@@ -264,8 +298,12 @@ func (c *Client) getToken() (string, error) {
|
||||
return "", fmt.Errorf("failed to marshal token request data: %w", err)
|
||||
}
|
||||
|
||||
ctx, cancel := context.WithTimeout(context.Background(), 30*time.Second)
|
||||
defer cancel()
|
||||
|
||||
// Create a new request
|
||||
req, err := http.NewRequest(
|
||||
req, err := http.NewRequestWithContext(
|
||||
ctx,
|
||||
"POST",
|
||||
baseEndpoint+"/api/v1/auth/"+c.clientType+"/get-token",
|
||||
bytes.NewBuffer(jsonData),
|
||||
@@ -287,6 +325,8 @@ func (c *Client) getToken() (string, error) {
|
||||
}
|
||||
resp, err := client.Do(req)
|
||||
if err != nil {
|
||||
telemetry.IncConnAttempt(ctx, "auth", "failure")
|
||||
telemetry.IncConnError(ctx, "auth", classifyConnError(err))
|
||||
return "", fmt.Errorf("failed to request new token: %w", err)
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
@@ -294,6 +334,16 @@ func (c *Client) getToken() (string, error) {
|
||||
if resp.StatusCode != http.StatusOK {
|
||||
body, _ := io.ReadAll(resp.Body)
|
||||
logger.Error("Failed to get token with status code: %d, body: %s", resp.StatusCode, string(body))
|
||||
telemetry.IncConnAttempt(ctx, "auth", "failure")
|
||||
etype := "io_error"
|
||||
if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
|
||||
etype = "auth_failed"
|
||||
}
|
||||
telemetry.IncConnError(ctx, "auth", etype)
|
||||
// Reconnect reason mapping for auth failures
|
||||
if resp.StatusCode == http.StatusUnauthorized || resp.StatusCode == http.StatusForbidden {
|
||||
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonAuthError)
|
||||
}
|
||||
return "", fmt.Errorf("failed to get token with status code: %d, body: %s", resp.StatusCode, string(body))
|
||||
}
|
||||
|
||||
@@ -312,10 +362,55 @@ func (c *Client) getToken() (string, error) {
|
||||
}
|
||||
|
||||
logger.Debug("Received token: %s", tokenResp.Data.Token)
|
||||
telemetry.IncConnAttempt(ctx, "auth", "success")
|
||||
|
||||
return tokenResp.Data.Token, nil
|
||||
}
|
||||
|
||||
// classifyConnError maps err to a fixed, low-cardinality error_type value.
//
// The result is one of tls_handshake, dial_timeout, auth_failed or io_error,
// chosen by case-insensitive substring matching on err.Error() in that order
// of precedence. A nil error yields the empty string.
func classifyConnError(err error) string {
	if err == nil {
		return ""
	}

	text := strings.ToLower(err.Error())
	// containsAny reports whether the lowered message holds any of the needles.
	containsAny := func(needles ...string) bool {
		for _, needle := range needles {
			if strings.Contains(text, needle) {
				return true
			}
		}
		return false
	}

	if containsAny("tls", "certificate") {
		return "tls_handshake"
	}
	if containsAny("timeout", "i/o timeout", "deadline exceeded") {
		return "dial_timeout"
	}
	if containsAny("unauthorized", "forbidden") {
		return "auth_failed"
	}
	// Group remaining network/socket errors as io_error to avoid label explosion.
	return "io_error"
}
|
||||
|
||||
func classifyWSDisconnect(err error) (result, reason string) {
|
||||
if err == nil {
|
||||
return "success", "normal"
|
||||
}
|
||||
if websocket.IsCloseError(err, websocket.CloseNormalClosure) {
|
||||
return "success", "normal"
|
||||
}
|
||||
if ne, ok := err.(net.Error); ok && ne.Timeout() {
|
||||
return "error", "timeout"
|
||||
}
|
||||
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure) {
|
||||
return "error", "unexpected_close"
|
||||
}
|
||||
msg := strings.ToLower(err.Error())
|
||||
switch {
|
||||
case strings.Contains(msg, "eof"):
|
||||
return "error", "eof"
|
||||
case strings.Contains(msg, "reset"):
|
||||
return "error", "connection_reset"
|
||||
default:
|
||||
return "error", "read_error"
|
||||
}
|
||||
}
|
||||
|
||||
func (c *Client) connectWithRetry() {
|
||||
for {
|
||||
select {
|
||||
@@ -334,9 +429,13 @@ func (c *Client) connectWithRetry() {
|
||||
}
|
||||
|
||||
func (c *Client) establishConnection() error {
|
||||
ctx := context.Background()
|
||||
|
||||
// Get token for authentication
|
||||
token, err := c.getToken()
|
||||
if err != nil {
|
||||
telemetry.IncConnAttempt(ctx, "websocket", "failure")
|
||||
telemetry.IncConnError(ctx, "websocket", classifyConnError(err))
|
||||
return fmt.Errorf("failed to get token: %w", err)
|
||||
}
|
||||
|
||||
@@ -369,7 +468,12 @@ func (c *Client) establishConnection() error {
|
||||
q.Set("clientType", c.clientType)
|
||||
u.RawQuery = q.Encode()
|
||||
|
||||
// Connect to WebSocket
|
||||
// Connect to WebSocket (optional span)
|
||||
tr := otel.Tracer("newt")
|
||||
ctx, span := tr.Start(ctx, "ws.connect")
|
||||
defer span.End()
|
||||
|
||||
start := time.Now()
|
||||
dialer := websocket.DefaultDialer
|
||||
|
||||
// Use new TLS configuration method
|
||||
@@ -391,18 +495,42 @@ func (c *Client) establishConnection() error {
|
||||
logger.Debug("WebSocket TLS certificate verification disabled via SKIP_TLS_VERIFY environment variable")
|
||||
}
|
||||
|
||||
conn, _, err := dialer.Dial(u.String(), nil)
|
||||
conn, _, err := dialer.DialContext(ctx, u.String(), nil)
|
||||
lat := time.Since(start).Seconds()
|
||||
if err != nil {
|
||||
telemetry.IncConnAttempt(ctx, "websocket", "failure")
|
||||
etype := classifyConnError(err)
|
||||
telemetry.IncConnError(ctx, "websocket", etype)
|
||||
telemetry.ObserveWSConnectLatency(ctx, lat, "failure", etype)
|
||||
// Map handshake-related errors to reconnect reasons where appropriate
|
||||
if etype == "tls_handshake" {
|
||||
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonHandshakeError)
|
||||
} else if etype == "dial_timeout" {
|
||||
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonTimeout)
|
||||
} else {
|
||||
telemetry.IncReconnect(ctx, c.config.ID, "client", telemetry.ReasonError)
|
||||
}
|
||||
telemetry.IncWSReconnect(ctx, etype)
|
||||
return fmt.Errorf("failed to connect to WebSocket: %w", err)
|
||||
}
|
||||
|
||||
telemetry.IncConnAttempt(ctx, "websocket", "success")
|
||||
telemetry.ObserveWSConnectLatency(ctx, lat, "success", "")
|
||||
c.conn = conn
|
||||
c.setConnected(true)
|
||||
telemetry.SetWSConnectionState(true)
|
||||
c.setMetricsContext(ctx)
|
||||
sessionStart := time.Now()
|
||||
// Wire up pong handler for metrics
|
||||
c.conn.SetPongHandler(func(appData string) error {
|
||||
telemetry.IncWSMessage(c.metricsContext(), "in", "pong")
|
||||
return nil
|
||||
})
|
||||
|
||||
// Start the ping monitor
|
||||
go c.pingMonitor()
|
||||
// Start the read pump with disconnect detection
|
||||
go c.readPumpWithDisconnectDetection()
|
||||
go c.readPumpWithDisconnectDetection(sessionStart)
|
||||
|
||||
if c.onConnect != nil {
|
||||
err := c.saveConfig()
|
||||
@@ -495,6 +623,9 @@ func (c *Client) pingMonitor() {
|
||||
}
|
||||
c.writeMux.Lock()
|
||||
err := c.conn.WriteControl(websocket.PingMessage, []byte{}, time.Now().Add(c.pingTimeout))
|
||||
if err == nil {
|
||||
telemetry.IncWSMessage(c.metricsContext(), "out", "ping")
|
||||
}
|
||||
c.writeMux.Unlock()
|
||||
if err != nil {
|
||||
// Check if we're shutting down before logging error and reconnecting
|
||||
@@ -504,6 +635,8 @@ func (c *Client) pingMonitor() {
|
||||
return
|
||||
default:
|
||||
logger.Error("Ping failed: %v", err)
|
||||
telemetry.IncWSKeepaliveFailure(c.metricsContext(), "ping_write")
|
||||
telemetry.IncWSReconnect(c.metricsContext(), "ping_write")
|
||||
c.reconnect()
|
||||
return
|
||||
}
|
||||
@@ -513,17 +646,26 @@ func (c *Client) pingMonitor() {
|
||||
}
|
||||
|
||||
// readPumpWithDisconnectDetection reads messages and triggers reconnect on error
|
||||
func (c *Client) readPumpWithDisconnectDetection() {
|
||||
func (c *Client) readPumpWithDisconnectDetection(started time.Time) {
|
||||
ctx := c.metricsContext()
|
||||
disconnectReason := "shutdown"
|
||||
disconnectResult := "success"
|
||||
|
||||
defer func() {
|
||||
if c.conn != nil {
|
||||
c.conn.Close()
|
||||
}
|
||||
if !started.IsZero() {
|
||||
telemetry.ObserveWSSessionDuration(ctx, time.Since(started).Seconds(), disconnectResult)
|
||||
}
|
||||
telemetry.IncWSDisconnect(ctx, disconnectReason, disconnectResult)
|
||||
// Only attempt reconnect if we're not shutting down
|
||||
select {
|
||||
case <-c.done:
|
||||
// Shutting down, don't reconnect
|
||||
return
|
||||
default:
|
||||
telemetry.IncWSReconnect(ctx, disconnectReason)
|
||||
c.reconnect()
|
||||
}
|
||||
}()
|
||||
@@ -531,23 +673,33 @@ func (c *Client) readPumpWithDisconnectDetection() {
|
||||
for {
|
||||
select {
|
||||
case <-c.done:
|
||||
disconnectReason = "shutdown"
|
||||
disconnectResult = "success"
|
||||
return
|
||||
default:
|
||||
var msg WSMessage
|
||||
err := c.conn.ReadJSON(&msg)
|
||||
if err == nil {
|
||||
telemetry.IncWSMessage(c.metricsContext(), "in", "text")
|
||||
}
|
||||
if err != nil {
|
||||
// Check if we're shutting down before logging error
|
||||
select {
|
||||
case <-c.done:
|
||||
// Expected during shutdown, don't log as error
|
||||
logger.Debug("WebSocket connection closed during shutdown")
|
||||
disconnectReason = "shutdown"
|
||||
disconnectResult = "success"
|
||||
return
|
||||
default:
|
||||
// Unexpected error during normal operation
|
||||
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure, websocket.CloseNormalClosure) {
|
||||
logger.Error("WebSocket read error: %v", err)
|
||||
} else {
|
||||
logger.Debug("WebSocket connection closed: %v", err)
|
||||
disconnectResult, disconnectReason = classifyWSDisconnect(err)
|
||||
if disconnectResult == "error" {
|
||||
if websocket.IsUnexpectedCloseError(err, websocket.CloseGoingAway, websocket.CloseAbnormalClosure, websocket.CloseNormalClosure) {
|
||||
logger.Error("WebSocket read error: %v", err)
|
||||
} else {
|
||||
logger.Debug("WebSocket connection closed: %v", err)
|
||||
}
|
||||
}
|
||||
return // triggers reconnect via defer
|
||||
}
|
||||
@@ -564,6 +716,7 @@ func (c *Client) readPumpWithDisconnectDetection() {
|
||||
|
||||
func (c *Client) reconnect() {
|
||||
c.setConnected(false)
|
||||
telemetry.SetWSConnectionState(false)
|
||||
if c.conn != nil {
|
||||
c.conn.Close()
|
||||
c.conn = nil
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
|
||||
"github.com/fosrl/newt/logger"
|
||||
)
|
||||
|
||||
func getConfigPath(clientType string) string {
|
||||
@@ -33,14 +35,25 @@ func getConfigPath(clientType string) string {
|
||||
}
|
||||
|
||||
func (c *Client) loadConfig() error {
|
||||
originalConfig := *c.config // Store original config to detect changes
|
||||
configPath := getConfigPath(c.clientType)
|
||||
|
||||
if c.config.ID != "" && c.config.Secret != "" && c.config.Endpoint != "" {
|
||||
logger.Debug("Config already provided, skipping loading from file")
|
||||
// Check if config file exists, if not, we should save it
|
||||
if _, err := os.Stat(configPath); os.IsNotExist(err) {
|
||||
logger.Info("Config file does not exist at %s, will create it", configPath)
|
||||
c.configNeedsSave = true
|
||||
}
|
||||
return nil
|
||||
}
|
||||
|
||||
configPath := getConfigPath(c.clientType)
|
||||
logger.Info("Loading config from: %s", configPath)
|
||||
data, err := os.ReadFile(configPath)
|
||||
if err != nil {
|
||||
if os.IsNotExist(err) {
|
||||
logger.Info("Config file does not exist at %s, will create it with provided values", configPath)
|
||||
c.configNeedsSave = true
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
@@ -51,6 +64,12 @@ func (c *Client) loadConfig() error {
|
||||
return err
|
||||
}
|
||||
|
||||
// Track what was loaded from file vs provided by CLI
|
||||
fileHadID := c.config.ID == ""
|
||||
fileHadSecret := c.config.Secret == ""
|
||||
fileHadCert := c.config.TlsClientCert == ""
|
||||
fileHadEndpoint := c.config.Endpoint == ""
|
||||
|
||||
if c.config.ID == "" {
|
||||
c.config.ID = config.ID
|
||||
}
|
||||
@@ -65,14 +84,37 @@ func (c *Client) loadConfig() error {
|
||||
c.baseURL = config.Endpoint
|
||||
}
|
||||
|
||||
// Check if CLI args provided values that override file values
|
||||
if (!fileHadID && originalConfig.ID != "") ||
|
||||
(!fileHadSecret && originalConfig.Secret != "") ||
|
||||
(!fileHadCert && originalConfig.TlsClientCert != "") ||
|
||||
(!fileHadEndpoint && originalConfig.Endpoint != "") {
|
||||
logger.Info("CLI arguments provided, config will be updated")
|
||||
c.configNeedsSave = true
|
||||
}
|
||||
|
||||
logger.Debug("Loaded config from %s", configPath)
|
||||
logger.Debug("Config: %+v", c.config)
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (c *Client) saveConfig() error {
|
||||
if !c.configNeedsSave {
|
||||
logger.Debug("Config has not changed, skipping save")
|
||||
return nil
|
||||
}
|
||||
|
||||
configPath := getConfigPath(c.clientType)
|
||||
data, err := json.MarshalIndent(c.config, "", " ")
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
return os.WriteFile(configPath, data, 0644)
|
||||
|
||||
logger.Info("Saving config to: %s", configPath)
|
||||
err = os.WriteFile(configPath, data, 0644)
|
||||
if err == nil {
|
||||
c.configNeedsSave = false // Reset flag after successful save
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
97
wg/wg.go
97
wg/wg.go
@@ -3,6 +3,7 @@
|
||||
package wg
|
||||
|
||||
import (
|
||||
"context"
|
||||
"encoding/json"
|
||||
"errors"
|
||||
"fmt"
|
||||
@@ -13,16 +14,19 @@ import (
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"math/rand"
|
||||
|
||||
"github.com/fosrl/newt/logger"
|
||||
"github.com/fosrl/newt/network"
|
||||
"github.com/fosrl/newt/websocket"
|
||||
"github.com/vishvananda/netlink"
|
||||
"golang.org/x/crypto/chacha20poly1305"
|
||||
"golang.org/x/crypto/curve25519"
|
||||
"golang.org/x/exp/rand"
|
||||
"golang.zx2c4.com/wireguard/conn"
|
||||
"golang.zx2c4.com/wireguard/wgctrl"
|
||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||
|
||||
"github.com/fosrl/newt/internal/telemetry"
|
||||
)
|
||||
|
||||
type WgConfig struct {
|
||||
@@ -106,7 +110,7 @@ func FindAvailableUDPPort(minPort, maxPort uint16) (uint16, error) {
|
||||
}
|
||||
|
||||
// Fisher-Yates shuffle to randomize the port order
|
||||
rand.Seed(uint64(time.Now().UnixNano()))
|
||||
rand.Seed(time.Now().UnixNano())
|
||||
for i := len(portRange) - 1; i > 0; i-- {
|
||||
j := rand.Intn(i + 1)
|
||||
portRange[i], portRange[j] = portRange[j], portRange[i]
|
||||
@@ -152,6 +156,7 @@ func NewWireGuardService(interfaceName string, mtu int, generateAndSaveKeyTo str
|
||||
}
|
||||
|
||||
var key wgtypes.Key
|
||||
var port uint16
|
||||
// if generateAndSaveKeyTo is provided, generate a private key and save it to the file. if the file already exists, load the key from the file
|
||||
key, err = wgtypes.GeneratePrivateKey()
|
||||
if err != nil {
|
||||
@@ -177,40 +182,43 @@ func NewWireGuardService(interfaceName string, mtu int, generateAndSaveKeyTo str
|
||||
}
|
||||
}
|
||||
|
||||
service := &WireGuardService{
|
||||
interfaceName: interfaceName,
|
||||
mtu: mtu,
|
||||
client: wsClient,
|
||||
wgClient: wgClient,
|
||||
key: key,
|
||||
keyFilePath: generateAndSaveKeyTo,
|
||||
newtId: newtId,
|
||||
host: host,
|
||||
lastReadings: make(map[string]PeerReading),
|
||||
stopHolepunch: make(chan struct{}),
|
||||
}
|
||||
|
||||
// Get the existing wireguard port (keep this part)
|
||||
device, err := service.wgClient.Device(service.interfaceName)
|
||||
// Get the existing wireguard port
|
||||
device, err := wgClient.Device(interfaceName)
|
||||
if err == nil {
|
||||
service.Port = uint16(device.ListenPort)
|
||||
if service.Port != 0 {
|
||||
logger.Info("WireGuard interface %s already exists with port %d\n", service.interfaceName, service.Port)
|
||||
port = uint16(device.ListenPort)
|
||||
// also set the private key to the existing key
|
||||
key = device.PrivateKey
|
||||
if port != 0 {
|
||||
logger.Info("WireGuard interface %s already exists with port %d\n", interfaceName, port)
|
||||
} else {
|
||||
service.Port, err = FindAvailableUDPPort(49152, 65535)
|
||||
port, err = FindAvailableUDPPort(49152, 65535)
|
||||
if err != nil {
|
||||
fmt.Printf("Error finding available port: %v\n", err)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
} else {
|
||||
service.Port, err = FindAvailableUDPPort(49152, 65535)
|
||||
port, err = FindAvailableUDPPort(49152, 65535)
|
||||
if err != nil {
|
||||
fmt.Printf("Error finding available port: %v\n", err)
|
||||
return nil, err
|
||||
}
|
||||
}
|
||||
|
||||
service := &WireGuardService{
|
||||
interfaceName: interfaceName,
|
||||
mtu: mtu,
|
||||
client: wsClient,
|
||||
wgClient: wgClient,
|
||||
key: key,
|
||||
Port: port,
|
||||
keyFilePath: generateAndSaveKeyTo,
|
||||
newtId: newtId,
|
||||
host: host,
|
||||
lastReadings: make(map[string]PeerReading),
|
||||
stopHolepunch: make(chan struct{}),
|
||||
}
|
||||
|
||||
// Register websocket handlers
|
||||
wsClient.RegisterHandler("newt/wg/receive-config", service.handleConfig)
|
||||
wsClient.RegisterHandler("newt/wg/peer/add", service.handleAddPeer)
|
||||
@@ -276,6 +284,15 @@ func (s *WireGuardService) LoadRemoteConfig() error {
|
||||
}
|
||||
|
||||
func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
|
||||
ctx := context.Background()
|
||||
if s.client != nil {
|
||||
ctx = s.client.MetricsContext()
|
||||
}
|
||||
result := "success"
|
||||
defer func() {
|
||||
telemetry.IncConfigReload(ctx, result)
|
||||
}()
|
||||
|
||||
var config WgConfig
|
||||
|
||||
logger.Debug("Received message: %v", msg)
|
||||
@@ -284,11 +301,13 @@ func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
|
||||
jsonData, err := json.Marshal(msg.Data)
|
||||
if err != nil {
|
||||
logger.Info("Error marshaling data: %v", err)
|
||||
result = "failure"
|
||||
return
|
||||
}
|
||||
|
||||
if err := json.Unmarshal(jsonData, &config); err != nil {
|
||||
logger.Info("Error unmarshaling target data: %v", err)
|
||||
result = "failure"
|
||||
return
|
||||
}
|
||||
s.config = config
|
||||
@@ -298,13 +317,29 @@ func (s *WireGuardService) handleConfig(msg websocket.WSMessage) {
|
||||
s.stopGetConfig = nil
|
||||
}
|
||||
|
||||
// Ensure the WireGuard interface and peers are configured
|
||||
if err := s.ensureWireguardInterface(config); err != nil {
|
||||
logger.Error("Failed to ensure WireGuard interface: %v", err)
|
||||
// telemetry: config reload success
|
||||
// Optional reconnect reason mapping: config change
|
||||
if s.serverPubKey != "" {
|
||||
telemetry.IncReconnect(ctx, s.serverPubKey, "client", telemetry.ReasonConfigChange)
|
||||
}
|
||||
|
||||
// Ensure the WireGuard interface and peers are configured
|
||||
start := time.Now()
|
||||
if err := s.ensureWireguardInterface(config); err != nil {
|
||||
logger.Error("Failed to ensure WireGuard interface: %v", err)
|
||||
telemetry.ObserveConfigApply(ctx, "interface", "failure", time.Since(start).Seconds())
|
||||
result = "failure"
|
||||
} else {
|
||||
telemetry.ObserveConfigApply(ctx, "interface", "success", time.Since(start).Seconds())
|
||||
}
|
||||
|
||||
startPeers := time.Now()
|
||||
if err := s.ensureWireguardPeers(config.Peers); err != nil {
|
||||
logger.Error("Failed to ensure WireGuard peers: %v", err)
|
||||
telemetry.ObserveConfigApply(ctx, "peer", "failure", time.Since(startPeers).Seconds())
|
||||
result = "failure"
|
||||
} else {
|
||||
telemetry.ObserveConfigApply(ctx, "peer", "success", time.Since(startPeers).Seconds())
|
||||
}
|
||||
}
|
||||
|
||||
@@ -948,22 +983,30 @@ func (s *WireGuardService) encryptPayload(payload []byte) (interface{}, error) {
|
||||
}
|
||||
|
||||
func (s *WireGuardService) keepSendingUDPHolePunch(host string) {
|
||||
logger.Info("Starting UDP hole punch routine to %s:21820", host)
|
||||
|
||||
// send initial hole punch
|
||||
if err := s.sendUDPHolePunch(host + ":21820"); err != nil {
|
||||
logger.Error("Failed to send initial UDP hole punch: %v", err)
|
||||
logger.Debug("Failed to send initial UDP hole punch: %v", err)
|
||||
}
|
||||
|
||||
ticker := time.NewTicker(3 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
timeout := time.NewTimer(15 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.stopHolepunch:
|
||||
logger.Info("Stopping UDP holepunch")
|
||||
return
|
||||
case <-timeout.C:
|
||||
logger.Info("UDP holepunch routine timed out after 15 seconds")
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := s.sendUDPHolePunch(host + ":21820"); err != nil {
|
||||
logger.Error("Failed to send UDP hole punch: %v", err)
|
||||
logger.Debug("Failed to send UDP hole punch: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
package wgnetstack
|
||||
|
||||
import (
|
||||
"context"
|
||||
"crypto/rand"
|
||||
"encoding/base64"
|
||||
"encoding/hex"
|
||||
@@ -26,6 +27,8 @@ import (
|
||||
"golang.zx2c4.com/wireguard/tun"
|
||||
"golang.zx2c4.com/wireguard/tun/netstack"
|
||||
"golang.zx2c4.com/wireguard/wgctrl/wgtypes"
|
||||
|
||||
"github.com/fosrl/newt/internal/telemetry"
|
||||
)
|
||||
|
||||
type WgConfig struct {
|
||||
@@ -187,6 +190,13 @@ func NewWireGuardService(interfaceName string, mtu int, generateAndSaveKeyTo str
|
||||
// Load or generate private key
|
||||
if generateAndSaveKeyTo != "" {
|
||||
if _, err := os.Stat(generateAndSaveKeyTo); os.IsNotExist(err) {
|
||||
// File doesn't exist, save the generated key
|
||||
err = os.WriteFile(generateAndSaveKeyTo, []byte(key.String()), 0600)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to save private key: %v", err)
|
||||
}
|
||||
} else {
|
||||
// File exists, read the existing key
|
||||
keyData, err := os.ReadFile(generateAndSaveKeyTo)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to read private key: %v", err)
|
||||
@@ -195,11 +205,6 @@ func NewWireGuardService(interfaceName string, mtu int, generateAndSaveKeyTo str
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse private key: %v", err)
|
||||
}
|
||||
} else {
|
||||
err = os.WriteFile(generateAndSaveKeyTo, []byte(key.String()), 0600)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to save private key: %v", err)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -240,14 +245,20 @@ func NewWireGuardService(interfaceName string, mtu int, generateAndSaveKeyTo str
|
||||
return service, nil
|
||||
}
|
||||
|
||||
// ReportRTT allows reporting native RTTs to telemetry, rate-limited externally.
|
||||
func (s *WireGuardService) ReportRTT(seconds float64) {
|
||||
if s.serverPubKey == "" { return }
|
||||
telemetry.ObserveTunnelLatency(context.Background(), s.serverPubKey, "wireguard", seconds)
|
||||
}
|
||||
|
||||
func (s *WireGuardService) addTcpTarget(msg websocket.WSMessage) {
|
||||
logger.Debug("Received: %+v", msg)
|
||||
|
||||
// if there is no wgData or pm, we can't add targets
|
||||
if s.TunnelIP == "" || s.proxyManager == nil {
|
||||
logger.Info("No tunnel IP or proxy manager available")
|
||||
return
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
targetData, err := parseTargetData(msg.Data)
|
||||
if err != nil {
|
||||
@@ -1074,11 +1085,17 @@ func (s *WireGuardService) keepSendingUDPHolePunch(host string) {
|
||||
ticker := time.NewTicker(3 * time.Second)
|
||||
defer ticker.Stop()
|
||||
|
||||
timeout := time.NewTimer(15 * time.Second)
|
||||
defer timeout.Stop()
|
||||
|
||||
for {
|
||||
select {
|
||||
case <-s.stopHolepunch:
|
||||
logger.Info("Stopping UDP holepunch")
|
||||
return
|
||||
case <-timeout.C:
|
||||
logger.Info("UDP holepunch routine timed out after 15 seconds")
|
||||
return
|
||||
case <-ticker.C:
|
||||
if err := s.sendUDPHolePunch(host + ":21820"); err != nil {
|
||||
logger.Debug("Failed to send UDP hole punch: %v", err)
|
||||
|
||||
@@ -126,7 +126,7 @@ func (s *Server) Stop() {
|
||||
s.conn.Close()
|
||||
}
|
||||
s.isRunning = false
|
||||
logger.Info(s.outputPrefix + "Server stopped")
|
||||
logger.Info("%sServer stopped", s.outputPrefix)
|
||||
}
|
||||
|
||||
// RestartWithNetstack stops the current server and restarts it with netstack
|
||||
@@ -161,7 +161,7 @@ func (s *Server) handleConnections() {
|
||||
// Set read deadline to avoid blocking forever
|
||||
err := s.conn.SetReadDeadline(time.Now().Add(1 * time.Second))
|
||||
if err != nil {
|
||||
logger.Error(s.outputPrefix+"Error setting read deadline: %v", err)
|
||||
logger.Error("%sError setting read deadline: %v", s.outputPrefix, err)
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -187,7 +187,7 @@ func (s *Server) handleConnections() {
|
||||
case <-s.shutdownCh:
|
||||
return // Don't log error if we're shutting down
|
||||
default:
|
||||
logger.Error(s.outputPrefix+"Error reading from UDP: %v", err)
|
||||
logger.Error("%sError reading from UDP: %v", s.outputPrefix, err)
|
||||
}
|
||||
continue
|
||||
}
|
||||
@@ -219,7 +219,7 @@ func (s *Server) handleConnections() {
|
||||
copy(responsePacket[5:13], buffer[5:13])
|
||||
|
||||
// Log response being sent for debugging
|
||||
logger.Debug(s.outputPrefix+"Sending response to %s", addr.String())
|
||||
logger.Debug("%sSending response to %s", s.outputPrefix, addr.String())
|
||||
|
||||
// Send the response packet - handle both regular UDP and netstack UDP
|
||||
if s.useNetstack {
|
||||
@@ -233,9 +233,9 @@ func (s *Server) handleConnections() {
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
logger.Error(s.outputPrefix+"Error sending response: %v", err)
|
||||
logger.Error("%sError sending response: %v", s.outputPrefix, err)
|
||||
} else {
|
||||
logger.Debug(s.outputPrefix + "Response sent successfully")
|
||||
logger.Debug("%sResponse sent successfully", s.outputPrefix)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user