Compare commits


12 Commits
v1.4.4 ... dev

Author | SHA1 | Message | Date
Owen | fde70dd15b | Update newt | 2026-04-27 20:13:20 -07:00
Owen | 7bf6da1729 | Update newt | 2026-04-22 12:22:39 -07:00
Owen | 7a8f4ab049 | Merge branch 'main' into dev | 2026-04-22 12:09:50 -07:00
Owen | 334ea156b6 | Merge branch 'private-site-ha' into dev | 2026-04-21 15:08:11 -07:00
Owen | aa838fec61 | Mention the cli | 2026-04-19 15:49:08 -07:00
Owen | 6eaf8c1475 | Basic route selector working | 2026-04-13 18:24:11 -07:00
Owen Schwartz | df6a84648b | Merge pull request #100 from LaurenceJJones/fix/issue-38-stale-dns-cleanup | 2026-04-07 21:26:38 -04:00
    feat(DNS): Add static cleanup funcs
Owen | 5ef6b21a6e | Add CODEOWNERS | 2026-04-07 11:34:38 -04:00
Owen | 7d83518951 | Get peer manager | 2026-03-20 17:28:39 -07:00
Owen | 964532777a | Increase attempts | 2026-03-19 17:24:41 -07:00
Laurence | f250702177 | feat(DNS): Add static cleanup funcs | 2026-03-12 12:26:03 +00:00
    To aid the CLI in cleaning up configuration, we expose static functions that know how to handle each provider and platform. Linked to https://github.com/fosrl/cli/issues/38
Laurence | 8549dc8746 | enhance(dns): expose stale cleanup functionality | 2026-02-26 11:30:12 +00:00
    When the tunnel is force-closed, an integration may want to call the cleanup function manually to fix stale state without knowing which configuration to clean up.
19 changed files with 551 additions and 62 deletions

.github/CODEOWNERS (vendored new file, 1 change)

@@ -0,0 +1 @@
* @oschwartz10612 @miloschwartz


@@ -1,4 +1,5 @@
# Olm
Olm is being phased out in favor of the [Pangolin CLI](https://github.com/fosrl/cli) and is only meant for advanced use cases.
Olm is a [WireGuard](https://www.wireguard.com/) tunnel client designed to securely connect your computer to Newt sites running on remote networks.


@@ -14,3 +14,9 @@ func SetupDNSOverride(interfaceName string, proxyIp netip.Addr) error {
func RestoreDNSOverride() error {
return nil
}
// CleanupStaleState is a no-op on Android as DNS configuration is handled by the VpnService API
func CleanupStaleState(interfaceName string) error {
_ = interfaceName
return nil
}


@@ -61,3 +61,20 @@ func RestoreDNSOverride() error {
logger.Info("DNS configuration restored successfully")
return nil
}
// CleanupStaleState removes any stale DNS configuration left over from a previous
// unclean shutdown (e.g., a system crash or power loss while the tunnel was active).
// This function should be called early during startup, before any network operations,
// to ensure DNS is working properly.
//
// On macOS, this cleans up any scutil DNS keys that were created but not removed.
func CleanupStaleState(interfaceName string) error {
_ = interfaceName
if err := platform.CleanupStaleDarwinDNS(); err != nil {
logger.Warn("Failed to cleanup stale Darwin DNS config: %v", err)
return fmt.Errorf("Darwin DNS cleanup: %w", err)
}
logger.Info("Stale DNS state cleanup completed successfully")
return nil
}


@@ -13,3 +13,9 @@ func SetupDNSOverride(interfaceName string, proxyIp netip.Addr) error {
func RestoreDNSOverride() error {
return nil
}
// CleanupStaleState is a no-op on iOS as DNS configuration is handled by the system
func CleanupStaleState(interfaceName string) error {
_ = interfaceName
return nil
}


@@ -98,3 +98,49 @@ func RestoreDNSOverride() error {
logger.Info("DNS configuration restored successfully")
return nil
}
// CleanupStaleState removes any stale DNS configuration left over from a previous
// unclean shutdown (e.g., a system crash or power loss while the tunnel was active).
// This function should be called early during startup, before any network operations,
// to ensure DNS is working properly.
//
// It checks and cleans up stale state from all supported DNS managers:
// - NetworkManager: removes /etc/NetworkManager/conf.d/olm-dns.conf
// - resolvconf: removes entry for the provided interface
// - File-based: restores /etc/resolv.conf from backup if it exists
//
// This is safe to call even if no stale state exists.
func CleanupStaleState(interfaceName string) error {
var errs []error
// Clean up NetworkManager stale config
if err := platform.CleanupStaleNetworkManagerDNS(); err != nil {
logger.Warn("Failed to cleanup stale NetworkManager DNS config: %v", err)
errs = append(errs, fmt.Errorf("NetworkManager cleanup: %w", err))
} else {
logger.Debug("NetworkManager DNS cleanup completed")
}
// Clean up resolvconf stale entries for the provided interface
if err := platform.CleanupStaleResolvconfDNS(interfaceName); err != nil {
logger.Warn("Failed to cleanup stale resolvconf DNS config: %v", err)
errs = append(errs, fmt.Errorf("resolvconf cleanup: %w", err))
} else {
logger.Debug("resolvconf DNS cleanup completed")
}
// Clean up file-based stale backup
if err := platform.CleanupStaleFileDNS(); err != nil {
logger.Warn("Failed to cleanup stale file-based DNS config: %v", err)
errs = append(errs, fmt.Errorf("file DNS cleanup: %w", err))
} else {
logger.Debug("File-based DNS cleanup completed")
}
if len(errs) > 0 {
return fmt.Errorf("some DNS cleanup operations failed: %v", errs)
}
logger.Info("Stale DNS state cleanup completed successfully")
return nil
}
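Taken together, the per-platform CleanupStaleState implementations give integrations a single call to make before any network setup. A minimal usage sketch, assuming a hypothetical import path for the dns package and the default "olm" interface name mentioned in the resolvconf helper below:

package main

import (
	"log"

	"example.com/olm/dns" // hypothetical import path
)

func main() {
	// Best-effort cleanup of stale DNS state from a previous unclean
	// shutdown, done before any network operations as recommended above.
	if err := dns.CleanupStaleState("olm"); err != nil {
		log.Printf("stale DNS cleanup: %v", err)
	}
	// ... proceed with normal tunnel startup ...
}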


@@ -61,3 +61,19 @@ func RestoreDNSOverride() error {
logger.Info("DNS configuration restored successfully")
return nil
}
// CleanupStaleState removes any stale DNS configuration left over from a previous
// unclean shutdown (e.g., a system crash or power loss while the tunnel was active).
// This function should be called early during startup, before any network operations,
// to ensure DNS is working properly.
//
// On Windows, DNS configuration is tied to the interface GUID. When the WireGuard
// interface is recreated, it gets a new GUID, so there's no stale state to clean up.
func CleanupStaleState(interfaceName string) error {
// Windows DNS configuration via registry is interface-specific.
// When the WireGuard interface is recreated, it gets a new GUID,
// so there's no leftover state to clean up from previous sessions.
_ = interfaceName
logger.Debug("Windows DNS cleanup: no stale state to clean (interface-specific)")
return nil
}


@@ -417,3 +417,59 @@ func (d *DarwinDNSConfigurator) clearState() error {
logger.Debug("Cleared DNS state file")
return nil
}
// CleanupStaleDarwinDNS removes any stale DNS configuration left by the Darwin
// configurator from a previous unclean shutdown. This is a static function that can be
// called without creating a configurator instance, useful for cleanup before network operations.
func CleanupStaleDarwinDNS() error {
stateFilePath := getDNSStateFilePath()
// Check if state file exists
data, err := os.ReadFile(stateFilePath)
if err != nil {
if os.IsNotExist(err) {
// No state file, nothing to clean up
return nil
}
return fmt.Errorf("read state file: %w", err)
}
var state DNSPersistentState
if err := json.Unmarshal(data, &state); err != nil {
// Invalid state file, remove it
os.Remove(stateFilePath)
return nil
}
if len(state.CreatedKeys) == 0 {
// No keys to clean up
return nil
}
logger.Info("Found DNS state from previous session, cleaning up %d keys", len(state.CreatedKeys))
// Remove all keys from previous session using scutil directly
for _, key := range state.CreatedKeys {
logger.Debug("Removing leftover DNS key: %s", key)
cmd := fmt.Sprintf("open\nremove %s\nquit\n", key)
scutilCmd := exec.Command(scutilPath)
scutilCmd.Stdin = strings.NewReader(cmd)
if err := scutilCmd.Run(); err != nil {
logger.Warn("Failed to remove DNS key %s: %v", key, err)
}
}
// Clear state file
if err := os.Remove(stateFilePath); err != nil && !os.IsNotExist(err) {
logger.Warn("Failed to clear DNS state file: %v", err)
}
// Flush DNS cache after cleanup
cacheCmd := exec.Command(dscacheutilPath, "-flushcache")
_ = cacheCmd.Run()
killCmd := exec.Command("killall", "-HUP", "mDNSResponder")
_ = killCmd.Run()
return nil
}
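For reference, the state file this function consumes records the scutil keys created during DNS setup. A minimal sketch of the struct, inferred from the single field read above (the real definition, including any JSON tags, lives elsewhere in this file and may carry more fields):

// Sketch inferred from usage; only CreatedKeys is read by CleanupStaleDarwinDNS.
type DNSPersistentState struct {
	CreatedKeys []string // scutil keys created during setup (illustrative)
}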


@@ -218,3 +218,27 @@ func copyFile(src, dst string) error {
return nil
}
// CleanupStaleFileDNS removes any stale DNS configuration left by the file-based
// configurator from a previous unclean shutdown. This is a static function that can be
// called without creating a configurator instance, useful for cleanup before network operations.
func CleanupStaleFileDNS() error {
// Check if backup file exists from a previous session
if _, err := os.Stat(resolvConfBackupPath); os.IsNotExist(err) {
// No backup file, nothing to clean up
return nil
}
// A backup exists, which means we crashed while DNS was configured
// Restore the original resolv.conf
if err := copyFile(resolvConfBackupPath, resolvConfPath); err != nil {
return fmt.Errorf("restore from backup during cleanup: %w", err)
}
// Remove backup file
if err := os.Remove(resolvConfBackupPath); err != nil {
return fmt.Errorf("remove backup file during cleanup: %w", err)
}
return nil
}
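The copyFile helper referenced by the hunk header is defined earlier in this file and not shown here. A minimal sketch of what such a helper typically looks like, assuming a plain read-then-write with conventional resolv.conf permissions:

// Hypothetical reconstruction of copyFile; the real implementation may differ
// (e.g., it may preserve the source file's mode).
func copyFile(src, dst string) error {
	data, err := os.ReadFile(src)
	if err != nil {
		return err
	}
	return os.WriteFile(dst, data, 0644)
}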


@@ -323,3 +323,41 @@ func GetNetworkManagerVersion() (string, error) {
return version, nil
}
// CleanupStaleNetworkManagerDNS removes any stale DNS configuration left by NetworkManager
// configurator from a previous unclean shutdown. This is a static function that can be called
// without creating a configurator instance, useful for cleanup before network operations.
func CleanupStaleNetworkManagerDNS() error {
confPath := networkManagerConfDir + "/" + networkManagerDNSConfFile
// Check if our config file exists from a previous session
if _, err := os.Stat(confPath); os.IsNotExist(err) {
// No config file, nothing to clean up
return nil
}
// Remove the stale configuration file
if err := os.Remove(confPath); err != nil && !os.IsNotExist(err) {
return fmt.Errorf("remove stale DNS config file: %w", err)
}
// Try to reload NetworkManager if it's available
if IsNetworkManagerAvailable() {
conn, err := dbus.SystemBus()
if err != nil {
return fmt.Errorf("connect to system bus for reload: %w", err)
}
defer conn.Close()
obj := conn.Object(networkManagerDest, networkManagerDbusObjectNode)
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
defer cancel()
if err := obj.CallWithContext(ctx, networkManagerDest+".Reload", 0, uint32(0)).Store(); err != nil {
return fmt.Errorf("reload NetworkManager after cleanup: %w", err)
}
}
return nil
}


@@ -219,3 +219,37 @@ func IsResolvconfAvailable() bool {
cmd := exec.Command(resolvconfCommand, "--version")
return cmd.Run() == nil
}
// CleanupStaleResolvconfDNS removes any stale DNS configuration left by the resolvconf
// configurator from a previous unclean shutdown. This is a static function that can be
// called without creating a configurator instance, useful for cleanup before network operations.
// The interfaceName parameter specifies which interface entry to clean up (typically "olm").
func CleanupStaleResolvconfDNS(interfaceName string) error {
if !IsResolvconfAvailable() {
// resolvconf not available, nothing to clean up
return nil
}
// Detect resolvconf implementation type
implType, err := detectResolvconfType()
if err != nil {
// Can't detect type, try default
implType = "resolvconf"
}
// Try to delete any existing entry for this interface
// This is idempotent - if no entry exists, resolvconf will just return success
var cmd *exec.Cmd
switch implType {
case "openresolv":
cmd = exec.Command(resolvconfCommand, "-f", "-d", interfaceName)
default:
cmd = exec.Command(resolvconfCommand, "-d", interfaceName)
}
// Ignore errors - the entry may not exist, which is fine
_ = cmd.Run()
return nil
}

go.mod (14 changes)

@@ -4,11 +4,11 @@ go 1.25.0
require (
github.com/Microsoft/go-winio v0.6.2
github.com/fosrl/newt v1.10.3
github.com/fosrl/newt v1.12.0
github.com/godbus/dbus/v5 v5.2.2
github.com/gorilla/websocket v1.5.3
github.com/miekg/dns v1.1.70
golang.org/x/sys v0.41.0
golang.org/x/sys v0.42.0
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb
golang.zx2c4.com/wireguard/wgctrl v0.0.0-20241231184526-a9ab2273dd10
gvisor.dev/gvisor v0.0.0-20250503011706-39ed1f5ac29c
@@ -20,13 +20,13 @@ require (
github.com/google/go-cmp v0.7.0 // indirect
github.com/vishvananda/netlink v1.3.1 // indirect
github.com/vishvananda/netns v0.0.5 // indirect
golang.org/x/crypto v0.48.0 // indirect
golang.org/x/crypto v0.49.0 // indirect
golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 // indirect
golang.org/x/mod v0.32.0 // indirect
golang.org/x/net v0.51.0 // indirect
golang.org/x/sync v0.19.0 // indirect
golang.org/x/mod v0.33.0 // indirect
golang.org/x/net v0.52.0 // indirect
golang.org/x/sync v0.20.0 // indirect
golang.org/x/time v0.12.0 // indirect
golang.org/x/tools v0.41.0 // indirect
golang.org/x/tools v0.42.0 // indirect
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 // indirect
golang.zx2c4.com/wireguard/windows v0.5.3 // indirect
)

go.sum (28 changes)

@@ -1,7 +1,7 @@
github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY=
github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU=
github.com/fosrl/newt v1.10.3 h1:JO9gFK9LP/w2EeDIn4wU+jKggAFPo06hX5hxFSETqcw=
github.com/fosrl/newt v1.10.3/go.mod h1:iYuuCAG7iabheiogMOX87r61uQN31S39nKxMKRuLS+s=
github.com/fosrl/newt v1.12.0 h1:IodzVlsprOYkHvKrXwDfDTh2ZMtXV6IG1rhUj6Jhd44=
github.com/fosrl/newt v1.12.0/go.mod h1:IJW2sZ4WKKLRuxMz6oBm8PMyAEVkOxZk6d1OUV5/LPM=
github.com/godbus/dbus/v5 v5.2.2 h1:TUR3TgtSVDmjiXOgAAyaZbYmIeP3DPkld3jgKGV8mXQ=
github.com/godbus/dbus/v5 v5.2.2/go.mod h1:3AAv2+hPq5rdnr5txxxRwiGjPXamgoIHgz9FPBfOp3c=
github.com/google/btree v1.1.3 h1:CVpQJjYgC4VbzxeGVHfvZrv1ctoYCAI8vbl07Fcxlyg=
@@ -16,24 +16,24 @@ github.com/vishvananda/netlink v1.3.1 h1:3AEMt62VKqz90r0tmNhog0r/PpWKmrEShJU0wJW
github.com/vishvananda/netlink v1.3.1/go.mod h1:ARtKouGSTGchR8aMwmkzC0qiNPrrWO5JS/XMVl45+b4=
github.com/vishvananda/netns v0.0.5 h1:DfiHV+j8bA32MFM7bfEunvT8IAqQ/NzSJHtcmW5zdEY=
github.com/vishvananda/netns v0.0.5/go.mod h1:SpkAiCQRtJ6TvvxPnOSyH3BMl6unz3xZlaprSwhNNJM=
golang.org/x/crypto v0.48.0 h1:/VRzVqiRSggnhY7gNRxPauEQ5Drw9haKdM0jqfcCFts=
golang.org/x/crypto v0.48.0/go.mod h1:r0kV5h3qnFPlQnBSrULhlsRfryS2pmewsg+XfMgkVos=
golang.org/x/crypto v0.49.0 h1:+Ng2ULVvLHnJ/ZFEq4KdcDd/cfjrrjjNSXNzxg0Y4U4=
golang.org/x/crypto v0.49.0/go.mod h1:ErX4dUh2UM+CFYiXZRTcMpEcN8b/1gxEuv3nODoYtCA=
golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6 h1:zfMcR1Cs4KNuomFFgGefv5N0czO2XZpUbxGUy8i8ug0=
golang.org/x/exp v0.0.0-20251113190631-e25ba8c21ef6/go.mod h1:46edojNIoXTNOhySWIWdix628clX9ODXwPsQuG6hsK0=
golang.org/x/mod v0.32.0 h1:9F4d3PHLljb6x//jOyokMv3eX+YDeepZSEo3mFJy93c=
golang.org/x/mod v0.32.0/go.mod h1:SgipZ/3h2Ci89DlEtEXWUk/HteuRin+HHhN+WbNhguU=
golang.org/x/net v0.51.0 h1:94R/GTO7mt3/4wIKpcR5gkGmRLOuE/2hNGeWq/GBIFo=
golang.org/x/net v0.51.0/go.mod h1:aamm+2QF5ogm02fjy5Bb7CQ0WMt1/WVM7FtyaTLlA9Y=
golang.org/x/sync v0.19.0 h1:vV+1eWNmZ5geRlYjzm2adRgW2/mcpevXNg50YZtPCE4=
golang.org/x/sync v0.19.0/go.mod h1:9KTHXmSnoGruLpwFjVSX0lNNA75CykiMECbovNTZqGI=
golang.org/x/mod v0.33.0 h1:tHFzIWbBifEmbwtGz65eaWyGiGZatSrT9prnU8DbVL8=
golang.org/x/mod v0.33.0/go.mod h1:swjeQEj+6r7fODbD2cqrnje9PnziFuw4bmLbBZFrQ5w=
golang.org/x/net v0.52.0 h1:He/TN1l0e4mmR3QqHMT2Xab3Aj3L9qjbhRm78/6jrW0=
golang.org/x/net v0.52.0/go.mod h1:R1MAz7uMZxVMualyPXb+VaqGSa3LIaUqk0eEt3w36Sw=
golang.org/x/sync v0.20.0 h1:e0PTpb7pjO8GAtTs2dQ6jYa5BWYlMuX047Dco/pItO4=
golang.org/x/sync v0.20.0/go.mod h1:9xrNwdLfx4jkKbNva9FpL6vEN7evnE43NNNJQ2LF3+0=
golang.org/x/sys v0.2.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.10.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.41.0 h1:Ivj+2Cp/ylzLiEU89QhWblYnOE9zerudt9Ftecq2C6k=
golang.org/x/sys v0.41.0/go.mod h1:OgkHotnGiDImocRcuBABYBEXf8A9a87e/uXjp9XT3ks=
golang.org/x/sys v0.42.0 h1:omrd2nAlyT5ESRdCLYdm3+fMfNFE/+Rf4bDIQImRJeo=
golang.org/x/sys v0.42.0/go.mod h1:4GL1E5IUh+htKOUEOaiffhrAeqysfVGipDYzABqnCmw=
golang.org/x/time v0.12.0 h1:ScB/8o8olJvc+CQPWrK3fPZNfh7qgwCrY0zJmoEQLSE=
golang.org/x/time v0.12.0/go.mod h1:CDIdPxbZBQxdj6cxyCIdrNogrJKMJ7pr37NYpMcMDSg=
golang.org/x/tools v0.41.0 h1:a9b8iMweWG+S0OBnlU36rzLp20z1Rp10w+IY2czHTQc=
golang.org/x/tools v0.41.0/go.mod h1:XSY6eDqxVNiYgezAVqqCeihT4j1U2CCsqvH3WhQpnlg=
golang.org/x/tools v0.42.0 h1:uNgphsn75Tdz5Ji2q36v/nsFSfR/9BRFvqhGBaJGd5k=
golang.org/x/tools v0.42.0/go.mod h1:Ma6lCIwGZvHK6XtgbswSoWroEkhugApmsXyrUmBhfr0=
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2 h1:B82qJJgjvYKsXS9jeunTOisW56dUokqW/FOteYJJ/yg=
golang.zx2c4.com/wintun v0.0.0-20230126152724-0fa3db229ce2/go.mod h1:deeaetjYA+DHMHg+sMSMI58GrEteJUUzzw7en6TJQcI=
golang.zx2c4.com/wireguard v0.0.0-20250521234502-f333402bd9cb h1:whnFRlWMcXI9d+ZbWg+4sHnLp52d5yiIPUxMBSt4X9A=


@@ -205,12 +205,13 @@ func (o *Olm) handleConnect(msg websocket.WSMessage) {
// Register JIT handler: when the DNS proxy resolves a local record, check whether
// the owning site is already connected and, if not, initiate a JIT connection.
o.dnsProxy.SetJITHandler(func(siteId int) {
if o.peerManager == nil || o.websocket == nil {
pm := o.getPeerManager()
if pm == nil || o.websocket == nil {
return
}
// Site already has an active peer connection - nothing to do.
if _, exists := o.peerManager.GetPeer(siteId); exists {
if _, exists := pm.GetPeer(siteId); exists {
return
}


@@ -32,21 +32,27 @@ func (o *Olm) handleWgPeerAddData(msg websocket.WSMessage) {
return
}
if _, exists := o.peerManager.GetPeer(addSubnetsData.SiteId); !exists {
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring add-remote-subnets-aliases message: peerManager is nil (shutdown in progress)")
return
}
if _, exists := pm.GetPeer(addSubnetsData.SiteId); !exists {
logger.Debug("Peer %d not found for removing remote subnets and aliases", addSubnetsData.SiteId)
return
}
// Add new subnets
for _, subnet := range addSubnetsData.RemoteSubnets {
if err := o.peerManager.AddRemoteSubnet(addSubnetsData.SiteId, subnet); err != nil {
if err := pm.AddRemoteSubnet(addSubnetsData.SiteId, subnet); err != nil {
logger.Error("Failed to add allowed IP %s: %v", subnet, err)
}
}
// Add new aliases
for _, alias := range addSubnetsData.Aliases {
if err := o.peerManager.AddAlias(addSubnetsData.SiteId, alias); err != nil {
if err := pm.AddAlias(addSubnetsData.SiteId, alias); err != nil {
logger.Error("Failed to add alias %s: %v", alias.Alias, err)
}
}
@@ -73,21 +79,27 @@ func (o *Olm) handleWgPeerRemoveData(msg websocket.WSMessage) {
return
}
if _, exists := o.peerManager.GetPeer(removeSubnetsData.SiteId); !exists {
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring remove-remote-subnets-aliases message: peerManager is nil (shutdown in progress)")
return
}
if _, exists := pm.GetPeer(removeSubnetsData.SiteId); !exists {
logger.Debug("Peer %d not found for removing remote subnets and aliases", removeSubnetsData.SiteId)
return
}
// Remove subnets
for _, subnet := range removeSubnetsData.RemoteSubnets {
if err := o.peerManager.RemoveRemoteSubnet(removeSubnetsData.SiteId, subnet); err != nil {
if err := pm.RemoveRemoteSubnet(removeSubnetsData.SiteId, subnet); err != nil {
logger.Error("Failed to remove allowed IP %s: %v", subnet, err)
}
}
// Remove aliases
for _, alias := range removeSubnetsData.Aliases {
if err := o.peerManager.RemoveAlias(removeSubnetsData.SiteId, alias.Alias); err != nil {
if err := pm.RemoveAlias(removeSubnetsData.SiteId, alias.Alias); err != nil {
logger.Error("Failed to remove alias %s: %v", alias.Alias, err)
}
}
@@ -114,7 +126,13 @@ func (o *Olm) handleWgPeerUpdateData(msg websocket.WSMessage) {
return
}
if _, exists := o.peerManager.GetPeer(updateSubnetsData.SiteId); !exists {
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring update-remote-subnets-aliases message: peerManager is nil (shutdown in progress)")
return
}
if _, exists := pm.GetPeer(updateSubnetsData.SiteId); !exists {
logger.Debug("Peer %d not found for updating remote subnets and aliases", updateSubnetsData.SiteId)
return
}
@@ -123,14 +141,14 @@ func (o *Olm) handleWgPeerUpdateData(msg websocket.WSMessage) {
// This ensures that if an old and new subnet are the same on different peers,
// the route won't be temporarily removed
for _, subnet := range updateSubnetsData.NewRemoteSubnets {
if err := o.peerManager.AddRemoteSubnet(updateSubnetsData.SiteId, subnet); err != nil {
if err := pm.AddRemoteSubnet(updateSubnetsData.SiteId, subnet); err != nil {
logger.Error("Failed to add allowed IP %s: %v", subnet, err)
}
}
// Remove old subnets after new ones are added
for _, subnet := range updateSubnetsData.OldRemoteSubnets {
if err := o.peerManager.RemoveRemoteSubnet(updateSubnetsData.SiteId, subnet); err != nil {
if err := pm.RemoveRemoteSubnet(updateSubnetsData.SiteId, subnet); err != nil {
logger.Error("Failed to remove allowed IP %s: %v", subnet, err)
}
}
@@ -139,14 +157,14 @@ func (o *Olm) handleWgPeerUpdateData(msg websocket.WSMessage) {
// This ensures that if an old and new alias share the same IP, the IP won't be
// temporarily removed from the allowed IPs list
for _, alias := range updateSubnetsData.NewAliases {
if err := o.peerManager.AddAlias(updateSubnetsData.SiteId, alias); err != nil {
if err := pm.AddAlias(updateSubnetsData.SiteId, alias); err != nil {
logger.Error("Failed to add alias %s: %v", alias.Alias, err)
}
}
// Remove old aliases after new ones are added
for _, alias := range updateSubnetsData.OldAliases {
if err := o.peerManager.RemoveAlias(updateSubnetsData.SiteId, alias.Alias); err != nil {
if err := pm.RemoveAlias(updateSubnetsData.SiteId, alias.Alias); err != nil {
logger.Error("Failed to remove alias %s: %v", alias.Alias, err)
}
}
@@ -163,7 +181,8 @@ func (o *Olm) handleSync(msg websocket.WSMessage) {
return
}
if o.peerManager == nil {
pm := o.getPeerManager()
if pm == nil {
logger.Warn("Peer manager not initialized, ignoring sync request")
return
}
@@ -190,7 +209,7 @@ func (o *Olm) handleSync(msg websocket.WSMessage) {
}
// Get all current peers
currentPeers := o.peerManager.GetAllPeers()
currentPeers := pm.GetAllPeers()
currentPeerMap := make(map[int]peers.SiteConfig)
for _, peer := range currentPeers {
currentPeerMap[peer.SiteId] = peer
@@ -200,7 +219,7 @@ func (o *Olm) handleSync(msg websocket.WSMessage) {
for siteId := range currentPeerMap {
if _, exists := expectedPeers[siteId]; !exists {
logger.Info("Sync: Removing peer for site %d (no longer in expected config)", siteId)
if err := o.peerManager.RemovePeer(siteId); err != nil {
if err := pm.RemovePeer(siteId); err != nil {
logger.Error("Sync: Failed to remove peer %d: %v", siteId, err)
} else {
// Remove any exit nodes associated with this peer from hole punching
@@ -301,7 +320,7 @@ func (o *Olm) handleSync(msg websocket.WSMessage) {
siteConfig.Aliases = expectedSite.Aliases
}
if err := o.peerManager.UpdatePeer(siteConfig); err != nil {
if err := pm.UpdatePeer(siteConfig); err != nil {
logger.Error("Sync: Failed to update peer %d: %v", siteId, err)
} else {
// If the endpoint changed, trigger holepunch to refresh NAT mappings


@@ -47,6 +47,7 @@ type Olm struct {
websocket *websocket.Client
holePunchManager *holepunch.Manager
peerManager *peers.PeerManager
peerManagerMu sync.RWMutex
// Power mode management
currentPowerMode string
powerModeMu sync.Mutex
@@ -76,6 +77,15 @@ type Olm struct {
tunnelWg sync.WaitGroup
}
// getPeerManager safely returns the current peerManager under a read-lock.
// Callers must check the returned value for nil before using it.
func (o *Olm) getPeerManager() *peers.PeerManager {
o.peerManagerMu.RLock()
pm := o.peerManager
o.peerManagerMu.RUnlock()
return pm
}
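// Illustrative sketch (not part of this change): every message handler in this
// diff uses the same discipline with the snapshot - fetch once, nil-check, and
// use only the local copy so a concurrent Close() cannot nil the field
// mid-handler:
//
//	pm := o.getPeerManager()
//	if pm == nil {
//		return // shutdown in progress
//	}
//	// ... use pm, never o.peerManager, from here on ...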
// initTunnelInfo creates the shared UDP socket and holepunch manager.
// This is used during initial tunnel setup and when switching organizations.
func (o *Olm) initTunnelInfo(clientID string) error {
@@ -457,7 +467,7 @@ func (o *Olm) StartTunnel(config TunnelConfig) {
"userToken": userToken,
"fingerprint": o.fingerprint,
"postures": o.postures,
}, 2*time.Second, 10)
}, 2*time.Second, 20)
// Invoke onRegistered callback if configured
if o.olmConfig.OnRegistered != nil {
@@ -591,10 +601,12 @@ func (o *Olm) Close() {
}
// Close() also calls Stop() internally
o.peerManagerMu.Lock()
if o.peerManager != nil {
o.peerManager.Close()
o.peerManager = nil
}
o.peerManagerMu.Unlock()
if o.uapiListener != nil {
_ = o.uapiListener.Close()
@@ -806,14 +818,14 @@ func (o *Olm) SetPowerMode(mode string) error {
lowPowerInterval := 10 * time.Minute
if o.peerManager != nil {
peerMonitor := o.peerManager.GetPeerMonitor()
if pm := o.getPeerManager(); pm != nil {
peerMonitor := pm.GetPeerMonitor()
if peerMonitor != nil {
peerMonitor.SetPeerInterval(lowPowerInterval, lowPowerInterval)
peerMonitor.SetPeerHolepunchInterval(lowPowerInterval, lowPowerInterval)
logger.Info("Set monitoring intervals to 10 minutes for low power mode")
}
o.peerManager.UpdateAllPeersPersistentKeepalive(0) // disable
pm.UpdateAllPeersPersistentKeepalive(0) // disable
}
if o.holePunchManager != nil {
@@ -858,14 +870,14 @@ func (o *Olm) SetPowerMode(mode string) error {
}
// Restore intervals and reconnect websocket
if o.peerManager != nil {
peerMonitor := o.peerManager.GetPeerMonitor()
if pm := o.getPeerManager(); pm != nil {
peerMonitor := pm.GetPeerMonitor()
if peerMonitor != nil {
peerMonitor.ResetPeerHolepunchInterval()
peerMonitor.ResetPeerInterval()
}
o.peerManager.UpdateAllPeersPersistentKeepalive(5)
pm.UpdateAllPeersPersistentKeepalive(5)
}
if o.holePunchManager != nil {


@@ -20,7 +20,8 @@ func (o *Olm) handleWgPeerAdd(msg websocket.WSMessage) {
return
}
if o.peerManager == nil {
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring add-peer message: peerManager is nil (shutdown in progress)")
return
}
@@ -64,7 +65,7 @@ func (o *Olm) handleWgPeerAdd(msg websocket.WSMessage) {
_ = o.holePunchManager.TriggerHolePunch() // Trigger immediate hole punch attempt so that if the peer decides to relay we have already punched close to when we need it
if err := o.peerManager.AddPeer(siteConfigMsg.SiteConfig); err != nil {
if err := pm.AddPeer(siteConfigMsg.SiteConfig); err != nil {
logger.Error("Failed to add peer: %v", err)
return
}
@@ -81,7 +82,8 @@ func (o *Olm) handleWgPeerRemove(msg websocket.WSMessage) {
return
}
if o.peerManager == nil {
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring remove-peer message: peerManager is nil (shutdown in progress)")
return
}
@@ -98,7 +100,7 @@ func (o *Olm) handleWgPeerRemove(msg websocket.WSMessage) {
return
}
if err := o.peerManager.RemovePeer(removeData.SiteId); err != nil {
if err := pm.RemovePeer(removeData.SiteId); err != nil {
logger.Error("Failed to remove peer: %v", err)
return
}
@@ -123,7 +125,8 @@ func (o *Olm) handleWgPeerUpdate(msg websocket.WSMessage) {
return
}
if o.peerManager == nil {
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring update-peer message: peerManager is nil (shutdown in progress)")
return
}
@@ -141,7 +144,7 @@ func (o *Olm) handleWgPeerUpdate(msg websocket.WSMessage) {
}
// Get existing peer from PeerManager
existingPeer, exists := o.peerManager.GetPeer(updateData.SiteId)
existingPeer, exists := pm.GetPeer(updateData.SiteId)
if !exists {
logger.Warn("Peer with site ID %d not found", updateData.SiteId)
return
@@ -169,7 +172,7 @@ func (o *Olm) handleWgPeerUpdate(msg websocket.WSMessage) {
siteConfig.RemoteSubnets = updateData.RemoteSubnets
}
if err := o.peerManager.UpdatePeer(siteConfig); err != nil {
if err := pm.UpdatePeer(siteConfig); err != nil {
logger.Error("Failed to update peer: %v", err)
return
}
@@ -188,7 +191,8 @@ func (o *Olm) handleWgPeerRelay(msg websocket.WSMessage) {
logger.Debug("Received relay-peer message: %v", msg.Data)
// Check if peerManager is still valid (may be nil during shutdown)
if o.peerManager == nil {
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring relay message: peerManager is nil (shutdown in progress)")
return
}
@@ -208,7 +212,7 @@ func (o *Olm) handleWgPeerRelay(msg websocket.WSMessage) {
return
}
if monitor := o.peerManager.GetPeerMonitor(); monitor != nil {
if monitor := pm.GetPeerMonitor(); monitor != nil {
monitor.CancelRelaySend(relayData.ChainId)
}
@@ -222,14 +226,15 @@ func (o *Olm) handleWgPeerRelay(msg websocket.WSMessage) {
// Update HTTP server to mark this peer as using relay
o.apiServer.UpdatePeerRelayStatus(relayData.SiteId, relayData.RelayEndpoint, true)
o.peerManager.RelayPeer(relayData.SiteId, primaryRelay, relayData.RelayPort)
pm.RelayPeer(relayData.SiteId, primaryRelay, relayData.RelayPort)
}
func (o *Olm) handleWgPeerUnrelay(msg websocket.WSMessage) {
logger.Debug("Received unrelay-peer message: %v", msg.Data)
// Check if peerManager is still valid (may be nil during shutdown)
if o.peerManager == nil {
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring unrelay message: peerManager is nil (shutdown in progress)")
return
}
@@ -249,7 +254,7 @@ func (o *Olm) handleWgPeerUnrelay(msg websocket.WSMessage) {
return
}
if monitor := o.peerManager.GetPeerMonitor(); monitor != nil {
if monitor := pm.GetPeerMonitor(); monitor != nil {
monitor.CancelRelaySend(relayData.ChainId)
}
@@ -262,7 +267,7 @@ func (o *Olm) handleWgPeerUnrelay(msg websocket.WSMessage) {
// Update HTTP server to mark this peer as using relay
o.apiServer.UpdatePeerRelayStatus(relayData.SiteId, relayData.Endpoint, false)
o.peerManager.UnRelayPeer(relayData.SiteId, primaryRelay)
pm.UnRelayPeer(relayData.SiteId, primaryRelay)
}
func (o *Olm) handleWgPeerHolepunchAddSite(msg websocket.WSMessage) {
@@ -317,7 +322,12 @@ func (o *Olm) handleWgPeerHolepunchAddSite(msg websocket.WSMessage) {
}
// Get existing peer from PeerManager
_, exists := o.peerManager.GetPeer(handshakeData.SiteId)
pm := o.getPeerManager()
if pm == nil {
logger.Debug("Ignoring peer-handshake message: peerManager is nil (shutdown in progress)")
return
}
_, exists := pm.GetPeer(handshakeData.SiteId)
if exists {
logger.Warn("Peer with site ID %d already added", handshakeData.SiteId)
return


@@ -6,6 +6,7 @@ import (
"strconv"
"strings"
"sync"
"time"
"github.com/fosrl/newt/bind"
"github.com/fosrl/newt/logger"
@@ -54,6 +55,9 @@ type PeerManager struct {
publicDNS []string
PersistentKeepalive int
routeOptimizerStop chan struct{}
optimizerTrigger chan struct{}
}
// NewPeerManager creates a new PeerManager with an internal PeerMonitor
@@ -80,6 +84,8 @@ func NewPeerManager(config PeerManagerConfig) *PeerManager {
config.PublicDNS,
)
pm.optimizerTrigger = make(chan struct{}, 1)
return pm
}
@@ -856,10 +862,12 @@ func (pm *PeerManager) Start() {
if pm.peerMonitor != nil {
pm.peerMonitor.Start()
}
pm.startRouteOptimizer()
}
// Stop stops the peer monitor
func (pm *PeerManager) Stop() {
pm.stopRouteOptimizer()
if pm.peerMonitor != nil {
pm.peerMonitor.Stop()
}
@@ -867,6 +875,7 @@ func (pm *PeerManager) Stop() {
// Close stops the peer monitor and cleans up resources
func (pm *PeerManager) Close() {
pm.stopRouteOptimizer()
if pm.peerMonitor != nil {
pm.peerMonitor.Close()
pm.peerMonitor = nil
@@ -928,3 +937,166 @@ endpoint=%s`, util.FixKey(peer.PublicKey), endpoint)
logger.Info("Switched peer %d back to direct connection at %s", siteId, endpoint)
return nil
}
// isBetterConnection returns true if connection quality (a) is better than (b).
// Priority: connected > disconnected, then direct > relayed, then lower RTT.
func isBetterConnection(aConn bool, aRelay bool, aRTT time.Duration,
bConn bool, bRelay bool, bRTT time.Duration) bool {
if aConn != bConn {
return aConn // connected beats disconnected
}
if !aConn {
return false // both offline, no preference
}
if aRelay != bRelay {
return !aRelay // direct beats relayed
}
// Same connectivity class: prefer lower RTT
if aRTT == 0 {
return false // unknown RTT, don't displace
}
if bRTT == 0 {
return true // current has no RTT data, prefer known
}
return aRTT < bRTT
}
// selectBestOwner returns the siteId of the best site to own the given IP,
// based on connection quality. Must be called with pm.mu held.
func (pm *PeerManager) selectBestOwner(claims map[int]bool) int {
bestSiteId := -1
var bestConn, bestRelay bool
var bestRTT time.Duration
for siteId := range claims {
conn, relay, rtt := pm.peerMonitor.GetConnectionQuality(siteId)
if bestSiteId < 0 || isBetterConnection(conn, relay, rtt, bestConn, bestRelay, bestRTT) {
bestSiteId = siteId
bestConn = conn
bestRelay = relay
bestRTT = rtt
}
}
return bestSiteId
}
// getWireGuardAllowedIPs returns the full set of IPs that should be in WireGuard
// for a peer: server IP /32 plus all shared IPs it currently owns.
// Must be called with pm.mu held.
func (pm *PeerManager) getWireGuardAllowedIPs(siteId int) []string {
peer, exists := pm.peers[siteId]
if !exists {
return nil
}
serverIP := strings.Split(peer.ServerIP, "/")[0] + "/32"
ips := []string{serverIP}
for cidr, owner := range pm.allowedIPOwners {
if owner == siteId {
ips = append(ips, cidr)
}
}
return ips
}
// transferOwnership moves WireGuard ownership of cidr from fromSiteId to toSiteId.
// Must be called with pm.mu held.
func (pm *PeerManager) transferOwnership(cidr string, fromSiteId int, toSiteId int) error {
// Update owner map first
pm.allowedIPOwners[cidr] = toSiteId
// Remove cidr from old owner's WireGuard allowed IPs
if fromPeer, exists := pm.peers[fromSiteId]; exists {
remaining := pm.getWireGuardAllowedIPs(fromSiteId) // cidr is no longer in owners, so it won't appear here
if err := RemoveAllowedIP(pm.device, fromPeer.PublicKey, remaining); err != nil {
// Revert
pm.allowedIPOwners[cidr] = fromSiteId
return fmt.Errorf("remove IP %s from site %d: %v", cidr, fromSiteId, err)
}
}
// Add cidr to new owner's WireGuard allowed IPs
if toPeer, exists := pm.peers[toSiteId]; exists {
if err := AddAllowedIP(pm.device, toPeer.PublicKey, cidr); err != nil {
return fmt.Errorf("add IP %s to site %d: %v", cidr, toSiteId, err)
}
}
return nil
}
// optimizeRoutes evaluates all shared IPs and reassigns ownership to the best site.
func (pm *PeerManager) optimizeRoutes() {
pm.mu.Lock()
defer pm.mu.Unlock()
for cidr, claims := range pm.allowedIPClaims {
if len(claims) <= 1 {
continue // No competition, nothing to optimize
}
currentOwner, hasOwner := pm.allowedIPOwners[cidr]
bestOwner := pm.selectBestOwner(claims)
if bestOwner < 0 {
continue
}
if hasOwner && currentOwner == bestOwner {
continue // Already on the best site
}
if !hasOwner {
// No current owner, just assign
pm.allowedIPOwners[cidr] = bestOwner
if toPeer, exists := pm.peers[bestOwner]; exists {
if err := AddAllowedIP(pm.device, toPeer.PublicKey, cidr); err != nil {
logger.Error("Failed to assign IP %s to site %d: %v", cidr, bestOwner, err)
}
}
continue
}
logger.Info("Route optimizer: moving %s from site %d to site %d", cidr, currentOwner, bestOwner)
if err := pm.transferOwnership(cidr, currentOwner, bestOwner); err != nil {
logger.Error("Failed to transfer ownership of %s from site %d to site %d: %v",
cidr, currentOwner, bestOwner, err)
}
}
}
// startRouteOptimizer registers the status-change callback and launches the optimizer goroutine.
func (pm *PeerManager) startRouteOptimizer() {
pm.routeOptimizerStop = make(chan struct{})
// Trigger optimization whenever any peer's connection status changes
if pm.peerMonitor != nil {
pm.peerMonitor.SetStatusChangeCallback(func(_ int) {
select {
case pm.optimizerTrigger <- struct{}{}:
default:
}
})
}
go func() {
ticker := time.NewTicker(5 * time.Second)
defer ticker.Stop()
for {
select {
case <-pm.routeOptimizerStop:
return
case <-pm.optimizerTrigger:
pm.optimizeRoutes()
case <-ticker.C:
pm.optimizeRoutes()
}
}
}()
}
// stopRouteOptimizer stops the route optimizer goroutine if it is running.
func (pm *PeerManager) stopRouteOptimizer() {
if pm.routeOptimizerStop != nil {
close(pm.routeOptimizerStop)
pm.routeOptimizerStop = nil
}
}
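Two notes on the optimizer added above. First, it references ownership maps that are declared elsewhere in PeerManager; their shapes can be inferred from how optimizeRoutes and transferOwnership use them:

// Inferred from usage in this hunk; the real declarations live outside it.
allowedIPClaims map[string]map[int]bool // CIDR -> set of site IDs claiming it
allowedIPOwners map[string]int          // CIDR -> site ID currently owning it in WireGuard

Second, to make the isBetterConnection ordering concrete (connected beats disconnected, direct beats relayed, lower RTT wins, zero RTT is treated as unknown), a few illustrative cases in a hypothetical table-driven test:

func TestIsBetterConnection(t *testing.T) {
	cases := []struct {
		name          string
		aConn, aRelay bool
		aRTT          time.Duration
		bConn, bRelay bool
		bRTT          time.Duration
		want          bool
	}{
		{"connected beats disconnected", true, true, 0, false, false, 0, true},
		{"direct beats relayed even at higher RTT", true, false, 50 * time.Millisecond, true, true, 5 * time.Millisecond, true},
		{"lower RTT wins within the same class", true, false, 10 * time.Millisecond, true, false, 20 * time.Millisecond, true},
		{"unknown RTT never displaces a measured one", true, false, 0, true, false, 20 * time.Millisecond, false},
	}
	for _, c := range cases {
		if got := isBetterConnection(c.aConn, c.aRelay, c.aRTT, c.bConn, c.bRelay, c.bRTT); got != c.want {
			t.Errorf("%s: got %v, want %v", c.name, got, c.want)
		}
	}
}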


@@ -86,6 +86,8 @@ type PeerMonitor struct {
// WG connection status tracking
wgConnectionStatus map[int]bool // siteID -> WG connected status
wgConnectionRTT map[int]time.Duration // siteID -> last known RTT
statusChangeCallback func(siteId int) // called when any peer's connection status changes
}
// NewPeerMonitor creates a new peer monitor with the given callback
@@ -122,6 +124,7 @@ func NewPeerMonitor(wsClient *websocket.Client, middleDev *middleDevice.MiddleDe
rapidTestMaxAttempts: 5, // 5 attempts = ~1-1.5 seconds total
apiServer: apiServer,
wgConnectionStatus: make(map[int]bool),
wgConnectionRTT: make(map[int]time.Duration),
// Exponential backoff settings for holepunch monitor
defaultHolepunchMinInterval: 2 * time.Second,
defaultHolepunchMaxInterval: 30 * time.Second,
@@ -392,6 +395,9 @@ func (pm *PeerMonitor) handleConnectionStatusChange(siteID int, status Connectio
pm.mutex.Lock()
previousStatus, exists := pm.wgConnectionStatus[siteID]
pm.wgConnectionStatus[siteID] = status.Connected
if status.Connected && status.RTT > 0 {
pm.wgConnectionRTT[siteID] = status.RTT
}
isRelayed := pm.relayedPeers[siteID]
endpoint := pm.holepunchEndpoints[siteID]
pm.mutex.Unlock()
@@ -409,6 +415,11 @@ func (pm *PeerMonitor) handleConnectionStatusChange(siteID int, status Connectio
if pm.apiServer != nil {
pm.apiServer.UpdatePeerStatus(siteID, status.Connected, status.RTT, endpoint, isRelayed)
}
// Notify route optimizer of status change
if pm.statusChangeCallback != nil {
pm.statusChangeCallback(siteID)
}
}
// sendRelay sends a relay message to the server with retry, keyed by chainId
@@ -521,6 +532,25 @@ func (pm *PeerMonitor) IsPeerRelayed(siteID int) bool {
return pm.relayedPeers[siteID]
}
// SetStatusChangeCallback registers a callback that is invoked whenever a peer's
// WireGuard connection status changes (connected/disconnected). The callback must
// be non-blocking (e.g., send to a buffered channel).
func (pm *PeerMonitor) SetStatusChangeCallback(cb func(siteId int)) {
pm.mutex.Lock()
defer pm.mutex.Unlock()
pm.statusChangeCallback = cb
}
// GetConnectionQuality returns the current connection quality metrics for a peer.
func (pm *PeerMonitor) GetConnectionQuality(siteId int) (connected bool, relayed bool, rtt time.Duration) {
pm.mutex.Lock()
defer pm.mutex.Unlock()
connected = pm.wgConnectionStatus[siteId]
relayed = pm.relayedPeers[siteId]
rtt = pm.wgConnectionRTT[siteId]
return
}
// startHolepunchMonitor starts the holepunch connection monitoring
// Note: This function assumes the mutex is already held by the caller (called from Start())
func (pm *PeerMonitor) startHolepunchMonitor() error {