mirror of
https://github.com/wavetermdev/waveterm
synced 2026-04-21 14:37:16 +00:00
conn error sub-codes for better classification (#2889)
This commit is contained in:
parent
0cc6c454a9
commit
1201273bd4
5 changed files with 150 additions and 22 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
|
@ -36,3 +36,5 @@ storybook-static/
|
|||
test-results.xml
|
||||
|
||||
docsite/
|
||||
|
||||
.kilo-format-temp-*
|
||||
|
|
|
|||
|
|
@ -749,7 +749,8 @@ func (conn *SSHConn) Connect(ctx context.Context, connFlags *wconfig.ConnKeyword
|
|||
conn.FireConnChangeEvent()
|
||||
err := conn.connectInternal(ctx, connFlags)
|
||||
if err != nil {
|
||||
errorCode := remote.ClassifyConnError(err)
|
||||
errorCode, subCode := remote.ClassifyConnError(err)
|
||||
isContextError := errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded)
|
||||
conn.Infof(ctx, "ERROR [%s] %v\n\n", errorCode, err)
|
||||
conn.WithLock(func() {
|
||||
conn.Status = Status_Error
|
||||
|
|
@ -762,8 +763,10 @@ func (conn *SSHConn) Connect(ctx context.Context, connFlags *wconfig.ConnKeyword
|
|||
telemetry.GoRecordTEventWrap(&telemetrydata.TEvent{
|
||||
Event: "conn:connecterror",
|
||||
Props: telemetrydata.TEventProps{
|
||||
ConnType: "ssh",
|
||||
ConnErrorCode: errorCode,
|
||||
ConnType: "ssh",
|
||||
ConnErrorCode: errorCode,
|
||||
ConnSubErrorCode: subCode,
|
||||
ConnContextError: isContextError,
|
||||
},
|
||||
})
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -52,6 +52,7 @@ const (
|
|||
ConnErrCode_KnownHostsNone = "knownhosts-none"
|
||||
ConnErrCode_KnownHostsFmt = "knownhosts-format"
|
||||
ConnErrCode_Dial = "dial-error"
|
||||
ConnErrCode_ProxyJumpDial = "dial-proxy-jump"
|
||||
ConnErrCode_HostKeyRevoked = "hostkey-revoked"
|
||||
ConnErrCode_HostKeyChanged = "hostkey-changed"
|
||||
ConnErrCode_HostKeyVerify = "hostkey-verify"
|
||||
|
|
@ -61,6 +62,27 @@ const (
|
|||
ConnErrCode_Unknown = "unknown"
|
||||
)
|
||||
|
||||
// Dial error subcodes refine ConnErrCode_Dial / ConnErrCode_ProxyJumpDial so
// that the root cause of a failed dial can be told apart. They are the values
// produced by ClassifyDialErrorSubCode.
const (
	// DialSubCode_DNS: the error chain contains a *net.DNSError.
	DialSubCode_DNS = "dns"
	// DialSubCode_Refused: the message contains "connection refused".
	DialSubCode_Refused = "refused"
	// DialSubCode_Timeout: a *net.OpError reported Timeout(), or the message
	// contains "timed out" / "timeout".
	DialSubCode_Timeout = "timeout"
	// DialSubCode_ContextCanceled: the error matches context.Canceled or
	// context.DeadlineExceeded.
	DialSubCode_ContextCanceled = "context-canceled"
	// DialSubCode_NoRoute: the message contains "no route to host".
	DialSubCode_NoRoute = "no-route"
	// DialSubCode_HostUnreach: the message contains "host is unreachable" or
	// "host unreachable".
	DialSubCode_HostUnreach = "host-unreachable"
	// DialSubCode_NetUnreach: the message contains "network is unreachable" or
	// "network unreachable".
	DialSubCode_NetUnreach = "net-unreachable"
	// DialSubCode_ConnReset: the message contains "connection reset".
	DialSubCode_ConnReset = "conn-reset"
	// DialSubCode_PermDenied: the message contains "permission denied".
	DialSubCode_PermDenied = "perm-denied"
	// DialSubCode_ProxyJump is reserved for proxy-jump dial failures.
	// NOTE(review): not referenced by the classifier visible here — confirm usage.
	DialSubCode_ProxyJump = "proxy-jump"
	// DialSubCode_Other: fallback when no more specific pattern matched.
	DialSubCode_Other = "other"
)
|
||||
|
||||
// Auth error subcodes refine ConnErrCode_AuthFailed. ClassifyConnError picks
// one of them based on the text of the error message.
const (
	// AuthSubCode_UnableToAuth: the message contains "unable to authenticate".
	AuthSubCode_UnableToAuth = "unable-to-auth"
	// AuthSubCode_HandshakeFailed: the message contains "handshake failed"
	// (and did not already match "unable to authenticate").
	AuthSubCode_HandshakeFailed = "handshake-failed"
)
|
||||
|
||||
var waveSshConfigUserSettingsInternal *ssh_config.UserSettings
|
||||
var configUserSettingsOnce = &sync.Once{}
|
||||
|
||||
|
|
@ -118,33 +140,110 @@ func SimpleMessageFromPossibleConnectionError(err error) string {
|
|||
return err.Error()
|
||||
}
|
||||
|
||||
func ClassifyConnError(err error) string {
|
||||
func ClassifyConnError(err error) (string, string) {
|
||||
code := utilds.GetErrorCode(err)
|
||||
subCode := utilds.GetErrorSubCode(err)
|
||||
if code != "" {
|
||||
return code
|
||||
return code, subCode
|
||||
}
|
||||
var dnsErr *net.DNSError
|
||||
if errors.As(err, &dnsErr) {
|
||||
return ConnErrCode_Dial
|
||||
return ConnErrCode_Dial, ClassifyDialErrorSubCode(err)
|
||||
}
|
||||
var opErr *net.OpError
|
||||
if errors.As(err, &opErr) {
|
||||
return ConnErrCode_Dial
|
||||
return ConnErrCode_Dial, ClassifyDialErrorSubCode(err)
|
||||
}
|
||||
errStr := err.Error()
|
||||
if strings.Contains(errStr, "unable to authenticate") {
|
||||
return ConnErrCode_AuthFailed
|
||||
return ConnErrCode_AuthFailed, AuthSubCode_UnableToAuth
|
||||
}
|
||||
if strings.Contains(errStr, "handshake failed") {
|
||||
return ConnErrCode_AuthFailed
|
||||
return ConnErrCode_AuthFailed, AuthSubCode_HandshakeFailed
|
||||
}
|
||||
if strings.Contains(errStr, "connection refused") {
|
||||
return ConnErrCode_Dial
|
||||
return ConnErrCode_Dial, ClassifyDialErrorSubCode(err)
|
||||
}
|
||||
if strings.Contains(errStr, "timed out") || strings.Contains(errStr, "timeout") {
|
||||
return ConnErrCode_Dial
|
||||
return ConnErrCode_Dial, ClassifyDialErrorSubCode(err)
|
||||
}
|
||||
return ConnErrCode_Unknown
|
||||
return ConnErrCode_Unknown, ""
|
||||
}
|
||||
|
||||
// ClassifyDialErrorSubCode provides more granular classification of dial errors
|
||||
// to help identify root causes (DNS, VPN, timeouts, etc.)
|
||||
func ClassifyDialErrorSubCode(err error) string {
|
||||
if err == nil {
|
||||
return ""
|
||||
}
|
||||
|
||||
// Check for context cancellation first
|
||||
if errors.Is(err, context.Canceled) || errors.Is(err, context.DeadlineExceeded) {
|
||||
return DialSubCode_ContextCanceled
|
||||
}
|
||||
|
||||
// Check if it's a DNS error
|
||||
var dnsErr *net.DNSError
|
||||
if errors.As(err, &dnsErr) {
|
||||
return DialSubCode_DNS
|
||||
}
|
||||
|
||||
// Check if it's a network operation error
|
||||
var opErr *net.OpError
|
||||
if errors.As(err, &opErr) {
|
||||
// Check the underlying error for more details
|
||||
if opErr.Err != nil {
|
||||
errStr := opErr.Err.Error()
|
||||
if strings.Contains(errStr, "connection refused") {
|
||||
return DialSubCode_Refused
|
||||
}
|
||||
if strings.Contains(errStr, "no route to host") {
|
||||
return DialSubCode_NoRoute
|
||||
}
|
||||
if strings.Contains(errStr, "host is unreachable") || strings.Contains(errStr, "host unreachable") {
|
||||
return DialSubCode_HostUnreach
|
||||
}
|
||||
if strings.Contains(errStr, "network is unreachable") || strings.Contains(errStr, "network unreachable") {
|
||||
return DialSubCode_NetUnreach
|
||||
}
|
||||
if strings.Contains(errStr, "connection reset") {
|
||||
return DialSubCode_ConnReset
|
||||
}
|
||||
if strings.Contains(errStr, "permission denied") {
|
||||
return DialSubCode_PermDenied
|
||||
}
|
||||
}
|
||||
// Generic timeout detection in OpError
|
||||
if opErr.Timeout() {
|
||||
return DialSubCode_Timeout
|
||||
}
|
||||
}
|
||||
|
||||
// Check error string for common patterns
|
||||
errStr := err.Error()
|
||||
if strings.Contains(errStr, "connection refused") {
|
||||
return DialSubCode_Refused
|
||||
}
|
||||
if strings.Contains(errStr, "timed out") || strings.Contains(errStr, "timeout") || strings.Contains(errStr, "i/o timeout") {
|
||||
return DialSubCode_Timeout
|
||||
}
|
||||
if strings.Contains(errStr, "no route to host") {
|
||||
return DialSubCode_NoRoute
|
||||
}
|
||||
if strings.Contains(errStr, "host is unreachable") || strings.Contains(errStr, "host unreachable") {
|
||||
return DialSubCode_HostUnreach
|
||||
}
|
||||
if strings.Contains(errStr, "network is unreachable") || strings.Contains(errStr, "network unreachable") {
|
||||
return DialSubCode_NetUnreach
|
||||
}
|
||||
if strings.Contains(errStr, "connection reset") {
|
||||
return DialSubCode_ConnReset
|
||||
}
|
||||
if strings.Contains(errStr, "permission denied") {
|
||||
return DialSubCode_PermDenied
|
||||
}
|
||||
|
||||
return DialSubCode_Other
|
||||
}
|
||||
|
||||
// This exists to trick the ssh library into continuing to try
|
||||
|
|
@ -747,15 +846,17 @@ func connectInternal(ctx context.Context, networkAddr string, clientConfig *ssh.
|
|||
blocklogger.Infof(ctx, "[conndebug] ssh dial %s\n", networkAddr)
|
||||
clientConn, err = d.DialContext(ctx, "tcp", networkAddr)
|
||||
if err != nil {
|
||||
blocklogger.Infof(ctx, "[conndebug] ERROR dial error: %v\n", err)
|
||||
return nil, utilds.MakeCodedError(ConnErrCode_Dial, err)
|
||||
subCode := ClassifyDialErrorSubCode(err)
|
||||
blocklogger.Infof(ctx, "[conndebug] ERROR dial error [%s]: %v\n", subCode, err)
|
||||
return nil, utilds.MakeSubCodedError(ConnErrCode_Dial, subCode, err)
|
||||
}
|
||||
} else {
|
||||
blocklogger.Infof(ctx, "[conndebug] ssh dial (from client) %s\n", networkAddr)
|
||||
clientConn, err = currentClient.DialContext(ctx, "tcp", networkAddr)
|
||||
if err != nil {
|
||||
blocklogger.Infof(ctx, "[conndebug] ERROR dial error: %v\n", err)
|
||||
return nil, utilds.MakeCodedError(ConnErrCode_Dial, err)
|
||||
subCode := ClassifyDialErrorSubCode(err)
|
||||
blocklogger.Infof(ctx, "[conndebug] ERROR dial error [%s]: %v\n", subCode, err)
|
||||
return nil, utilds.MakeSubCodedError(ConnErrCode_ProxyJumpDial, subCode, err)
|
||||
}
|
||||
}
|
||||
c, chans, reqs, err := ssh.NewClientConn(clientConn, networkAddr, clientConfig)
|
||||
|
|
|
|||
|
|
@ -126,9 +126,11 @@ type TEventProps struct {
|
|||
WshCmd string `json:"wsh:cmd,omitempty"`
|
||||
WshHadError bool `json:"wsh:haderror,omitempty"`
|
||||
|
||||
ConnType string `json:"conn:conntype,omitempty"`
|
||||
ConnWshErrorCode string `json:"conn:wsherrorcode,omitempty"`
|
||||
ConnErrorCode string `json:"conn:errorcode,omitempty"`
|
||||
ConnType string `json:"conn:conntype,omitempty"`
|
||||
ConnWshErrorCode string `json:"conn:wsherrorcode,omitempty"`
|
||||
ConnErrorCode string `json:"conn:errorcode,omitempty"`
|
||||
ConnSubErrorCode string `json:"conn:suberrorcode,omitempty"`
|
||||
ConnContextError bool `json:"conn:contexterror,omitempty"`
|
||||
|
||||
OnboardingFeature string `json:"onboarding:feature,omitempty" tstype:"\"waveai\" | \"durable\" | \"magnify\" | \"wsh\""`
|
||||
OnboardingVersion string `json:"onboarding:version,omitempty"`
|
||||
|
|
|
|||
|
|
@ -10,9 +10,11 @@ import (
|
|||
|
||||
// CodedError pairs an underlying error with a string code used for
// categorization, plus an optional subcode for finer-grained classification.
// GetErrorCode and GetErrorSubCode recover these values from an error chain.
type CodedError struct {
	// Code is the primary category of the error.
	Code string
	// SubCode optionally narrows Code; it is empty when no subcode was set.
	SubCode string
	// Err is the wrapped underlying error.
	Err error
}
|
||||
|
||||
func (e CodedError) Error() string {
|
||||
|
|
@ -25,7 +27,12 @@ func (e CodedError) Unwrap() error {
|
|||
|
||||
// MakeCodedError creates a new CodedError with the given code and error.
|
||||
func MakeCodedError(code string, err error) CodedError {
|
||||
return CodedError{Code: code, Err: err}
|
||||
return CodedError{Code: code, SubCode: "", Err: err}
|
||||
}
|
||||
|
||||
// MakeSubCodedError creates a new CodedError with the given code, subcode, and error.
|
||||
func MakeSubCodedError(code string, subCode string, err error) CodedError {
|
||||
return CodedError{Code: code, SubCode: subCode, Err: err}
|
||||
}
|
||||
|
||||
// GetErrorCode extracts the error code from anywhere in the error chain.
|
||||
|
|
@ -41,6 +48,19 @@ func GetErrorCode(err error) string {
|
|||
return ""
|
||||
}
|
||||
|
||||
// GetErrorSubCode extracts the error subcode from anywhere in the error chain.
|
||||
// Returns empty string if no CodedError is found or if SubCode is not set.
|
||||
func GetErrorSubCode(err error) string {
|
||||
if err == nil {
|
||||
return ""
|
||||
}
|
||||
var coded CodedError
|
||||
if errors.As(err, &coded) {
|
||||
return coded.SubCode
|
||||
}
|
||||
return ""
|
||||
}
|
||||
|
||||
// Errorf creates a formatted error wrapped in a CodedError.
|
||||
// This is a convenience function that combines fmt.Errorf with MakeCodedError.
|
||||
func Errorf(code string, format string, args ...interface{}) error {
|
||||
|
|
|
|||
Loading…
Reference in a new issue