Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions cmd/msgvault/cmd/serve.go
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ import (
"go.kenn.io/msgvault/internal/search"
"go.kenn.io/msgvault/internal/store"
"go.kenn.io/msgvault/internal/sync"
"go.kenn.io/msgvault/internal/syncerr"
"golang.org/x/oauth2"
)

Expand Down Expand Up @@ -468,6 +469,13 @@ func runScheduledGmailSync(ctx context.Context, email string, src *store.Source,
}
tokenSource, tsErr = oauthMgr.TokenSource(ctx, email)
if tsErr != nil {
// Distinguish transient network failures (DNS lookup timeout,
// dial timeout after laptop sleep/wake, Wi-Fi flap) from real
// auth errors. Suggesting reauth on every network blip sends
// the user down the wrong path.
if syncerr.IsTransientNetwork(tsErr) {
return nil, fmt.Errorf("get token source: %w (transient network error; will retry on next schedule)", tsErr)
}
if oauthMgr.HasToken(email) {
return nil, fmt.Errorf("get token source: %w (token may be expired; run 'sync %s' or 'verify %s' from an interactive terminal to re-authorize)", tsErr, email, email)
}
Expand Down
19 changes: 15 additions & 4 deletions internal/scheduler/scheduler.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import (

"github.com/robfig/cron/v3"
"go.kenn.io/msgvault/internal/config"
"go.kenn.io/msgvault/internal/syncerr"
)

// SyncFunc is the callback invoked when a scheduled sync should run.
Expand Down Expand Up @@ -275,10 +276,20 @@ func (s *Scheduler) runSync(email string) {
s.mu.Lock()
if err != nil {
s.lastErr[email] = err
s.logger.Error("scheduled sync failed",
"email", email,
"duration", time.Since(start),
"error", err)
// Transient network failures (e.g., DNS lookup timeout after
// laptop sleep/wake) aren't actionable — the next scheduled tick
// will retry. Log at WARN to keep them out of error dashboards.
if syncerr.IsTransientNetwork(err) {
s.logger.Warn("scheduled sync skipped (transient network)",
"email", email,
"duration", time.Since(start),
"error", err)
} else {
s.logger.Error("scheduled sync failed",
"email", email,
"duration", time.Since(start),
"error", err)
}
} else {
s.lastRun[email] = time.Now()
s.lastErr[email] = nil
Expand Down
38 changes: 33 additions & 5 deletions internal/store/store.go
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,13 @@ type Store struct {
closeCleanup func()
}

const defaultSQLiteParams = "?_journal_mode=WAL&_busy_timeout=30000&_synchronous=NORMAL&_foreign_keys=ON"
// synchronous=FULL + fullfsync=true protects WAL writes against OS/power crashes
// (NORMAL only protects against process crashes). msgvault is commonly run as a
// laptop daemon (`msgvault serve`) where sleep/wake, forced reboots, and OOM kills
// give many opportunities to leave a torn page on disk; the write volume is tiny
// so the durability cost is negligible. fullfsync is macOS-only (F_FULLFSYNC
// fcntl) and a no-op on other platforms.
const defaultSQLiteParams = "?_journal_mode=WAL&_busy_timeout=30000&_synchronous=FULL&_fullfsync=true&_foreign_keys=ON"

// isSQLiteError checks if err is a sqlite3.Error with a message containing substr.
// This is more robust than strings.Contains on err.Error() because it first
Expand All @@ -63,18 +69,40 @@ func isPostgresURL(dbPath string) bool {
return strings.HasPrefix(dbPath, "postgresql://") || strings.HasPrefix(dbPath, "postgres://")
}

// testSQLiteParams configures SQLite for ephemeral test databases: WAL mode
// for concurrency parity with production, but synchronous=OFF (no fsync per
// commit). Test DBs live in t.TempDir() and are discarded at test exit, so
// durability against OS crashes is irrelevant — and on slow-fsync platforms
// like Windows CI runners, the production FULL setting can push bulk-import
// tests past their timing tripwires.
const testSQLiteParams = "?_journal_mode=WAL&_busy_timeout=30000&_synchronous=OFF&_foreign_keys=ON"

// Open opens or creates the database at the given path.
// If dbPath is a postgres:// or postgresql:// URL, opens a PostgreSQL connection.
// Otherwise, opens a SQLite database at the file path.
func Open(dbPath string) (*Store, error) {
if isPostgresURL(dbPath) {
return openPostgres(dbPath)
}
return openSQLite(dbPath)
return openSQLite(dbPath, defaultSQLiteParams)
}

// OpenForTest opens or creates a database tuned for test use: ephemeral,
// fast, with durability disabled. PostgreSQL URLs go through the normal
// connection path (durability is a server-side concern there).
//
// Not for production use — a process crash mid-test can leave a corrupt
// database, which is fine because tests recreate it from scratch.
func OpenForTest(dbPath string) (*Store, error) {
if isPostgresURL(dbPath) {
return openPostgres(dbPath)
}
return openSQLite(dbPath, testSQLiteParams)
}

// openSQLite opens a SQLite database at the given file path.
func openSQLite(dbPath string) (*Store, error) {
// openSQLite opens a SQLite database at the given file path with the
// supplied DSN parameters appended.
func openSQLite(dbPath, params string) (*Store, error) {
// Ensure directory exists (skip for in-memory databases)
if dbPath != ":memory:" && !strings.Contains(dbPath, ":memory:") {
dir := filepath.Dir(dbPath)
Expand All @@ -83,7 +111,7 @@ func openSQLite(dbPath string) (*Store, error) {
}
}

dsn := dbPath + defaultSQLiteParams
dsn := dbPath + params
db, err := sql.Open("sqlite3", dsn)
if err != nil {
return nil, fmt.Errorf("open database: %w", err)
Expand Down
50 changes: 50 additions & 0 deletions internal/syncerr/transient.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,50 @@
// Package syncerr provides shared error classification for sync flows.
package syncerr

import (
"errors"
"net"
"strings"
)

// IsTransientNetwork reports whether err (or anything it wraps) indicates a
// transient network problem rather than a logical or auth failure. It catches
// DNS lookup failures, dial timeouts, and connection refused — common after
// laptop sleep/wake or Wi-Fi flaps. Callers should retry on the next schedule
// instead of alerting the user; the underlying credentials are not invalid.
func IsTransientNetwork(err error) bool {
if err == nil {
return false
}
var dnsErr *net.DNSError
if errors.As(err, &dnsErr) {
return true
}
var opErr *net.OpError
if errors.As(err, &opErr) {
return true
}
var netErr net.Error
if errors.As(err, &netErr) && netErr.Timeout() {
return true
}
// Some libraries (notably golang.org/x/oauth2) wrap transport errors
// with fmt.Errorf without %w, hiding the typed *url.Error / *net.OpError
// from errors.As. Fall back to substring matching for those cases.
s := err.Error()
switch {
case strings.Contains(s, "dial tcp"):
return true
case strings.Contains(s, "no such host"):
return true
case strings.Contains(s, "i/o timeout"):
return true
case strings.Contains(s, "connection refused"):
return true
case strings.Contains(s, "network is unreachable"):
return true
case strings.Contains(s, "no route to host"):
return true
}
return false
}
70 changes: 70 additions & 0 deletions internal/syncerr/transient_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
package syncerr

import (
"errors"
"fmt"
"net"
"net/url"
"testing"
)

func TestIsTransientNetwork(t *testing.T) {
dnsErr := &net.DNSError{Err: "no such host", Name: "oauth2.googleapis.com", IsNotFound: true}
dialTimeoutErr := &url.Error{
Op: "Post",
URL: "https://oauth2.googleapis.com/token",
Err: &net.OpError{
Op: "dial",
Net: "tcp",
Err: errors.New("i/o timeout"),
},
}

tests := []struct {
name string
err error
want bool
}{
{"nil", nil, false},
{"plain string error", errors.New("something broke"), false},
{"typed DNSError", dnsErr, true},
{"typed url.Error wrapping dial timeout", dialTimeoutErr, true},
{"wrapped DNSError via %w", fmt.Errorf("refresh token: %w", dnsErr), true},
// oauth2 lib wraps with %v (not %w), so typed-unwrap fails;
// substring fallback must catch it.
{
"oauth2-style %v-wrapped dial error",
fmt.Errorf(`refresh token: Post "https://oauth2.googleapis.com/token": dial tcp: lookup oauth2.googleapis.com: i/o timeout`),
true,
},
{
"no such host substring",
fmt.Errorf(`refresh token: Post "...": dial tcp: lookup oauth2.googleapis.com: no such host`),
true,
},
{
"connection refused substring",
errors.New("dial tcp 127.0.0.1:443: connection refused"),
true,
},
{
"actual auth error (invalid_grant) is NOT transient",
errors.New(`oauth2: "invalid_grant" "Token has been expired or revoked."`),
false,
},
{
"generic SQL error is NOT transient",
errors.New("database disk image is malformed"),
false,
},
}

for _, tc := range tests {
t.Run(tc.name, func(t *testing.T) {
got := IsTransientNetwork(tc.err)
if got != tc.want {
t.Errorf("IsTransientNetwork(%v) = %v, want %v", tc.err, got, tc.want)
}
})
}
}
2 changes: 1 addition & 1 deletion internal/testutil/store_helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ func NewTestStore(t *testing.T) *store.Store {
}

dbPath := filepath.Join(t.TempDir(), "test.db")
st, err := store.Open(dbPath)
st, err := store.OpenForTest(dbPath)
if err != nil {
t.Fatalf("open store: %v", err)
}
Expand Down
Loading