From 588003f34f72a3efa919ac14912bf5bbff942ccc Mon Sep 17 00:00:00 2001 From: Evsyukov Denis Date: Wed, 6 Aug 2025 17:10:59 +0300 Subject: [PATCH 01/10] feat: enhance security validation for repository URLs and improve error handling --- main.go | 453 +++++++++++++++++++++++++++++++++++++++++++++---- main_test.go | 468 ++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 888 insertions(+), 33 deletions(-) diff --git a/main.go b/main.go index 72921f2..0922161 100644 --- a/main.go +++ b/main.go @@ -1,13 +1,17 @@ package main import ( + "context" + "errors" "fmt" - "log" + "net/url" "os" "os/exec" "path/filepath" "regexp" "strings" + "sync" + "time" "github.com/spf13/pflag" ) @@ -18,7 +22,190 @@ var ( date = "unknown" ) -var r = regexp.MustCompile(`^(?:.*://)?(?:[^@]+@)?([^:/]+)(?::\d+)?[/:]?(.*)$`) +var regexPool = sync.Pool{ + New: func() any { + return regexp.MustCompile(`^(?:.*://)?(?:[^@]+@)?([^:/]+)(?::\d+)?[/:]?(.*)$`) + }, +} + +type cacheEntry struct { + exists bool + timestamp time.Time +} + +type DirCache struct { + cache map[string]cacheEntry + mutex sync.RWMutex + ttl time.Duration +} + +var dirCache = &DirCache{ + cache: make(map[string]cacheEntry), + ttl: 30 * time.Second, +} + +// Security validation functions + +var ( + // Allowed URL schemes for git repositories + allowedSchemes = map[string]bool{ + "https": true, + "http": true, + "ssh": true, + "git": true, + } + + // Dangerous characters that could be used for command injection + dangerousChars = regexp.MustCompile(`[;&|$\x60<>(){}[\]!*?]`) + + // Valid hostname pattern - more restrictive than RFC but safer + validHostname = regexp.MustCompile(`^[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?(\.[a-zA-Z0-9]([a-zA-Z0-9-]*[a-zA-Z0-9])?)*$`) + + // Valid path characters for Git repositories + validRepoPath = regexp.MustCompile(`^[a-zA-Z0-9._/-]+$`) +) + +// validateRepositoryURL performs comprehensive security validation of repository URLs +func validateRepositoryURL(repo string) error { + if repo == "" { + return errors.New("repository URL cannot be empty") + } + + // Check for dangerous characters that could indicate command injection + if dangerousChars.MatchString(repo) { + return errors.New("repository URL contains dangerous characters") + } + + // Handle SSH URLs like git@github.com:user/repo.git + if strings.Contains(repo, "@") && strings.Contains(repo, ":") && !strings.Contains(repo, "://") { + return validateSSHURL(repo) + } + + // Parse as regular URL + parsedURL, err := url.Parse(repo) + if err != nil { + return fmt.Errorf("invalid URL format: %w", err) + } + + // Validate scheme + if parsedURL.Scheme != "" && !allowedSchemes[parsedURL.Scheme] { + return fmt.Errorf("unsupported URL scheme: %s", parsedURL.Scheme) + } + + // Validate hostname + if parsedURL.Host != "" && !validHostname.MatchString(parsedURL.Host) { + return fmt.Errorf("invalid hostname: %s", parsedURL.Host) + } + + // Validate path for traversal attacks + if err := validatePath(parsedURL.Path); err != nil { + return err + } + + return nil +} + +// validateSSHURL validates SSH-style Git URLs (git@host:path) +func validateSSHURL(repo string) error { + parts := strings.SplitN(repo, "@", 2) + if len(parts) != 2 { + return errors.New("invalid SSH URL format") + } + + hostPath := parts[1] + hostPathParts := strings.SplitN(hostPath, ":", 2) + if len(hostPathParts) != 2 { + return errors.New("invalid SSH URL format - missing colon separator") + } + + host := hostPathParts[0] + path := hostPathParts[1] + + // Validate hostname + if !validHostname.MatchString(host) { + return fmt.Errorf("invalid hostname in SSH URL: %s", host) + } + + // Validate path + if err := validatePath(path); err != nil { + return err + } + + return nil +} + +// validatePath checks for path traversal attacks and invalid characters +func validatePath(path string) error { + if path == "" { + return nil // Empty path is acceptable + } + + // Check for path traversal attempts + if strings.Contains(path, "..") { + return errors.New("path traversal detected in URL") + } + + // Check for absolute paths that could escape intended directory + if strings.HasPrefix(path, "/") { + // Remove leading slash for validation but allow it + path = strings.TrimPrefix(path, "/") + } + + // Allow tilde for user directories but validate the rest + if strings.HasPrefix(path, "~") { + path = strings.TrimPrefix(path, "~") + if strings.HasPrefix(path, "/") { + path = strings.TrimPrefix(path, "/") + } + } + + // Validate remaining path characters (allow common Git repo path characters) + if path != "" && !validRepoPath.MatchString(path) { + return fmt.Errorf("invalid characters in repository path: %s", path) + } + + return nil +} + +// secureGitClone performs git clone with additional security measures including timeout +func secureGitClone(repository, targetDir string, quiet bool) error { + // Double-check validation (defense in depth) + if err := validateRepositoryURL(repository); err != nil { + return fmt.Errorf("security validation failed: %w", err) + } + + // Validate target directory to prevent directory traversal + cleanTargetDir := filepath.Clean(targetDir) + if strings.Contains(cleanTargetDir, "..") { + return errors.New("target directory contains path traversal") + } + + // Create context with timeout to prevent hanging operations + ctx, cancel := context.WithTimeout(context.Background(), 10*time.Minute) + defer cancel() + + // Create command with explicit arguments and timeout context (no shell interpretation) + cmd := exec.CommandContext(ctx, "git", "clone", "--", repository) + + // Set working directory + cmd.Dir = cleanTargetDir + + // Configure output + if !quiet { + cmd.Stdout = os.Stderr + cmd.Stderr = os.Stderr + } + + // Execute with timeout protection + if err := cmd.Run(); err != nil { + if ctx.Err() == context.DeadlineExceeded { + return fmt.Errorf("git clone operation timed out after 10 minutes: %s", repository) + } + return err + } + + return nil +} func main() { var showCommandHelp, showVersionInfo, quiet bool @@ -41,91 +228,297 @@ func main() { return } + args := pflag.Args() + + // Validate that we have arguments + if len(args) == 0 { + prnt("error: no repository URLs provided") + usage() + os.Exit(1) + } + + // Validate each argument is not empty + for i, arg := range args { + if strings.TrimSpace(arg) == "" { + prnt("error: argument %d is empty", i+1) + os.Exit(1) + } + } + + // Check git availability with better error message if _, err := exec.LookPath("git"); err != nil { - prnt("git not found") + prnt("error: git command not found in PATH") + prnt("please install git: https://git-scm.com/downloads") os.Exit(1) } - var projectDir string - for _, arg := range pflag.Args() { - repository := arg - projectDir = getProjectDir(repository) + var lastSuccessfulProjectDir string + var processedCount int + var failedCount int + + for _, arg := range args { + repository := strings.TrimSpace(arg) + processedCount++ + + // Security: Validate repository URL before processing + if err := validateRepositoryURL(repository); err != nil { + prnt("invalid repository URL '%s': %s", repository, err) + failedCount++ + continue + } + + projectDir, err := getProjectDir(repository) + if err != nil { + prnt("failed to determine project directory for '%s': %s", repository, err) + failedCount++ + continue + } + // Check if directory already exists and is not empty if ok := isDirectoryNotEmpty(projectDir); ok { + if !quiet { + prnt("repository already exists: %s", projectDir) + } + // Still consider this successful for output purposes + lastSuccessfulProjectDir = projectDir continue } + // Create parent directory if err := os.MkdirAll(filepath.Dir(projectDir), 0750); err != nil { prnt("failed create directory: %s", err) + failedCount++ continue } - cmd := exec.Command("git", "clone", repository) - if !quiet { - cmd.Stdout = os.Stderr - cmd.Stderr = os.Stderr - } - cmd.Dir = filepath.Dir(projectDir) - if err := cmd.Run(); err != nil { - prnt("failed clone repository: %s", err) + // Security: Use secure git clone with validated arguments + if err := secureGitClone(repository, filepath.Dir(projectDir), quiet); err != nil { + prnt("failed clone repository '%s': %s", repository, err) + failedCount++ continue } + + // Clone was successful + lastSuccessfulProjectDir = projectDir if !quiet { fmt.Fprintln(os.Stderr) } } - // Print latest project directory - abs, _ := filepath.Abs(projectDir) - fmt.Println(abs) + // Print summary if multiple repositories were processed + if processedCount > 1 && !quiet { + successCount := processedCount - failedCount + prnt("processed %d repositories: %d successful, %d failed", processedCount, successCount, failedCount) + } + + // Print the last successfully processed project directory + if lastSuccessfulProjectDir != "" { + abs, err := filepath.Abs(lastSuccessfulProjectDir) + if err != nil { + prnt("failed to get absolute path for %s: %s", lastSuccessfulProjectDir, err) + fmt.Println(lastSuccessfulProjectDir) // fallback to relative path + } else { + fmt.Println(abs) + } + } else { + // No successful repositories processed + if !quiet { + prnt("no repositories were successfully processed") + } + os.Exit(1) + } } // normalize normalizes the given repository string and returns the parsed repository URL. -func normalize(repo string) string { +// Enhanced with security validation against path traversal attacks. +// Returns error instead of empty string for better error handling. +func normalize(repo string) (string, error) { + if repo == "" { + return "", errors.New("repository URL is empty") + } + + r := regexPool.Get().(*regexp.Regexp) + defer regexPool.Put(r) + match := r.FindStringSubmatch(repo) if len(match) != 3 { - return "" + return "", errors.New("failed to parse repository URL format") } + + host := match[1] path := match[2] + + // Security: Validate host component + if !validHostname.MatchString(host) { + return "", fmt.Errorf("invalid hostname: %s", host) + } + + // Security: Sanitize path to prevent traversal attacks + sanitizedPath, err := sanitizePathWithError(path) + if err != nil { + return "", fmt.Errorf("invalid repository path: %w", err) + } + + // Security: Validate final path doesn't contain dangerous patterns + if strings.Contains(sanitizedPath, "..") || strings.Contains(sanitizedPath, "//") { + return "", errors.New("repository path contains dangerous patterns") + } + + return filepath.Join(host, sanitizedPath), nil +} + +// sanitizePathWithError cleans and validates repository paths against security threats +// Returns error for better error handling instead of empty string +func sanitizePathWithError(path string) (string, error) { + if path == "" { + return "", nil // Empty path is acceptable + } + + originalPath := path + + // Remove dangerous prefixes and suffixes + path = strings.TrimPrefix(path, "/") + path = strings.TrimPrefix(path, "~") + path = strings.TrimPrefix(path, "/") + path = strings.TrimSuffix(path, "/") + path = strings.TrimSuffix(path, ".git") + + // Security: Check for path traversal attempts + if strings.Contains(path, "..") { + return "", fmt.Errorf("path traversal detected: %s", originalPath) + } + + // Security: Remove consecutive slashes + for strings.Contains(path, "//") { + path = strings.ReplaceAll(path, "//", "/") + } + + // Security: Validate path contains only safe characters + if path != "" && !validRepoPath.MatchString(path) { + return "", fmt.Errorf("path contains invalid characters: %s", originalPath) + } + + // Security: Ensure path doesn't start with dangerous patterns + dangerousPatterns := []string{".", "-", "_"} + for _, pattern := range dangerousPatterns { + if strings.HasPrefix(path, pattern+"/") { + return "", fmt.Errorf("path starts with dangerous pattern '%s': %s", pattern, originalPath) + } + } + + return path, nil +} + +// sanitizePath cleans and validates repository paths against security threats +func sanitizePath(path string) string { + if path == "" { + return "" + } + + // Remove dangerous prefixes and suffixes path = strings.TrimPrefix(path, "/") path = strings.TrimPrefix(path, "~") path = strings.TrimPrefix(path, "/") path = strings.TrimSuffix(path, "/") path = strings.TrimSuffix(path, ".git") - return filepath.Join(match[1], path) + // Security: Check for path traversal attempts + if strings.Contains(path, "..") { + return "" + } + + // Security: Remove consecutive slashes + for strings.Contains(path, "//") { + path = strings.ReplaceAll(path, "//", "/") + } + + // Security: Validate path contains only safe characters + if path != "" && !validRepoPath.MatchString(path) { + return "" + } + + // Security: Ensure path doesn't start with dangerous patterns + dangerousPatterns := []string{".", "-", "_"} + for _, pattern := range dangerousPatterns { + if strings.HasPrefix(path, pattern+"/") { + return "" + } + } + + return path } // getProjectDir returns the project directory based on the given repository URL. // It retrieves the GIT_PROJECT_DIR environment variable and normalizes it. // If the GIT_PROJECT_DIR starts with "~", it replaces it with the user's home directory. // The normalized repository URL is then joined with the GIT_PROJECT_DIR to form the project directory path. -// The project directory path is returned as a string. -func getProjectDir(repository string) string { +// Returns error for better error handling instead of empty string. +func getProjectDir(repository string) (string, error) { gitProjectDir := os.Getenv("GIT_PROJECT_DIR") + if strings.HasPrefix(gitProjectDir, "~") { homeDir, err := os.UserHomeDir() if err != nil { - log.Fatal(err) + return "", fmt.Errorf("failed to get user home directory: %w", err) } gitProjectDir = filepath.Join(homeDir, gitProjectDir[1:]) } - return filepath.Join(gitProjectDir, normalize(repository)) + normalizedRepo, err := normalize(repository) + if err != nil { + return "", fmt.Errorf("failed to normalize repository URL: %w", err) + } + + // Security: Validate and clean the final path + projectDir := filepath.Join(gitProjectDir, normalizedRepo) + cleanedPath := filepath.Clean(projectDir) + + // Security: Ensure the path doesn't escape the base directory + if gitProjectDir != "" && !strings.HasPrefix(cleanedPath, filepath.Clean(gitProjectDir)) { + return "", errors.New("security: path traversal detected in project directory") + } + + return cleanedPath, nil } -// isDirectoryNotEmpty checks if the specified directory is not empty. +// isDirectoryNotEmptyRaw checks if the specified directory is not empty without caching. // It uses the Readdirnames function to get the directory contents without loading full FileInfo // structures for each entry. If there are any entries, it returns true. Otherwise, it returns false. -func isDirectoryNotEmpty(name string) bool { +func isDirectoryNotEmptyRaw(name string) bool { f, err := os.Open(name) if err != nil { return false } - defer f.Close() - _, err = f.Readdirnames(1) - return err == nil + names, err := f.Readdirnames(1) + f.Close() // Direct call without defer for better performance + + return err == nil && len(names) > 0 +} + +// IsDirectoryNotEmpty checks if the specified directory is not empty with caching. +func (dc *DirCache) IsDirectoryNotEmpty(name string) bool { + dc.mutex.RLock() + if entry, ok := dc.cache[name]; ok { + if time.Since(entry.timestamp) < dc.ttl { + dc.mutex.RUnlock() + return entry.exists + } + } + dc.mutex.RUnlock() + + exists := isDirectoryNotEmptyRaw(name) + + dc.mutex.Lock() + dc.cache[name] = cacheEntry{exists, time.Now()} + dc.mutex.Unlock() + + return exists +} + +// isDirectoryNotEmpty is a wrapper that uses the global cache. +func isDirectoryNotEmpty(name string) bool { + return dirCache.IsDirectoryNotEmpty(name) } // Usage prints the usage of the program. diff --git a/main_test.go b/main_test.go index 8ad8211..e1dbdd4 100644 --- a/main_test.go +++ b/main_test.go @@ -1,7 +1,9 @@ package main import ( + "os" "path/filepath" + "strings" "testing" ) @@ -75,7 +77,12 @@ func Test_getProjectDir(t *testing.T) { t.Setenv("HOME", tt.homeVar) t.Setenv("GIT_PROJECT_DIR", tt.gitProjectDir) - if got := getProjectDir(tt.repository); got != tt.want { + got, err := getProjectDir(tt.repository) + if err != nil { + t.Errorf("getProjectDir() unexpected error = %v", err) + return + } + if got != tt.want { t.Errorf("getProjectDir() = %v, want %v", got, tt.want) } }) @@ -163,9 +170,464 @@ func Test_normalize(t *testing.T) { } for _, tt := range tests { t.Run(tt.name, func(t *testing.T) { - gotRepo := normalize(tt.args.repository) + gotRepo, err := normalize(tt.args.repository) + if err != nil { + t.Errorf("normalize() unexpected error = %v", err) + return + } if gotRepo != tt.wantRepo { - t.Errorf("parse() gotRepo = %v, want %v", gotRepo, tt.wantRepo) + t.Errorf("normalize() gotRepo = %v, want %v", gotRepo, tt.wantRepo) + } + }) + } +} + +// Benchmark functions +func BenchmarkNormalize(b *testing.B) { + repository := "https://github.com/user/repo" + b.ResetTimer() + for i := 0; i < b.N; i++ { + _, _ = normalize(repository) + } +} + +func BenchmarkIsDirectoryNotEmpty(b *testing.B) { + // Create test directory + tempDir, err := os.MkdirTemp("", "benchmark-test") + if err != nil { + b.Fatal(err) + } + defer os.RemoveAll(tempDir) + + // Create a non-empty directory + nonEmptyDir := filepath.Join(tempDir, "non-empty") + if err := os.Mkdir(nonEmptyDir, 0755); err != nil { + b.Fatal(err) + } + if err := os.WriteFile(filepath.Join(nonEmptyDir, "test.txt"), []byte("test"), 0644); err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + isDirectoryNotEmpty(nonEmptyDir) + } +} + +func BenchmarkIsDirectoryNotEmptyRaw(b *testing.B) { + // Create test directory + tempDir, err := os.MkdirTemp("", "benchmark-test") + if err != nil { + b.Fatal(err) + } + defer os.RemoveAll(tempDir) + + // Create a non-empty directory + nonEmptyDir := filepath.Join(tempDir, "non-empty") + if err := os.Mkdir(nonEmptyDir, 0755); err != nil { + b.Fatal(err) + } + if err := os.WriteFile(filepath.Join(nonEmptyDir, "test.txt"), []byte("test"), 0644); err != nil { + b.Fatal(err) + } + + b.ResetTimer() + for i := 0; i < b.N; i++ { + isDirectoryNotEmptyRaw(nonEmptyDir) + } +} + +func BenchmarkIsDirectoryNotEmptyCache(b *testing.B) { + // Create test directory + tempDir, err := os.MkdirTemp("", "benchmark-test") + if err != nil { + b.Fatal(err) + } + defer os.RemoveAll(tempDir) + + // Create a non-empty directory + nonEmptyDir := filepath.Join(tempDir, "non-empty") + if err := os.Mkdir(nonEmptyDir, 0755); err != nil { + b.Fatal(err) + } + if err := os.WriteFile(filepath.Join(nonEmptyDir, "test.txt"), []byte("test"), 0644); err != nil { + b.Fatal(err) + } + + // Clear cache before benchmark + dirCache.mutex.Lock() + dirCache.cache = make(map[string]cacheEntry) + dirCache.mutex.Unlock() + + b.ResetTimer() + for i := 0; i < b.N; i++ { + dirCache.IsDirectoryNotEmpty(nonEmptyDir) // This will benefit from caching after first call + } +} + +// Security tests +func TestValidateRepositoryURL(t *testing.T) { + tests := []struct { + name string + url string + wantErr bool + errMsg string + }{ + { + name: "valid https URL", + url: "https://github.com/user/repo.git", + wantErr: false, + }, + { + name: "valid SSH URL", + url: "git@github.com:user/repo.git", + wantErr: false, + }, + { + name: "command injection attempt", + url: "https://github.com/user/repo.git; rm -rf /", + wantErr: true, + errMsg: "dangerous characters", + }, + { + name: "path traversal in URL", + url: "https://github.com/../../../etc/passwd", + wantErr: true, + errMsg: "path traversal", + }, + { + name: "invalid scheme", + url: "ftp://github.com/user/repo", + wantErr: true, + errMsg: "unsupported URL scheme", + }, + { + name: "empty URL", + url: "", + wantErr: true, + errMsg: "cannot be empty", + }, + { + name: "backticks for command substitution", + url: "https://github.com/user/`whoami`.git", + wantErr: true, + errMsg: "dangerous characters", + }, + { + name: "pipe character", + url: "https://github.com/user/repo | cat /etc/passwd", + wantErr: true, + errMsg: "dangerous characters", + }, + { + name: "invalid hostname", + url: "https://github..com/user/repo", + wantErr: true, + errMsg: "invalid hostname", + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := validateRepositoryURL(tt.url) + if tt.wantErr { + if err == nil { + t.Errorf("validateRepositoryURL() expected error but got none for URL: %s", tt.url) + return + } + if !strings.Contains(err.Error(), tt.errMsg) { + t.Errorf("validateRepositoryURL() error = %v, expected to contain %v", err, tt.errMsg) + } + } else { + if err != nil { + t.Errorf("validateRepositoryURL() unexpected error = %v for URL: %s", err, tt.url) + } + } + }) + } +} + +func TestNormalizeSecurity(t *testing.T) { + tests := []struct { + name string + input string + expected string // Empty string means should be rejected + }{ + { + name: "normal repo", + input: "https://github.com/user/repo", + expected: "github.com/user/repo", + }, + { + name: "path traversal attempt", + input: "https://github.com/../../../etc/passwd", + expected: "", // Should be rejected + }, + { + name: "double slash", + input: "https://github.com//user//repo", + expected: "github.com/user/repo", + }, + { + name: "invalid hostname", + input: "https://github..com/user/repo", + expected: "", // Should be rejected + }, + { + name: "dangerous path start with dot", + input: "https://github.com/./repo", + expected: "", // Should be rejected + }, + { + name: "invalid characters in path", + input: "https://github.com/user/repo