diff --git a/CHANGELOG.md b/CHANGELOG.md index 44555fb5a..8515a9271 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,7 @@ ## Features +- Add `--json` flag for JSONL format output. ## Bugfixes diff --git a/Cargo.lock b/Cargo.lock index b99ebd54e..1c3f1d4a3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -78,6 +78,12 @@ dependencies = [ "once_cell", ] +[[package]] +name = "base64" +version = "0.22.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" + [[package]] name = "bitflags" version = "1.3.2" @@ -294,6 +300,7 @@ dependencies = [ "aho-corasick", "anyhow", "argmax", + "base64", "clap", "clap_complete", "crossbeam-channel", @@ -312,6 +319,7 @@ dependencies = [ "nu-ansi-term", "regex", "regex-syntax", + "serde_json", "tempfile", "test-case", "tikv-jemallocator", @@ -388,6 +396,12 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7943c866cc5cd64cbc25b2e01621d07fa8eb2a1a23160ee81ce38704e97b8ecf" +[[package]] +name = "itoa" +version = "1.0.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" + [[package]] name = "jiff" version = "0.2.15" @@ -621,6 +635,12 @@ dependencies = [ "windows-sys 0.61.2", ] +[[package]] +name = "ryu" +version = "1.0.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "28d3b2b1366ec20994f1fd18c3c594f05c5dd4bc44d8bb0c1c632c8d6829481f" + [[package]] name = "same-file" version = "1.0.6" @@ -659,6 +679,19 @@ dependencies = [ "syn", ] +[[package]] +name = "serde_json" +version = "1.0.145" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "402a6f66d8c709116cf22f558eab210f5a50187f702eb4d7e5ef38d9a7f1c79c" +dependencies = [ + "itoa", + "memchr", + "ryu", + "serde", + "serde_core", +] + [[package]] name = "shlex" version = "1.3.0" diff --git a/Cargo.toml 
b/Cargo.toml index eb27ddf52..380bbaeb9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -5,19 +5,13 @@ description = "fd is a simple, fast and user-friendly alternative to find." exclude = ["/benchmarks/*"] homepage = "https://github.com/sharkdp/fd" documentation = "https://docs.rs/fd-find" -keywords = [ - "search", - "find", - "file", - "filesystem", - "tool", -] +keywords = ["search", "find", "file", "filesystem", "tool"] license = "MIT OR Apache-2.0" name = "fd-find" readme = "README.md" repository = "https://github.com/sharkdp/fd" version = "10.3.0" -edition= "2024" +edition = "2024" rust-version = "1.90.0" [badges.appveyor] @@ -43,9 +37,10 @@ anyhow = "1.0" etcetera = "0.11" normpath = "1.1.1" crossbeam-channel = "0.5.15" -clap_complete = {version = "4.5.60", optional = true} +clap_complete = { version = "4.5.60", optional = true } faccess = "0.2.4" jiff = "0.2.14" +base64 = "0.22.1" [dependencies.clap] version = "4.5.51" @@ -57,7 +52,11 @@ default-features = false features = ["nu-ansi-term"] [target.'cfg(unix)'.dependencies] -nix = { version = "0.30.1", default-features = false, features = ["signal", "user", "hostname"] } +nix = { version = "0.30.1", default-features = false, features = [ + "signal", + "user", + "hostname", +] } [target.'cfg(all(unix, not(target_os = "redox")))'.dependencies] libc = "0.2" @@ -68,13 +67,14 @@ libc = "0.2" # This has to be kept in sync with src/main.rs where the allocator for # the program is set. 
[target.'cfg(all(not(windows), not(target_os = "android"), not(target_os = "macos"), not(target_os = "freebsd"), not(target_os = "openbsd"), not(target_os = "illumos"), not(all(target_env = "musl", target_pointer_width = "32")), not(target_arch = "riscv64")))'.dependencies] -tikv-jemallocator = {version = "0.6.0", optional = true} +tikv-jemallocator = { version = "0.6.0", optional = true } [dev-dependencies] diff = "0.1" tempfile = "3.23" filetime = "0.2" test-case = "3.3" +serde_json = "1.0.145" [profile.release] lto = true diff --git a/doc/fd.1 b/doc/fd.1 index df42b1724..fd6c2f41e 100644 --- a/doc/fd.1 +++ b/doc/fd.1 @@ -510,6 +510,30 @@ Maximum number of arguments to pass to the command given with -X. If the number greater than the given size, the command given with -X is run again with remaining arguments. A batch size of zero means there is no limit (default), but note that batching might still happen due to OS restrictions on the maximum length of command lines. +.TP +.BI "\-\-json " +.RS +Specify JSONL (also known as NDJSON) format to use for the output. + +Output fields: + + - "path": The file path as a UTF\-8 string. + + Note that when the path contains invalid UTF-8 sequences, it is encoded in base64 and stored in the "path_b64" field instead. + + - "type": The file type (e.g., "file", "directory"). + + - "size": The file size in bytes. + + - "mode": The file permissions in octal (e.g., 644). + + - "modified": The last modification time in ISO 8601 format (e.g., 2000-01-01T12:00:00Z). + + - "accessed": The last access time in ISO 8601 format. + + - "created": The creation time in ISO 8601 format. +.RE +.TP .SH PATTERN SYNTAX The regular expression syntax used by fd is documented here: diff --git a/src/cli.rs b/src/cli.rs index d5174689d..249570b2d 100644 --- a/src/cli.rs +++ b/src/cli.rs @@ -648,6 +648,15 @@ pub struct Opts { )] search_path: Vec, + /// Print results in JSONL format. 
+ #[arg( + long, + value_name = "json", + help = "Print results in JSONL format so you can pipe it to tools.", + long_help + )] + pub json: bool, + /// By default, relative paths are prefixed with './' when -x/--exec, /// -X/--exec-batch, or -0/--print0 are given, to reduce the risk of a /// path starting with '-' being treated as a command line option. Use @@ -825,6 +834,15 @@ pub enum HyperlinkWhen { Never, } +#[derive(Copy, Clone, PartialEq, Eq, Debug, ValueEnum)] +pub enum OutputFormat { + /// Plain text output (default) + Plain, + /// JSONL (JSON Lines, also known as Newline Delimited JSON) output + #[value(alias = "ndjson")] + Jsonl, +} + // there isn't a derive api for getting grouped values yet, // so we have to use hand-rolled parsing for exec and exec-batch pub struct Exec { diff --git a/src/config.rs b/src/config.rs index 9e18120c4..b1ecee3d7 100644 --- a/src/config.rs +++ b/src/config.rs @@ -130,6 +130,9 @@ pub struct Config { /// Whether or not to use hyperlinks on paths pub hyperlink: bool, + + /// Whether to print results in JSONL format + pub jsonl: bool, } impl Config { diff --git a/src/main.rs b/src/main.rs index fafb3b900..f3c08b729 100644 --- a/src/main.rs +++ b/src/main.rs @@ -326,6 +326,7 @@ fn construct_config(mut opts: Opts, pattern_regexps: &[String]) -> Result String { path.replace(std::path::MAIN_SEPARATOR, new_path_separator) } -// TODO: this function is performance critical and can probably be optimized -pub fn print_entry(stdout: &mut W, entry: &DirEntry, config: &Config) -> io::Result<()> { - let mut has_hyperlink = false; - if config.hyperlink - && let Some(url) = PathUrl::new(entry.path()) - { - write!(stdout, "\x1B]8;;{url}\x1B\\")?; - has_hyperlink = true; +fn encode_path(path: &Path) -> PathEncoding<'_> { + match path.to_str() { + Some(utf8) => PathEncoding::Utf8(utf8.escape_default()), + None => PathEncoding::Bytes(path.as_os_str().as_encoded_bytes()), } +} + +enum PathEncoding<'a> { + Utf8(std::str::EscapeDefault<'a>), + 
Bytes(&'a [u8]), +} + +struct FileDetail<'a> { + path: PathEncoding<'a>, + file_type: &'static str, + size: Option, + mode: Option, + modified: Option, + accessed: Option, + created: Option, +} - if let Some(ref format) = config.format { - print_entry_format(stdout, entry, config, format)?; - } else if let Some(ref ls_colors) = config.ls_colors { - print_entry_colorized(stdout, entry, config, ls_colors)?; - } else { - print_entry_uncolorized(stdout, entry, config)?; - }; +pub struct Printer<'a, W> { + config: &'a Config, + stdout: W, +} - if has_hyperlink { - write!(stdout, "\x1B]8;;\x1B\\")?; +impl<'a, W: Write> Printer<'a, W> { + pub fn new(config: &'a Config, stdout: W) -> Self { + Self { config, stdout } } - if config.null_separator { - write!(stdout, "\0") - } else { - writeln!(stdout) + pub fn flush(&mut self) -> io::Result<()> { + self.stdout.flush() } -} -// Display a trailing slash if the path is a directory and the config option is enabled. -// If the path_separator option is set, display that instead. -// The trailing slash will not be colored. 
-#[inline] -fn print_trailing_slash( - stdout: &mut W, - entry: &DirEntry, - config: &Config, - style: Option<&Style>, -) -> io::Result<()> { - if entry.file_type().is_some_and(|ft| ft.is_dir()) { - write!( - stdout, - "{}", - style - .map(Style::to_nu_ansi_term_style) - .unwrap_or_default() - .paint(&config.actual_path_separator) - )?; + // TODO: this function is performance critical and can probably be optimized + pub fn print_entry(&mut self, entry: &DirEntry) -> io::Result<()> { + let mut has_hyperlink = false; + if self.config.hyperlink + && let Some(url) = PathUrl::new(entry.path()) + { + write!(self.stdout, "\x1B]8;;{url}\x1B\\")?; + has_hyperlink = true; + } + match ( + &self.config.format, + &self.config.jsonl, + &self.config.ls_colors, + ) { + (Some(template), _, _) => self.print_entry_format(entry, template)?, + (None, true, _) => self.print_entry_detail(OutputFormat::Jsonl, entry)?, + (None, false, Some(ls_colors)) => self.print_entry_colorized(entry, ls_colors)?, + (None, false, None) => self.print_entry_uncolorized(entry)?, + }; + + if has_hyperlink { + write!(self.stdout, "\x1B]8;;\x1B\\")?; + } + + if self.config.null_separator { + write!(self.stdout, "\0") + } else { + writeln!(self.stdout) + } } - Ok(()) -} -// TODO: this function is performance critical and can probably be optimized -fn print_entry_format( - stdout: &mut W, - entry: &DirEntry, - config: &Config, - format: &FormatTemplate, -) -> io::Result<()> { - let output = format.generate( - entry.stripped_path(config), - config.path_separator.as_deref(), - ); - // TODO: support writing raw bytes on unix? - write!(stdout, "{}", output.to_string_lossy()) -} + // Display a trailing slash if the path is a directory and the config option is enabled. + // If the path_separator option is set, display that instead. + // The trailing slash will not be colored. 
+ #[inline] + fn print_trailing_slash(&mut self, entry: &DirEntry, style: Option<&Style>) -> io::Result<()> { + if entry.file_type().is_some_and(|ft| ft.is_dir()) { + write!( + self.stdout, + "{}", + style + .map(Style::to_nu_ansi_term_style) + .unwrap_or_default() + .paint(&self.config.actual_path_separator) + )?; + } + Ok(()) + } + + // TODO: this function is performance critical and can probably be optimized + fn print_entry_format(&mut self, entry: &DirEntry, format: &FormatTemplate) -> io::Result<()> { + let output = format.generate( + entry.stripped_path(self.config), + self.config.path_separator.as_deref(), + ); + // TODO: support writing raw bytes on unix? + write!(self.stdout, "{}", output.to_string_lossy()) + } + + // TODO: this function is performance critical and can probably be optimized + fn print_entry_colorized(&mut self, entry: &DirEntry, ls_colors: &LsColors) -> io::Result<()> { + // Split the path between the parent and the last component + let mut offset = 0; + let path = entry.stripped_path(self.config); + let path_str = path.to_string_lossy(); -// TODO: this function is performance critical and can probably be optimized -fn print_entry_colorized( - stdout: &mut W, - entry: &DirEntry, - config: &Config, - ls_colors: &LsColors, -) -> io::Result<()> { - // Split the path between the parent and the last component - let mut offset = 0; - let path = entry.stripped_path(config); - let path_str = path.to_string_lossy(); - - if let Some(parent) = path.parent() { - offset = parent.to_string_lossy().len(); - for c in path_str[offset..].chars() { - if std::path::is_separator(c) { - offset += c.len_utf8(); - } else { - break; + if let Some(parent) = path.parent() { + offset = parent.to_string_lossy().len(); + for c in path_str[offset..].chars() { + if std::path::is_separator(c) { + offset += c.len_utf8(); + } else { + break; + } } } - } - if offset > 0 { - let mut parent_str = Cow::from(&path_str[..offset]); - if let Some(ref separator) = 
config.path_separator { - *parent_str.to_mut() = replace_path_separator(&parent_str, separator); + if offset > 0 { + let mut parent_str = Cow::from(&path_str[..offset]); + if let Some(ref separator) = self.config.path_separator { + *parent_str.to_mut() = replace_path_separator(&parent_str, separator); + } + + let style = ls_colors + .style_for_indicator(Indicator::Directory) + .map(Style::to_nu_ansi_term_style) + .unwrap_or_default(); + write!(self.stdout, "{}", style.paint(parent_str))?; } - let style = ls_colors - .style_for_indicator(Indicator::Directory) + let style = entry + .style(ls_colors) .map(Style::to_nu_ansi_term_style) .unwrap_or_default(); - write!(stdout, "{}", style.paint(parent_str))?; + write!(self.stdout, "{}", style.paint(&path_str[offset..]))?; + + self.print_trailing_slash(entry, ls_colors.style_for_indicator(Indicator::Directory))?; + + Ok(()) } - let style = entry - .style(ls_colors) - .map(Style::to_nu_ansi_term_style) - .unwrap_or_default(); - write!(stdout, "{}", style.paint(&path_str[offset..]))?; + // TODO: this function is performance critical and can probably be optimized + fn print_entry_uncolorized_base(&mut self, entry: &DirEntry) -> io::Result<()> { + let path = entry.stripped_path(self.config); - print_trailing_slash( - stdout, - entry, - config, - ls_colors.style_for_indicator(Indicator::Directory), - )?; + let mut path_string = path.to_string_lossy(); + if let Some(ref separator) = self.config.path_separator { + *path_string.to_mut() = replace_path_separator(&path_string, separator); + } + write!(self.stdout, "{path_string}")?; + self.print_trailing_slash(entry, None) + } - Ok(()) -} + #[cfg(not(unix))] + fn print_entry_uncolorized(&mut self, entry: &DirEntry) -> io::Result<()> { + self.print_entry_uncolorized_base(entry) + } + + #[cfg(unix)] + fn print_entry_uncolorized(&mut self, entry: &DirEntry) -> io::Result<()> { + use std::os::unix::ffi::OsStrExt; -// TODO: this function is performance critical and can probably be 
optimized -fn print_entry_uncolorized_base( - stdout: &mut W, - entry: &DirEntry, - config: &Config, -) -> io::Result<()> { - let path = entry.stripped_path(config); - - let mut path_string = path.to_string_lossy(); - if let Some(ref separator) = config.path_separator { - *path_string.to_mut() = replace_path_separator(&path_string, separator); + if self.config.interactive_terminal || self.config.path_separator.is_some() { + // Fall back to the base implementation + self.print_entry_uncolorized_base(entry) + } else { + // Print path as raw bytes, allowing invalid UTF-8 filenames to be passed to other processes + self.stdout + .write_all(entry.stripped_path(self.config).as_os_str().as_bytes())?; + self.print_trailing_slash(entry, None) + } } - write!(stdout, "{path_string}")?; - print_trailing_slash(stdout, entry, config, None) -} -#[cfg(not(unix))] -fn print_entry_uncolorized( - stdout: &mut W, - entry: &DirEntry, - config: &Config, -) -> io::Result<()> { - print_entry_uncolorized_base(stdout, entry, config) -} + fn print_entry_json_obj(&mut self, detail: &FileDetail) -> io::Result<()> { + write!(self.stdout, "{{")?; -#[cfg(unix)] -fn print_entry_uncolorized( - stdout: &mut W, - entry: &DirEntry, - config: &Config, -) -> io::Result<()> { - use std::os::unix::ffi::OsStrExt; - - if config.interactive_terminal || config.path_separator.is_some() { - // Fall back to the base implementation - print_entry_uncolorized_base(stdout, entry, config) - } else { - // Print path as raw bytes, allowing invalid UTF-8 filenames to be passed to other processes - stdout.write_all(entry.stripped_path(config).as_os_str().as_bytes())?; - print_trailing_slash(stdout, entry, config, None) + match &detail.path { + PathEncoding::Utf8(path_utf8) => { + write!(self.stdout, "\"path\":\"{}\"", path_utf8)?; + } + PathEncoding::Bytes(path_bytes) => { + write!( + self.stdout, + "\"path_b64\":\"{}\"", + BASE64_STANDARD.encode(path_bytes) + )?; + } + } + + write!(self.stdout, ",\"type\":\"{}\"", 
detail.file_type)?; + + if let Some(size) = detail.size { + write!(self.stdout, ",\"size\":{size}")?; + } + if let Some(mode) = detail.mode { + write!(self.stdout, ",\"mode\":{mode:o}")?; + } + if let Some(modified) = &detail.modified { + write!(self.stdout, ",\"modified\":\"{}\"", modified)?; + } + if let Some(accessed) = &detail.accessed { + write!(self.stdout, ",\"accessed\":\"{}\"", accessed)?; + } + if let Some(created) = &detail.created { + write!(self.stdout, ",\"created\":\"{}\"", created)?; + } + write!(self.stdout, "}}") + } + + fn print_entry_detail(&mut self, format: OutputFormat, entry: &DirEntry) -> io::Result<()> { + let path = entry.stripped_path(self.config); + let encoded_path = encode_path(path); + let metadata = entry.metadata(); + + let detail = if let Some(meta) = metadata { + let size = meta.len(); + let mode = { + #[cfg(unix)] + { + Some(meta.permissions().mode() & 0o7777) + } + #[cfg(not(unix))] + { + None + } + }; + let ft = match meta.file_type() { + ft if ft.is_dir() => "directory", + ft if ft.is_file() => "file", + ft if ft.is_symlink() => "symlink", + _ => "unknown", + }; + + let modified = meta.modified().ok().and_then(|t| { + t.duration_since(std::time::UNIX_EPOCH) + .ok() + .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) + }); + + let accessed = meta.accessed().ok().and_then(|t| { + t.duration_since(std::time::UNIX_EPOCH) + .ok() + .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) + }); + + let created = meta.created().ok().and_then(|t| { + t.duration_since(std::time::UNIX_EPOCH) + .ok() + .and_then(|d| Timestamp::from_second(d.as_secs() as i64).ok()) + }); + + FileDetail { + path: encoded_path, + file_type: ft, + size: Some(size), + mode, + modified, + accessed, + created, + } + } else { + FileDetail { + path: encoded_path, + file_type: "unknown", + size: None, + mode: None, + modified: None, + accessed: None, + created: None, + } + }; + match format { + OutputFormat::Jsonl => 
self.print_entry_json_obj(&detail), + OutputFormat::Plain => unreachable!("Plain format should not call print_entry_detail"), + } } } diff --git a/src/walk.rs b/src/walk.rs index 27f295db5..b0b3a8fb8 100644 --- a/src/walk.rs +++ b/src/walk.rs @@ -136,8 +136,6 @@ struct ReceiverBuffer<'a, W> { interrupt_flag: &'a AtomicBool, /// Receiver for worker results. rx: Receiver, - /// Standard output. - stdout: W, /// The current buffer mode. mode: ReceiverMode, /// The deadline to switch to streaming mode. @@ -146,9 +144,11 @@ struct ReceiverBuffer<'a, W> { buffer: Vec, /// Result count. num_results: usize, + /// The stdout printer instance. + printer: output::Printer<'a, W>, } -impl<'a, W: Write> ReceiverBuffer<'a, W> { +impl<'a, W: Write + 'a> ReceiverBuffer<'a, W> { /// Create a new receiver buffer. fn new(state: &'a WorkerState, rx: Receiver, stdout: W) -> Self { let config = &state.config; @@ -162,20 +162,20 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { quit_flag, interrupt_flag, rx, - stdout, mode: ReceiverMode::Buffering, deadline, buffer: Vec::with_capacity(MAX_BUFFER_LENGTH), num_results: 0, + printer: output::Printer::new(config, stdout), } } /// Process results until finished. fn process(&mut self) -> ExitCode { loop { - if let Err(ec) = self.poll() { + if let Err(err) = self.poll() { self.quit_flag.store(true, Ordering::Relaxed); - return ec; + return err; } } } @@ -250,7 +250,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { /// Output a path. fn print(&mut self, entry: &DirEntry) -> Result<(), ExitCode> { - if let Err(e) = output::print_entry(&mut self.stdout, entry, self.config) + if let Err(e) = self.printer.print_entry(entry) && e.kind() != ::std::io::ErrorKind::BrokenPipe { print_error(format!("Could not write to output: {e}")); @@ -294,7 +294,7 @@ impl<'a, W: Write> ReceiverBuffer<'a, W> { /// Flush stdout if necessary. 
fn flush(&mut self) -> Result<(), ExitCode> { - if self.stdout.flush().is_err() { + if self.printer.flush().is_err() { // Probably a broken pipe. Exit gracefully. return Err(ExitCode::GeneralError); } diff --git a/tests/tests.rs b/tests/tests.rs index 6d477f662..c6aec16ea 100644 --- a/tests/tests.rs +++ b/tests/tests.rs @@ -2707,3 +2707,50 @@ fn test_hyperlink() { te.assert_output(&["--hyperlink=always", "a.foo"], &expected); } + +/// Test various output formats +#[test] +fn test_output_format() { + let te = TestEnv::new(DEFAULT_DIRS, DEFAULT_FILES); + + let re = te.assert_success_and_get_output(".", &["--json", "."]); + let stdout = String::from_utf8_lossy(&re.stdout); + let mut count = 0; + stdout.split("\n").for_each(|line| { + println!("line: {}", line); + if line.trim().is_empty() { + return; + } + let file: serde_json::Value = serde_json::from_str(line).unwrap(); + assert!(file.is_object() && file["path"].is_string()); + count += 1; + }); + + assert_eq!(count, DEFAULT_FILES.len() + DEFAULT_DIRS.len()); +} + +/// Filenames with invalid UTF-8 sequences +#[cfg(target_os = "linux")] +#[test] +fn test_output_format_invalid_utf8() { + use std::ffi::OsStr; + use std::os::unix::ffi::OsStrExt; + + let dirs = &["test1"]; + let files = &[]; + let te = TestEnv::new(dirs, files); + + fs::File::create( + te.test_root() + .join(OsStr::from_bytes(b"test1/test_\xFEinvalid.txt")), + ) + .unwrap(); + + let re = te.assert_success_and_get_output(".", &["", "--json", "test1/"]); + let stdout = String::from_utf8_lossy(&re.stdout); + let files: serde_json::Value = serde_json::from_str(&stdout).unwrap(); + assert!(files.is_object()); + assert_eq!(files["path_b64"], "dGVzdDEvdGVzdF/+aW52YWxpZC50eHQ="); + + te.assert_output(&["invalid", "test1/"], "test1/test_�invalid.txt"); +}