Skip to content

Commit f105455

Browse files
committed
feat: add glob list support for file patterns
1 parent db20094 commit f105455

File tree

9 files changed

+301
-57
lines changed

9 files changed

+301
-57
lines changed

CHANGELOG.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,11 @@
11
# Changelog
22

3+
## Unreleased
4+
5+
### Features
6+
7+
- Add glob pattern support for `files`/`exclude` (single glob or list) while retaining regex defaults.
8+
39
## 0.2.19
410

511
Released on 2025-11-26.

Cargo.lock

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,6 +84,7 @@ serde_json = { version = "1.0.132", features = ["unbounded_depth"] }
8484
serde_stacker = { version = "0.1.12" }
8585
serde_yaml = { version = "0.9.34" }
8686
shlex = { version = "1.3.0" }
87+
globset = { version = "0.4.18" }
8788
smallvec = { version = "1.15.1" }
8889
target-lexicon = { version = "0.13.0" }
8990
tempfile = { version = "3.13.0" }

docs/configuration.md

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,25 @@ For configuration details, refer to the official pre-commit docs:
2323
2424
## Prek specific configurations
2525
26+
### `files` / `exclude` patterns
27+
28+
Patterns remain regex by default (matching pre-commit), but you can also opt into globs:
29+
30+
```yaml
31+
repos:
32+
- repo: local
33+
hooks:
34+
- id: format-python
35+
entry: bash -c 'echo formatting "$@"' _
36+
language: system
37+
files:
38+
glob: src/**/*.py
39+
exclude:
40+
glob: src/vendor/**
41+
```
42+
43+
Regex and glob patterns (a single glob or a list of globs) are accepted both in the top-level config and per hook.
44+
2645
### `minimum_prek_version`
2746

2847
Specify the minimum required version of prek for the configuration. If the installed version is lower, prek will exit with an error.

src/cli/run/filter.rs

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,14 @@
11
use std::path::{Path, PathBuf};
22

33
use anyhow::{Context, Result};
4-
use fancy_regex::Regex;
54
use itertools::{Either, Itertools};
65
use path_clean::PathClean;
76
use prek_consts::env_vars::EnvVars;
87
use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
98
use rustc_hash::FxHashSet;
109
use tracing::{debug, error, instrument};
1110

12-
use crate::config::Stage;
11+
use crate::config::{FilePattern, Stage};
1312
use crate::git::GIT_ROOT;
1413
use crate::hook::Hook;
1514
use crate::identify::{TagSet, tags_from_path};
@@ -18,26 +17,26 @@ use crate::{fs, git, warn_user};
1817

1918
/// Filter filenames by include/exclude patterns.
2019
pub(crate) struct FilenameFilter<'a> {
21-
include: Option<&'a Regex>,
22-
exclude: Option<&'a Regex>,
20+
include: Option<&'a FilePattern>,
21+
exclude: Option<&'a FilePattern>,
2322
}
2423

2524
impl<'a> FilenameFilter<'a> {
26-
pub(crate) fn new(include: Option<&'a Regex>, exclude: Option<&'a Regex>) -> Self {
25+
pub(crate) fn new(include: Option<&'a FilePattern>, exclude: Option<&'a FilePattern>) -> Self {
2726
Self { include, exclude }
2827
}
2928

3029
pub(crate) fn filter(&self, filename: &Path) -> bool {
3130
let Some(filename) = filename.to_str() else {
3231
return false;
3332
};
34-
if let Some(re) = &self.include {
35-
if !re.is_match(filename).unwrap_or(false) {
33+
if let Some(pattern) = &self.include {
34+
if !pattern.is_match(filename) {
3635
return false;
3736
}
3837
}
39-
if let Some(re) = &self.exclude {
40-
if re.is_match(filename).unwrap_or(false) {
38+
if let Some(pattern) = &self.exclude {
39+
if pattern.is_match(filename) {
4140
return false;
4241
}
4342
}
@@ -95,10 +94,7 @@ impl<'a> FileFilter<'a> {
9594
where
9695
I: Iterator<Item = &'a PathBuf> + Send,
9796
{
98-
let filter = FilenameFilter::new(
99-
project.config().files.as_deref(),
100-
project.config().exclude.as_deref(),
101-
);
97+
let filter = FilenameFilter::new(project.config().files.as_ref(), project.config().exclude.as_ref());
10298

10399
let orphan = project.config().orphan.unwrap_or(false);
104100

@@ -163,7 +159,7 @@ impl<'a> FileFilter<'a> {
163159
#[instrument(level = "trace", skip_all, fields(hook = ?hook.id))]
164160
pub(crate) fn for_hook(&self, hook: &Hook) -> Vec<&Path> {
165161
// Filter by hook `files` and `exclude` patterns.
166-
let filter = FilenameFilter::new(hook.files.as_deref(), hook.exclude.as_deref());
162+
let filter = FilenameFilter::new(hook.files.as_ref(), hook.exclude.as_ref());
167163

168164
let filenames = self.filenames.par_iter().filter(|filename| {
169165
if let Ok(stripped) = filename.strip_prefix(self.filename_prefix) {

src/config.rs

Lines changed: 164 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ use std::sync::LazyLock;
66

77
use anyhow::Result;
88
use fancy_regex::Regex;
9+
use globset::{Glob, GlobMatcher};
910
use itertools::Itertools;
1011
use prek_consts::{ALT_CONFIG_FILE, CONFIG_FILE};
1112
use rustc_hash::FxHashMap;
@@ -45,6 +46,12 @@ impl<'de> Deserialize<'de> for SerdeRegex {
4546
}
4647
}
4748

49+
impl SerdeRegex {
50+
pub(crate) fn from_regex(regex: Regex) -> Self {
51+
Self(regex)
52+
}
53+
}
54+
4855
pub(crate) static CONFIG_FILE_REGEX: LazyLock<SerdeRegex> = LazyLock::new(|| {
4956
let pattern = format!(
5057
"^{}|{}$",
@@ -54,6 +61,103 @@ pub(crate) static CONFIG_FILE_REGEX: LazyLock<SerdeRegex> = LazyLock::new(|| {
5461
SerdeRegex(Regex::new(&pattern).expect("config regex must compile"))
5562
});
5663

64+
#[derive(Clone)]
65+
pub(crate) struct FilePattern {
66+
sources: Vec<String>,
67+
kind: FilePatternKind,
68+
}
69+
70+
#[derive(Clone)]
71+
enum FilePatternKind {
72+
Regex(Regex),
73+
Glob(Vec<GlobMatcher>),
74+
}
75+
76+
impl FilePattern {
77+
pub(crate) fn is_match(&self, text: &str) -> bool {
78+
match &self.kind {
79+
FilePatternKind::Regex(regex) => regex.is_match(text).unwrap_or(false),
80+
FilePatternKind::Glob(globs) => globs.iter().any(|g| g.is_match(text)),
81+
}
82+
}
83+
84+
pub(crate) fn from_regex(regex: SerdeRegex) -> Self {
85+
Self {
86+
sources: vec![regex.as_str().to_string()],
87+
kind: FilePatternKind::Regex(regex.0),
88+
}
89+
}
90+
91+
pub(crate) fn sources(&self) -> &[String] {
92+
&self.sources
93+
}
94+
95+
pub(crate) fn sources_display(&self) -> String {
96+
self.sources.join(", ")
97+
}
98+
}
99+
100+
impl std::fmt::Debug for FilePattern {
101+
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
102+
let kind = match self.kind {
103+
FilePatternKind::Regex(_) => "Regex",
104+
FilePatternKind::Glob(_) => "Glob",
105+
};
106+
f.debug_struct("FilePattern")
107+
.field("kind", &kind)
108+
.field("sources", &self.sources)
109+
.finish()
110+
}
111+
}
112+
113+
impl<'de> Deserialize<'de> for FilePattern {
114+
fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
115+
where
116+
D: Deserializer<'de>,
117+
{
118+
#[derive(Deserialize)]
119+
#[serde(untagged)]
120+
enum PatternWire {
121+
Glob { glob: String },
122+
GlobList { glob: Vec<String> },
123+
Regex(String),
124+
}
125+
126+
match PatternWire::deserialize(deserializer)? {
127+
PatternWire::Glob { glob } => Glob::new(&glob)
128+
.map(|g| Self {
129+
sources: vec![glob],
130+
kind: FilePatternKind::Glob(vec![g.compile_matcher()]),
131+
})
132+
.map_err(serde::de::Error::custom),
133+
PatternWire::GlobList { glob } => {
134+
if glob.is_empty() {
135+
return Err(serde::de::Error::custom("glob list cannot be empty"));
136+
}
137+
let mut matchers = Vec::with_capacity(glob.len());
138+
for g in &glob {
139+
matchers.push(
140+
Glob::new(g)
141+
.map_err(serde::de::Error::custom)?
142+
.compile_matcher(),
143+
);
144+
}
145+
Ok(Self {
146+
sources: glob,
147+
kind: FilePatternKind::Glob(matchers),
148+
})
149+
}
150+
PatternWire::Regex(pattern) => Regex::new(&pattern)
151+
.map(|r| Self {
152+
sources: vec![pattern],
153+
kind: FilePatternKind::Regex(r),
154+
})
155+
.map_err(serde::de::Error::custom),
156+
}
157+
}
158+
}
159+
160+
57161
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash, Deserialize, Serialize, clap::ValueEnum)]
58162
#[serde(rename_all = "snake_case")]
59163
pub enum Language {
@@ -252,10 +356,10 @@ pub(crate) struct HookOptions {
252356
/// Not documented in the official docs.
253357
pub alias: Option<String>,
254358
/// The pattern of files to run on.
255-
pub files: Option<SerdeRegex>,
359+
pub files: Option<FilePattern>,
256360
/// Exclude files that were matched by `files`.
257361
/// Default is `$^`, which matches nothing.
258-
pub exclude: Option<SerdeRegex>,
362+
pub exclude: Option<FilePattern>,
259363
/// List of file types to run on (AND).
260364
/// Default is `[file]`, which matches all files.
261365
#[serde(deserialize_with = "deserialize_and_validate_tags", default)]
@@ -613,9 +717,9 @@ pub(crate) struct Config {
613717
/// Default to all stages.
614718
pub default_stages: Option<Vec<Stage>>,
615719
/// Global file include pattern.
616-
pub files: Option<SerdeRegex>,
720+
pub files: Option<FilePattern>,
617721
/// Global file exclude pattern.
618-
pub exclude: Option<SerdeRegex>,
722+
pub exclude: Option<FilePattern>,
619723
/// Set to true to have prek stop running hooks after the first failure.
620724
/// Default is false.
621725
pub fail_fast: Option<bool>,
@@ -877,6 +981,62 @@ mod tests {
877981
use super::*;
878982
use std::io::Write as _;
879983

984+
#[test]
985+
fn parse_file_patterns_regex_and_glob() {
986+
#[derive(Debug, Deserialize)]
987+
struct Wrapper {
988+
files: FilePattern,
989+
exclude: FilePattern,
990+
}
991+
992+
let regex_yaml = indoc::indoc! {r"
993+
files: ^src/
994+
exclude: ^target/
995+
"};
996+
let parsed: Wrapper = serde_yaml::from_str(regex_yaml).expect("regex patterns should parse");
997+
match parsed.files.kind {
998+
FilePatternKind::Regex(_) => {}
999+
_ => panic!("expected regex pattern"),
1000+
}
1001+
assert!(parsed.files.is_match("src/main.rs"));
1002+
assert!(!parsed.files.is_match("other/main.rs"));
1003+
assert!(parsed.exclude.is_match("target/debug/app"));
1004+
1005+
let glob_yaml = indoc::indoc! {r"
1006+
files:
1007+
glob: src/**/*.rs
1008+
exclude:
1009+
glob: target/**
1010+
"};
1011+
let parsed: Wrapper = serde_yaml::from_str(glob_yaml).expect("glob patterns should parse");
1012+
match parsed.files.kind {
1013+
FilePatternKind::Glob(_) => {}
1014+
_ => panic!("expected glob pattern"),
1015+
}
1016+
assert!(parsed.files.is_match("src/lib/main.rs"));
1017+
assert!(!parsed.files.is_match("src/lib/main.py"));
1018+
assert!(parsed.exclude.is_match("target/debug/app"));
1019+
assert!(!parsed.exclude.is_match("src/lib/main.rs"));
1020+
1021+
let glob_list_yaml = indoc::indoc! {r"
1022+
files:
1023+
glob:
1024+
- src/**/*.rs
1025+
- crates/**/src/**/*.rs
1026+
exclude:
1027+
glob:
1028+
- target/**
1029+
- dist/**
1030+
"};
1031+
let parsed: Wrapper =
1032+
serde_yaml::from_str(glob_list_yaml).expect("glob list patterns should parse");
1033+
assert!(parsed.files.is_match("src/lib/main.rs"));
1034+
assert!(parsed.files.is_match("crates/foo/src/lib.rs"));
1035+
assert!(!parsed.files.is_match("tests/main.rs"));
1036+
assert!(parsed.exclude.is_match("target/debug/app"));
1037+
assert!(parsed.exclude.is_match("dist/app"));
1038+
}
1039+
8801040
#[test]
8811041
fn parse_repos() {
8821042
// Local hook should not have `rev`

src/hook.rs

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,8 +14,8 @@ use thiserror::Error;
1414
use tracing::{error, trace};
1515

1616
use crate::config::{
17-
self, BuiltinHook, Config, HookOptions, Language, LocalHook, ManifestHook, MetaHook,
18-
RemoteHook, SerdeRegex, Stage, read_manifest,
17+
self, BuiltinHook, Config, FilePattern, HookOptions, Language, LocalHook, ManifestHook,
18+
MetaHook, RemoteHook, Stage, read_manifest,
1919
};
2020
use crate::languages::version::LanguageRequest;
2121
use crate::languages::{extract_metadata_from_entry, resolve_command};
@@ -418,8 +418,8 @@ pub(crate) struct Hook {
418418
pub entry: Entry,
419419
pub language: Language,
420420
pub alias: String,
421-
pub files: Option<SerdeRegex>,
422-
pub exclude: Option<SerdeRegex>,
421+
pub files: Option<FilePattern>,
422+
pub exclude: Option<FilePattern>,
423423
pub types: Vec<String>,
424424
pub types_or: Vec<String>,
425425
pub exclude_types: Vec<String>,

0 commit comments

Comments
 (0)