@@ -6,6 +6,7 @@ use std::sync::LazyLock;
66
77use anyhow:: Result ;
88use fancy_regex:: Regex ;
9+ use globset:: { Glob , GlobMatcher } ;
910use itertools:: Itertools ;
1011use prek_consts:: { ALT_CONFIG_FILE , CONFIG_FILE } ;
1112use rustc_hash:: FxHashMap ;
@@ -45,6 +46,12 @@ impl<'de> Deserialize<'de> for SerdeRegex {
4546 }
4647}
4748
49+ impl SerdeRegex {
50+ pub ( crate ) fn from_regex ( regex : Regex ) -> Self {
51+ Self ( regex)
52+ }
53+ }
54+
4855pub ( crate ) static CONFIG_FILE_REGEX : LazyLock < SerdeRegex > = LazyLock :: new ( || {
4956 let pattern = format ! (
5057 "^{}|{}$" ,
@@ -54,6 +61,103 @@ pub(crate) static CONFIG_FILE_REGEX: LazyLock<SerdeRegex> = LazyLock::new(|| {
5461 SerdeRegex ( Regex :: new ( & pattern) . expect ( "config regex must compile" ) )
5562} ) ;
5663
64+ #[ derive( Clone ) ]
65+ pub ( crate ) struct FilePattern {
66+ sources : Vec < String > ,
67+ kind : FilePatternKind ,
68+ }
69+
70+ #[ derive( Clone ) ]
71+ enum FilePatternKind {
72+ Regex ( Regex ) ,
73+ Glob ( Vec < GlobMatcher > ) ,
74+ }
75+
76+ impl FilePattern {
77+ pub ( crate ) fn is_match ( & self , text : & str ) -> bool {
78+ match & self . kind {
79+ FilePatternKind :: Regex ( regex) => regex. is_match ( text) . unwrap_or ( false ) ,
80+ FilePatternKind :: Glob ( globs) => globs. iter ( ) . any ( |g| g. is_match ( text) ) ,
81+ }
82+ }
83+
84+ pub ( crate ) fn from_regex ( regex : SerdeRegex ) -> Self {
85+ Self {
86+ sources : vec ! [ regex. as_str( ) . to_string( ) ] ,
87+ kind : FilePatternKind :: Regex ( regex. 0 ) ,
88+ }
89+ }
90+
91+ pub ( crate ) fn sources ( & self ) -> & [ String ] {
92+ & self . sources
93+ }
94+
95+ pub ( crate ) fn sources_display ( & self ) -> String {
96+ self . sources . join ( ", " )
97+ }
98+ }
99+
100+ impl std:: fmt:: Debug for FilePattern {
101+ fn fmt ( & self , f : & mut std:: fmt:: Formatter < ' _ > ) -> std:: fmt:: Result {
102+ let kind = match self . kind {
103+ FilePatternKind :: Regex ( _) => "Regex" ,
104+ FilePatternKind :: Glob ( _) => "Glob" ,
105+ } ;
106+ f. debug_struct ( "FilePattern" )
107+ . field ( "kind" , & kind)
108+ . field ( "sources" , & self . sources )
109+ . finish ( )
110+ }
111+ }
112+
113+ impl < ' de > Deserialize < ' de > for FilePattern {
114+ fn deserialize < D > ( deserializer : D ) -> Result < Self , D :: Error >
115+ where
116+ D : Deserializer < ' de > ,
117+ {
118+ #[ derive( Deserialize ) ]
119+ #[ serde( untagged) ]
120+ enum PatternWire {
121+ Glob { glob : String } ,
122+ GlobList { glob : Vec < String > } ,
123+ Regex ( String ) ,
124+ }
125+
126+ match PatternWire :: deserialize ( deserializer) ? {
127+ PatternWire :: Glob { glob } => Glob :: new ( & glob)
128+ . map ( |g| Self {
129+ sources : vec ! [ glob] ,
130+ kind : FilePatternKind :: Glob ( vec ! [ g. compile_matcher( ) ] ) ,
131+ } )
132+ . map_err ( serde:: de:: Error :: custom) ,
133+ PatternWire :: GlobList { glob } => {
134+ if glob. is_empty ( ) {
135+ return Err ( serde:: de:: Error :: custom ( "glob list cannot be empty" ) ) ;
136+ }
137+ let mut matchers = Vec :: with_capacity ( glob. len ( ) ) ;
138+ for g in & glob {
139+ matchers. push (
140+ Glob :: new ( g)
141+ . map_err ( serde:: de:: Error :: custom) ?
142+ . compile_matcher ( ) ,
143+ ) ;
144+ }
145+ Ok ( Self {
146+ sources : glob,
147+ kind : FilePatternKind :: Glob ( matchers) ,
148+ } )
149+ }
150+ PatternWire :: Regex ( pattern) => Regex :: new ( & pattern)
151+ . map ( |r| Self {
152+ sources : vec ! [ pattern] ,
153+ kind : FilePatternKind :: Regex ( r) ,
154+ } )
155+ . map_err ( serde:: de:: Error :: custom) ,
156+ }
157+ }
158+ }
159+
160+
57161#[ derive( Debug , Copy , Clone , PartialEq , Eq , Hash , Deserialize , Serialize , clap:: ValueEnum ) ]
58162#[ serde( rename_all = "snake_case" ) ]
59163pub enum Language {
@@ -252,10 +356,10 @@ pub(crate) struct HookOptions {
252356 /// Not documented in the official docs.
253357 pub alias : Option < String > ,
254358 /// The pattern of files to run on.
255- pub files : Option < SerdeRegex > ,
359+ pub files : Option < FilePattern > ,
256360 /// Exclude files that were matched by `files`.
257361 /// Default is `$^`, which matches nothing.
258- pub exclude : Option < SerdeRegex > ,
362+ pub exclude : Option < FilePattern > ,
259363 /// List of file types to run on (AND).
260364 /// Default is `[file]`, which matches all files.
261365 #[ serde( deserialize_with = "deserialize_and_validate_tags" , default ) ]
@@ -613,9 +717,9 @@ pub(crate) struct Config {
613717 /// Default to all stages.
614718 pub default_stages : Option < Vec < Stage > > ,
615719 /// Global file include pattern.
616- pub files : Option < SerdeRegex > ,
720+ pub files : Option < FilePattern > ,
617721 /// Global file exclude pattern.
618- pub exclude : Option < SerdeRegex > ,
722+ pub exclude : Option < FilePattern > ,
619723 /// Set to true to have prek stop running hooks after the first failure.
620724 /// Default is false.
621725 pub fail_fast : Option < bool > ,
@@ -877,6 +981,62 @@ mod tests {
877981 use super :: * ;
878982 use std:: io:: Write as _;
879983
/// `files` / `exclude` accept a bare regex string, a single `glob`, or a
/// list of `glob` entries, and each form matches the expected paths.
#[test]
fn parse_file_patterns_regex_and_glob() {
    #[derive(Debug, Deserialize)]
    struct Wrapper {
        files: FilePattern,
        exclude: FilePattern,
    }

    // Plain strings are parsed as regular expressions.
    let regex_yaml = indoc::indoc! {r"
        files: ^src/
        exclude: ^target/
    "};
    let regex_cfg: Wrapper =
        serde_yaml::from_str(regex_yaml).expect("regex patterns should parse");
    assert!(
        matches!(regex_cfg.files.kind, FilePatternKind::Regex(_)),
        "expected regex pattern"
    );
    assert!(regex_cfg.files.is_match("src/main.rs"));
    assert!(!regex_cfg.files.is_match("other/main.rs"));
    assert!(regex_cfg.exclude.is_match("target/debug/app"));

    // A mapping with a single `glob` string is parsed as a glob.
    let glob_yaml = indoc::indoc! {r"
        files:
          glob: src/**/*.rs
        exclude:
          glob: target/**
    "};
    let glob_cfg: Wrapper = serde_yaml::from_str(glob_yaml).expect("glob patterns should parse");
    assert!(
        matches!(glob_cfg.files.kind, FilePatternKind::Glob(_)),
        "expected glob pattern"
    );
    assert!(glob_cfg.files.is_match("src/lib/main.rs"));
    assert!(!glob_cfg.files.is_match("src/lib/main.py"));
    assert!(glob_cfg.exclude.is_match("target/debug/app"));
    assert!(!glob_cfg.exclude.is_match("src/lib/main.rs"));

    // A `glob` list matches when any of its entries matches.
    let glob_list_yaml = indoc::indoc! {r"
        files:
          glob:
            - src/**/*.rs
            - crates/**/src/**/*.rs
        exclude:
          glob:
            - target/**
            - dist/**
    "};
    let list_cfg: Wrapper =
        serde_yaml::from_str(glob_list_yaml).expect("glob list patterns should parse");
    assert!(list_cfg.files.is_match("src/lib/main.rs"));
    assert!(list_cfg.files.is_match("crates/foo/src/lib.rs"));
    assert!(!list_cfg.files.is_match("tests/main.rs"));
    assert!(list_cfg.exclude.is_match("target/debug/app"));
    assert!(list_cfg.exclude.is_match("dist/app"));
}
1039+
8801040 #[ test]
8811041 fn parse_repos ( ) {
8821042 // Local hook should not have `rev`
0 commit comments