Skip to content

Commit 5ae43ab

Browse files
committed
Use lazy_regex for compile time regex checking (#996)
1 parent 4a864b3 commit 5ae43ab

File tree

9 files changed

+73
-42
lines changed

9 files changed

+73
-42
lines changed

Cargo.lock

Lines changed: 24 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -57,6 +57,7 @@ ignore = { version = "0.4.23" }
5757
indicatif = { version = "0.18.0" }
5858
indoc = { version = "2.0.5" }
5959
itertools = { version = "0.14.0" }
60+
lazy-regex = { version = "3.4.2" }
6061
levenshtein = { version = "1.0.5" }
6162
# Enable static linking for liblzma
6263
# This is required for the `xz` feature in `async-compression`

src/cli/auto_update.rs

Lines changed: 4 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -5,9 +5,9 @@ use std::process::Stdio;
55
use anyhow::{Context, Result};
66
use bstr::ByteSlice;
77
use constants::MANIFEST_FILE;
8-
use fancy_regex::Regex;
98
use futures::StreamExt;
109
use itertools::Itertools;
10+
use lazy_regex::regex;
1111
use owo_colors::OwoColorize;
1212
use rustc_hash::FxHashMap;
1313
use rustc_hash::FxHashSet;
@@ -350,14 +350,13 @@ async fn write_new_config(path: &Path, revisions: &[Option<Revision>]) -> Result
350350
.map(ToString::to_string)
351351
.collect::<Vec<_>>();
352352

353-
let rev_regex = Regex::new(r#"^(\s+)rev:(\s*)(['"]?)([^\s#]+)(.*)(\r?\n)$"#)
354-
.expect("Failed to compile regex");
353+
let rev_regex = regex!(r#"^(\s+)rev:(\s*)(['"]?)([^\s#]+)(.*)(\r?\n)$"#);
355354

356355
let rev_lines = lines
357356
.iter()
358357
.enumerate()
359358
.filter_map(|(line_no, line)| {
360-
if let Ok(true) = rev_regex.is_match(line) {
359+
if rev_regex.is_match(line) {
361360
Some(line_no)
362361
} else {
363362
None
@@ -394,8 +393,7 @@ async fn write_new_config(path: &Path, revisions: &[Option<Revision>]) -> Result
394393

395394
let caps = rev_regex
396395
.captures(&lines[*line_no])
397-
.expect("Invalid regex")
398-
.expect("Failed to capture revision line");
396+
.context("Failed to capture rev line")?;
399397

400398
let comment = if let Some(frozen) = &revision.frozen {
401399
format!(" # frozen: {frozen}")

src/config.rs

Lines changed: 7 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -2,11 +2,11 @@ use std::fmt::Display;
22
use std::ops::{Deref, RangeInclusive};
33
use std::path::Path;
44
use std::str::FromStr;
5-
use std::sync::OnceLock;
65

76
use anyhow::Result;
87
use constants::{ALT_CONFIG_FILE, CONFIG_FILE};
9-
use fancy_regex::{self as regex, Regex};
8+
use fancy_regex::Regex;
9+
use lazy_regex::regex;
1010
use rustc_hash::FxHashMap;
1111
use serde::{Deserialize, Deserializer, Serialize, Serializer};
1212
use serde_yaml::Value;
@@ -524,8 +524,8 @@ impl<'de> Deserialize<'de> for MetaHook {
524524
files: Some(
525525
Regex::new(&format!(
526526
"^{}|{}$",
527-
regex::escape(CONFIG_FILE),
528-
regex::escape(ALT_CONFIG_FILE)
527+
fancy_regex::escape(CONFIG_FILE),
528+
fancy_regex::escape(ALT_CONFIG_FILE)
529529
))
530530
.map(SerdeRegex)
531531
.unwrap(),
@@ -542,8 +542,8 @@ impl<'de> Deserialize<'de> for MetaHook {
542542
files: Some(
543543
Regex::new(&format!(
544544
"^{}|{}$",
545-
regex::escape(CONFIG_FILE),
546-
regex::escape(ALT_CONFIG_FILE)
545+
fancy_regex::escape(CONFIG_FILE),
546+
fancy_regex::escape(ALT_CONFIG_FILE)
547547
))
548548
.map(SerdeRegex)
549549
.unwrap(),
@@ -812,9 +812,7 @@ pub fn read_manifest(path: &Path) -> Result<Manifest, Error> {
812812

813813
/// Check if a string looks like a git SHA
814814
fn looks_like_sha(s: &str) -> bool {
815-
static SHA_RE: OnceLock<Regex> = OnceLock::new();
816-
let re = SHA_RE.get_or_init(|| Regex::new(r"^[a-fA-F0-9]+$").unwrap());
817-
re.is_match(s).unwrap_or(false)
815+
regex!(r"^[a-fA-F0-9]+$").is_match(s)
818816
}
819817

820818
/// Deserializes a vector of strings and validates that each is a known file type tag.

src/languages/docker.rs

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -6,7 +6,7 @@ use std::path::Path;
66
use std::sync::{Arc, LazyLock};
77

88
use anyhow::{Context, Result};
9-
use fancy_regex::Regex;
9+
use lazy_regex::regex;
1010
use tracing::trace;
1111

1212
use crate::cli::reporter::HookInstallReporter;
@@ -121,9 +121,9 @@ impl Docker {
121121
/// <https://stackoverflow.com/questions/20995351/how-can-i-get-docker-linux-container-information-from-within-the-container-itsel>
122122
fn current_container_id() -> Result<String> {
123123
// Adapted from https://github.com/open-telemetry/opentelemetry-java-instrumentation/pull/7167/files
124-
let regex = Regex::new(r".*/docker/containers/([0-9a-f]{64})/.*").expect("invalid regex");
124+
let regex = regex!(r".*/docker/containers/([0-9a-f]{64})/.*");
125125
let cgroup_path = fs::read_to_string("/proc/self/cgroup")?;
126-
let Some(captures) = regex.captures(&cgroup_path)? else {
126+
let Some(captures) = regex.captures(&cgroup_path) else {
127127
anyhow::bail!("Failed to get container id: no match found");
128128
};
129129
let Some(id) = captures.get(1).map(|m| m.as_str().to_string()) else {

src/languages/golang/gomod.rs

Lines changed: 7 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,7 @@
1+
use anyhow::Result;
2+
3+
use crate::hook::Hook;
4+
5+
pub(crate) async fn extract_go_mod_metadata(_hook: &mut Hook) -> Result<()> {
6+
Ok(())
7+
}

src/languages/golang/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1,7 +1,9 @@
11
#[allow(clippy::module_inception)]
22
mod golang;
3+
mod gomod;
34
mod installer;
45
mod version;
56

67
pub(crate) use golang::Golang;
8+
pub(crate) use gomod::extract_go_mod_metadata;
79
pub(crate) use version::GoRequest;

src/languages/mod.rs

Lines changed: 21 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -49,7 +49,9 @@ trait LanguageImpl {
4949
store: &Store,
5050
reporter: &HookInstallReporter,
5151
) -> Result<InstalledHook>;
52+
5253
async fn check_health(&self, info: &InstallInfo) -> Result<()>;
54+
5355
async fn run(
5456
&self,
5557
hook: &InstalledHook,
@@ -227,40 +229,35 @@ impl Language {
227229
}
228230

229231
/// Try to extract metadata from the given hook entry if possible.
230-
///
231-
/// Currently, only PEP 723 inline metadata for `python` hooks is supported.
232-
/// First part of `entry` must be a file path to the Python script.
233-
/// Effectively, we are implementing a new `python-script` language which works like `script`.
234-
/// But we don't want to introduce a new language just for this for now.
235232
pub(crate) async fn extract_metadata_from_entry(hook: &mut Hook) -> Result<()> {
236-
// Only support `python` hooks for now.
237-
if hook.language == Language::Python {
238-
return python::extract_pep723_metadata(hook).await;
233+
match hook.language {
234+
Language::Python => python::extract_pep723_metadata(hook).await,
235+
Language::Golang => golang::extract_go_mod_metadata(hook).await,
236+
_ => Ok(()),
239237
}
240-
241-
Ok(())
242238
}
243239

244-
pub(crate) fn resolve_command(mut cmds: Vec<String>, env_path: Option<&OsStr>) -> Vec<String> {
245-
let cmd = &cmds[0];
246-
let exe_path = match which::which_in(cmd, env_path, &*CWD) {
240+
/// Resolve the actual process invocation, honoring shebangs and PATH lookups.
241+
pub(crate) fn resolve_command(mut cmds: Vec<String>, paths: Option<&OsStr>) -> Vec<String> {
242+
let candidate = &cmds[0];
243+
let resolved_binary = match which::which_in(candidate, paths, &*CWD) {
247244
Ok(p) => p,
248-
Err(_) => PathBuf::from(cmd),
245+
Err(_) => PathBuf::from(candidate),
249246
};
250-
trace!("Resolved command: {}", exe_path.display());
247+
trace!("Resolved command: {}", resolved_binary.display());
251248

252-
if let Ok(mut interpreter) = parse_shebang(&exe_path) {
253-
trace!("Found shebang: {:?}", interpreter);
249+
if let Ok(mut shebang_argv) = parse_shebang(&resolved_binary) {
250+
trace!("Found shebang: {:?}", shebang_argv);
254251
// Resolve the interpreter path, convert "python3" to "python3.exe" on Windows
255-
if let Ok(p) = which::which_in(&interpreter[0], env_path, &*CWD) {
256-
interpreter[0] = p.to_string_lossy().to_string();
257-
trace!("Resolved interpreter: {}", &interpreter[0]);
252+
if let Ok(p) = which::which_in(&shebang_argv[0], paths, &*CWD) {
253+
shebang_argv[0] = p.to_string_lossy().to_string();
254+
trace!("Resolved interpreter: {}", &shebang_argv[0]);
258255
}
259-
interpreter.push(exe_path.to_string_lossy().to_string());
260-
interpreter.extend_from_slice(&cmds[1..]);
261-
interpreter
256+
shebang_argv.push(resolved_binary.to_string_lossy().to_string());
257+
shebang_argv.extend_from_slice(&cmds[1..]);
258+
shebang_argv
262259
} else {
263-
cmds[0] = exe_path.to_string_lossy().to_string();
260+
cmds[0] = resolved_binary.to_string_lossy().to_string();
264261
cmds
265262
}
266263
}

src/languages/python/pep723.rs

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -251,6 +251,10 @@ impl ScriptTag {
251251
}
252252
}
253253

254+
/// Extract PEP 723 inline metadata for `python` hooks.
255+
/// First part of `entry` must be a file path to the Python script.
256+
/// Effectively, we are implementing a new `python-script` language which works like `script`.
257+
/// But we don't want to introduce a new language just for this for now.
254258
pub(crate) async fn extract_pep723_metadata(hook: &mut Hook) -> Result<()> {
255259
if !hook.additional_dependencies.is_empty() {
256260
trace!(

0 commit comments

Comments
 (0)