//
// Syd: rock-solid application kernel
// src/wildmatch.rs: Shell-style pattern matching
//
// Copyright (c) 2024, 2025, 2026 Ali Polatel <alip@chesswob.org>
// Based in part upon rsync's lib/wildmatch.c which is:
//   Written by Rich $alz, mirror!rs, Wed Nov 26 19:03:17 EST 1986.
//   Rich $alz is now <rsalz@bbn.com>.
//   Modified by Wayne Davison to special-case '/' matching, to make '**'
//   work differently than '*', and to fix the character-class code.
//   SPDX-License-Identifier: GPL-3.0-or-later
// Based in part upon Kirk Krauss's FastWildCompare algorithm which is:
//   Copyright 2018 IBM Corporation
//   See: https://www.developforperformance.com/MatchingWildcards_AnImprovedAlgorithmForBigData.html
//   SPDX-License-Identifier: Apache-2.0
//
// Changes by alip:
// - Ported to Rust.
// - Added SIMD support.
// - Intuitive matching for consecutive slashes separated by double
//   star, e.g. /usr/**/bin/bash matches /usr/bin/bash.
// - Rewrite using Kirk Krauss's FastWildCompare algorithm,
//   extended with dual backup points for '*' and '**' wildcards.
//
// SPDX-License-Identifier: GPL-3.0

// SAFETY: This module has been liberated from unsafe code!
// Tests call fnmatch(3) to compare.
#![cfg_attr(not(test), forbid(unsafe_code))]

use std::{borrow::Cow, cmp::Ordering};

use memchr::{
    arch::all::{is_equal, is_prefix},
    memchr, memchr2, memchr3, memmem,
};
use nix::NixPath;

use crate::{
    likely,
    path::{XPath, XPathBuf},
    unlikely,
};

/// Strategy used to compare a pattern against a path.
///
/// [`globmatch`] dispatches on this value to select the matcher.
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
pub enum MatchMethod {
    /// Literal match: handled by [`litmatch`].
    Literal,
    /// Prefix match: handled by [`prematch`].
    Prefix,
    /// Glob match: handled by [`wildmatch`].
    Glob,
}

/// Report whether `needle` occurs anywhere inside `haystack`.
pub fn contains(haystack: &[u8], needle: &[u8]) -> bool {
    // Only the presence of a hit matters, its offset is discarded.
    let first_hit = memmem::find(haystack, needle);
    first_hit.is_some()
}

/// Apply matching according to given type and return result.
pub fn globmatch(pattern: &[u8], path: &[u8], method: MatchMethod) -> bool {
    match method {
        MatchMethod::Literal => litmatch(pattern, path),
        MatchMethod::Prefix => prematch(pattern, path),
        MatchMethod::Glob => wildmatch(pattern, path),
    }
}

/// Case-insensitive glob matching of names.
///
/// A pattern without glob characters (see `is_literal`) is wrapped as
/// `*pattern*`, so it matches anywhere inside `name`. A pattern that
/// already contains glob characters is used unchanged.
///
/// Both the pattern and the name are ASCII-lowercased before matching.
pub fn inamematch(pattern: &str, name: &str) -> bool {
    let glob: Cow<'_, str> = if is_literal(pattern.as_bytes()) {
        // Plain text: turn it into a "contains" glob.
        Cow::Owned(format!("*{pattern}*"))
    } else {
        Cow::Borrowed(pattern)
    };

    let glob = glob.to_ascii_lowercase();
    let name = name.to_ascii_lowercase();
    wildmatch(glob.as_bytes(), name.as_bytes())
}

/// Report whether `pattern` is a plain string rather than a glob.
///
/// A pattern is literal when it contains none of '*', '?', or '['.
#[inline]
pub fn is_literal(pattern: &[u8]) -> bool {
    // Offset of the first glob metacharacter, if there is one.
    let first_meta = memchr3(b'*', b'?', b'[', pattern);
    first_meta.is_none()
}

/// Return Some(prefix) if the pattern can be reduced to a prefix match.
///
/// Two pattern shapes qualify, provided the remaining prefix is literal:
/// - `foo/***` yields `foo` (the slash is dropped).
/// - `foo/**` yields `foo/` (the slash is kept).
///
/// Any other pattern yields `None`.
pub fn get_prefix(pattern: &XPath) -> Option<XPathBuf> {
    // Number of trailing bytes to cut off for each recognised suffix.
    let strip = if pattern.ends_with(b"/***") {
        // Remove the slash together with the stars.
        "/***".len()
    } else if pattern.ends_with(b"/**") {
        // Keep the slash, remove the stars only.
        "**".len()
    } else {
        return None;
    };

    let keep = pattern.len() - strip;
    let pre = &pattern.as_bytes()[..keep];

    // The reduction is only valid when no glob character remains.
    is_literal(pre).then(|| pre.into())
}

/// Pick the match method for a pattern, splitting `/***` patterns.
///
/// Returns `(method, split)`:
/// - `method`: The match method to use (Prefix, Literal, or Glob).
/// - `split`: Only set for glob patterns ending in `/***`; holds the
///   intermediate `foo/**` pattern.
///
/// The pattern is rewritten in place in two cases:
/// - Prefix patterns are replaced by the prefix from `get_prefix`.
/// - Glob patterns `foo/***` are truncated to `foo`, with `foo/**`
///   returned as the split pattern.
#[expect(clippy::disallowed_methods)]
pub fn get_match_method(pat: &mut XPathBuf) -> (MatchMethod, Option<XPathBuf>) {
    if let Some(prefix) = get_prefix(pat) {
        *pat = prefix;
        return (MatchMethod::Prefix, None);
    }

    if is_literal(pat.as_bytes()) {
        return (MatchMethod::Literal, None);
    }

    if !pat.ends_with(b"/***") {
        return (MatchMethod::Glob, None);
    }

    // Triple star glob: derive `foo/**` and `foo` from `foo/***`.
    let full_len = pat.len();
    let globstar_len = full_len.checked_sub(b"*".len()).unwrap();
    let base_len = full_len.checked_sub(b"/***".len()).unwrap();

    pat.truncate(globstar_len); // foo/*** -> foo/**
    let split = pat.clone();
    pat.truncate(base_len); // foo/** -> foo

    (MatchMethod::Glob, Some(split))
}

/// Match the "pattern" against the "path" literally.
///
/// The match succeeds only when both byte strings are identical.
///
/// # Arguments
///
/// * `pattern` - The literal string to match.
/// * `path` - The path to match against the pattern.
///
/// # Returns
///
/// * `true` if the path is byte-for-byte equal to the pattern.
/// * `false` otherwise.
pub fn litmatch(pattern: &[u8], path: &[u8]) -> bool {
    // Equality is symmetric, argument order is irrelevant.
    is_equal(pattern, path)
}

/// Match the "pattern" against the "path" using prefix match.
///
/// The path matches when it starts with the pattern and the pattern
/// ends at a path component boundary: either the path is exactly the
/// pattern, the pattern itself ends with '/', or the byte following
/// the prefix in the path is '/'.
///
/// # Arguments
///
/// * `pattern` - The prefix to match.
/// * `path` - The path to match against the pattern.
///
/// # Returns
///
/// * `true` if the path matches the pattern.
/// * `false` otherwise.
pub fn prematch(pattern: &[u8], path: &[u8]) -> bool {
    let len = pattern.len();

    // Decide whether the prefix would end on a component boundary.
    let on_boundary = match path.len().cmp(&len) {
        // Path shorter than the pattern can never match.
        Ordering::Less => false,
        Ordering::Equal => true,
        // Path is longer, so path[len] is in bounds.
        Ordering::Greater => pattern.last() == Some(&b'/') || path[len] == b'/',
    };

    on_boundary && is_prefix(path, pattern)
}

/// Match the "pattern" against the "text".
///
/// This function performs shell-style pattern matching, supporting ?, \, [], and * characters.
/// It is 8-bit clean and has special handling for '/' characters and '**' patterns:
///
/// * `?` and `[...]` match a single byte other than '/'.
/// * `*` matches a run of bytes that does not cross a '/'.
/// * `**` matches a run of bytes that may cross '/'.
/// * `/**/` makes the trailing '/' optional, e.g. `/usr/**/bin/bash`
///   matches `/usr/bin/bash`.
/// * `\x` matches the byte `x` literally. An escape whose byte does not
///   match the text, or a backslash with nothing after it, is a mismatch.
///
/// # Arguments
///
/// * `pattern` - The glob pattern to match.
/// * `text` - The text to match against the pattern.
///
/// # Returns
///
/// * `true` if the path matches the pattern.
/// * `false` otherwise.
// Two-loop algorithm based on Kirk Krauss's FastWildCompare.
// Loop 1: Fast path for literals until '*' is found.
// Loop 2: Handle wildcards with dual backup points.
#[expect(clippy::cognitive_complexity)]
pub fn wildmatch(pattern: &[u8], text: &[u8]) -> bool {
    // Loop 1: Match literals until a special character is found.
    let mut idx = 0;
    for (&p_ch, &t_ch) in pattern.iter().zip(text.iter()) {
        if unlikely(matches!(p_ch, b'*' | b'[' | b'\\')) {
            // Special character detected, enter 2nd loop.
            break;
        }
        if unlikely((p_ch != b'?' && p_ch != t_ch) || (p_ch != b'/' && t_ch == b'/')) {
            // ?: Match any single character except '/'.
            // x: Literal match.
            return false;
        }
        idx += 1;
    }

    // Handle cases where one ran out before the other or both exhausted.
    let p_len = pattern.len();
    let t_len = text.len();
    if unlikely(idx >= p_len) {
        // Pattern exhausted: Only match if text also exhausted.
        return idx >= t_len;
    }

    if likely(idx >= t_len) {
        // Text exhausted:
        // 1. Pattern must be all wildcards.
        // 2. Handle intuitive '/**/' where trailing '/' is optional.
        let mut p_idx = idx;
        while let Some(p_ch) = pattern.get(p_idx) {
            // Skip all consecutive '*' characters.
            if p_ch == &b'*' {
                p_idx += 1;
                while pattern.get(p_idx) == Some(&b'*') {
                    p_idx += 1;
                }
            } else {
                return false;
            }

            // If we encounter '/', the two characters before it must be '**'.
            // When fewer than two characters precede the '/', they cannot be
            // '**', so a pattern such as '*/' must not match the empty text.
            if pattern.get(p_idx) == Some(&b'/') {
                let is_globstar = p_idx
                    .checked_sub(2)
                    .and_then(|start| pattern.get(start..p_idx))
                    .map(|prev| prev == b"**")
                    .unwrap_or(false);
                if !is_globstar {
                    return false;
                }
                p_idx += 1;
            }
        }
        return true;
    }

    // Loop 2: Handle wildcards with dual backup points.
    let mut p_idx = idx;
    let mut t_idx = idx;

    // Backup points for backtracking:
    // star:     Backup for '*' which stops at '/'.
    // globstar: Backup for '**' which crosses '/'.
    struct BackupPoint {
        // Index into the pattern.
        p_idx: usize,
        // Index into the text.
        t_idx: usize,
    }
    let mut star_p: Option<BackupPoint> = None;
    let mut globstar_p: Option<BackupPoint> = None;
    // True when the active globstar came from '/**/' and therefore
    // may only resume right after a '/' in the text.
    let mut globstar_anchored = false;

    loop {
        if let Some(&p_ch) = pattern.get(p_idx) {
            match p_ch {
                b'*' => {
                    // Check for '**'.
                    let is_double = pattern.get(p_idx + 1).map(|&b| b == b'*').unwrap_or(false);

                    if is_double {
                        p_idx += 2; // Skip both stars.

                        // Check for intuitive '/**/' matching:
                        // If pattern is '/**/' make the trailing '/' optional.
                        let anchored = p_idx
                            .checked_sub(3)
                            .map(|idx| {
                                pattern.get(idx) == Some(&b'/') && pattern.get(p_idx) == Some(&b'/')
                            })
                            .unwrap_or(false);

                        if anchored {
                            p_idx += 1; // Skip the trailing '/'.
                            globstar_anchored = true;
                        } else {
                            globstar_anchored = false;
                        }

                        // Set globstar backup point.
                        globstar_p = Some(BackupPoint { p_idx, t_idx });

                        // Clear single star: globstar takes over.
                        star_p = None;
                    } else {
                        p_idx += 1; // Skip the star.

                        // SIMD optimization: If next pattern char is a literal,
                        // use memchr2 to skip to its first occurrence or a '/'.
                        match pattern.get(p_idx).copied() {
                            None | Some(b'*' | b'?' | b'[' | b'\\') => {
                                // Set star backup point for non-literal next chars.
                                star_p = Some(BackupPoint { p_idx, t_idx });
                            }
                            Some(next_p) => {
                                // Find next_p or '/' in remaining text.
                                star_p = if let Some(skip) = memchr2(next_p, b'/', &text[t_idx..]) {
                                    if text[t_idx + skip] != b'/' {
                                        // Found the literal: Skip directly to it.
                                        t_idx += skip;
                                    }
                                    // Else we hit slash barrier: Single star can't cross.
                                    // Set backup point and let backtracking handle it.
                                    Some(BackupPoint { p_idx, t_idx })
                                } else if globstar_p.is_some() {
                                    // globstar: Fall through to backtracking.
                                    Some(BackupPoint { p_idx, t_idx })
                                } else {
                                    // Literal not found, no slash, and no globstar.
                                    return false;
                                };
                                continue;
                            }
                        }
                    }

                    // Check for trailing wildcard.
                    if p_idx < p_len {
                        continue;
                    }

                    // '**' matches everything.
                    if is_double {
                        return true;
                    }

                    // '*' at end: Check if no more '/' in text.
                    // If there is a '/', fall through to try globstar backtracking.
                    if memchr(b'/', &text[t_idx..]).is_none() {
                        return true;
                    }

                    // There's a '/': If no globstar backup, fail.
                    // Otherwise fall through to backtracking.
                    if globstar_p.is_none() {
                        return false;
                    }
                    // Fall through to backtracking below.
                }

                b'?' => {
                    if text.get(t_idx).map(|&b| b != b'/').unwrap_or(false) {
                        p_idx += 1;
                        t_idx += 1;
                        continue;
                    }
                }

                b'[' => match text.get(t_idx) {
                    None | Some(&b'/') => {}
                    Some(&t_ch) => {
                        if let Some(new_p) = classmatch(pattern, p_idx + 1, t_ch) {
                            p_idx = new_p;
                            t_idx += 1;
                            continue;
                        }
                    }
                },

                b'\\' => {
                    // Escape: The byte after the backslash must match literally.
                    // The pattern index is only advanced on success. Otherwise
                    // the exhaustion checks below would look past the backslash
                    // and mistake an escaped '*' for a trailing wildcard, or a
                    // dangling backslash for an exhausted pattern.
                    if pattern
                        .get(p_idx + 1)
                        .map(|esc| text.get(t_idx) == Some(esc))
                        .unwrap_or(false)
                    {
                        p_idx += 2;
                        t_idx += 1;
                        continue;
                    }
                }

                // Literal match.
                _ => {
                    if text.get(t_idx) == Some(&p_ch) {
                        p_idx += 1;
                        t_idx += 1;
                        continue;
                    }
                }
            }
        }

        // Check if both exhausted which means success.
        if p_idx >= p_len && t_idx >= t_len {
            return true;
        }

        // Check for trailing wildcards in pattern.
        if t_idx >= t_len {
            while matches!(pattern.get(p_idx), Some(&b'*')) {
                p_idx += 1;
            }
            return p_idx >= p_len;
        }

        // Mismatch: Try backtracking.
        //
        // Step 1: Try single star '*' first, bounded by '/'.
        if let Some(BackupPoint {
            p_idx: sp,
            t_idx: st,
        }) = star_p
        {
            if text.get(st).map(|&b| b != b'/').unwrap_or(false) {
                // Let the star swallow one more byte and retry.
                p_idx = sp;
                t_idx = st + 1;
                star_p = Some(BackupPoint { p_idx, t_idx });
                continue;
            }
            // Hit '/' - star is exhausted: Fall through to globstar.
        }

        // Step 2: Try globstar '**' which crosses '/'.
        if let Some(BackupPoint {
            p_idx: gsp,
            t_idx: gst,
        }) = globstar_p
        {
            if gst < t_len {
                if globstar_anchored {
                    // Must resume at component boundary: Find next '/'.
                    if let Some(pos) = memchr(b'/', &text[gst..]) {
                        p_idx = gsp;
                        t_idx = gst + pos + 1;
                        star_p = None;
                        globstar_p = Some(BackupPoint { p_idx, t_idx });
                        continue;
                    }
                    // No more slashes: globstar is exhausted.
                } else {
                    // Non-anchored: Resume at any position.
                    p_idx = gsp;
                    t_idx = gst + 1;
                    star_p = None;
                    globstar_p = Some(BackupPoint { p_idx, t_idx });
                    continue;
                }
            }
        }

        // No backtracking options left.
        return false;
    }
}

// Match a single text byte against a character class [...].
//
// `p_idx` points at the first pattern byte after the opening '['.
// Returns the pattern index just past the closing ']' when the byte is
// accepted by the class. Returns None when the byte is rejected, the
// bracket is never closed, an escape is dangling, or a [:name:] class
// is unknown.
#[expect(clippy::cognitive_complexity)]
#[inline]
fn classmatch(pattern: &[u8], mut p_idx: usize, t_ch: u8) -> Option<usize> {
    let mut hit = false; // Some class member accepted t_ch.
    let mut invert = false; // Class started with '!' or '^'.
    let mut at_start = true; // No class member consumed yet.
    let mut range_lo: u8 = 0; // Candidate lower bound for 'a-z'; 0 means none.

    loop {
        // Running off the end means the bracket was never closed.
        let &p_ch = pattern.get(p_idx)?;

        // A single leading negation marker inverts the class.
        if unlikely(at_start && !invert && matches!(p_ch, NEGATE_CLASS | NEGATE_CLASS2)) {
            invert = true;
            p_idx += 1;
            continue;
        }

        // ']' terminates the class, except as the very first member.
        if unlikely(!at_start && p_ch == b']') {
            break;
        }
        at_start = false;

        if p_ch == b'\\' {
            // Escaped member: The next byte is taken literally.
            let &escaped = pattern.get(p_idx + 1)?;
            hit |= escaped == t_ch;
            range_lo = escaped;
            p_idx += 2;
        } else if p_ch == b'-'
            && range_lo != 0
            && pattern.get(p_idx + 1).is_some_and(|&b| b != b']')
        {
            // Range member: range_lo '-' range_hi, upper bound may be escaped.
            let mut hi_idx = p_idx + 1;
            let mut range_hi = pattern[hi_idx];
            if range_hi == b'\\' {
                hi_idx += 1;
                range_hi = *pattern.get(hi_idx)?;
            }
            if (range_lo..=range_hi).contains(&t_ch) {
                hit = true;
            }
            p_idx = hi_idx + 1;
            range_lo = 0; // A range cannot start another range.
        } else if p_ch == b'[' && pattern.get(p_idx + 1) == Some(&b':') {
            // POSIX member [:name:]: Locate the closing ':]'.
            let name_start = p_idx + 2;
            let closer = pattern[name_start..]
                .windows(2)
                .position(|pair| pair == b":]");

            match closer {
                None => {
                    // No terminator: The '[' is an ordinary member.
                    hit |= t_ch == b'[';
                    range_lo = b'[';
                    p_idx += 1;
                }
                Some(offset) => {
                    let name_end = name_start + offset;
                    let class_name = &pattern[name_start..name_end];

                    // Unknown class names make the whole class fail.
                    let pos = POSIX_CLASSES
                        .binary_search_by(|(name, _)| name.cmp(&class_name))
                        .ok()?;
                    if POSIX_CLASSES[pos].1(t_ch) {
                        hit = true;
                    }

                    p_idx = name_end + 2; // Step over ':]'.
                    range_lo = 0; // A class cannot start a range.
                }
            }
        } else {
            // Ordinary member.
            hit |= p_ch == t_ch;
            range_lo = p_ch;
            p_idx += 1;
        }
    }

    // p_idx rests on the closing ']'; negation flips the verdict.
    (hit != invert).then_some(p_idx + 1)
}

// Characters that negate a character class when they appear
// directly after the opening '[', e.g. [!a-z] or [^a-z].
const NEGATE_CLASS: u8 = b'!';
const NEGATE_CLASS2: u8 = b'^';

// Supported POSIX classes.
// Each entry maps a class name, as written between '[:' and ':]',
// to a predicate over a single byte. Only ASCII bytes are classified.
// This array must be sorted by name for binary search.
#[expect(clippy::type_complexity)]
const POSIX_CLASSES: &[(&[u8], fn(u8) -> bool)] = &[
    (b"alnum", |c| c.is_ascii_alphanumeric()),
    (b"alpha", |c| c.is_ascii_alphabetic()),
    (b"blank", |c| matches!(c, b' ' | b'\t')),
    (b"cntrl", |c| c.is_ascii_control()),
    (b"digit", |c| c.is_ascii_digit()),
    (b"graph", |c| c.is_ascii_graphic()),
    (b"lower", |c| c.is_ascii_lowercase()),
    (b"print", |c| c.is_ascii() && !c.is_ascii_control()),
    (b"punct", |c| c.is_ascii_punctuation()),
    // POSIX space is ' ', '\t', '\n', '\v', '\f' and '\r'.
    // u8::is_ascii_whitespace() leaves out vertical tab (0x0B),
    // so the contiguous range 0x09..=0x0D is spelled out instead.
    (b"space", |c| matches!(c, b' ' | b'\t'..=b'\r')),
    (b"upper", |c| c.is_ascii_uppercase()),
    (b"xdigit", |c| c.is_ascii_hexdigit()),
];

// Unit tests for the matchers.
// The wildmatch test compares results against libc's fnmatch(3), which
// is why unsafe code is permitted for test builds only.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_litmatch() {
        assert!(litmatch(b"", b""));
        assert!(litmatch(b"p", b"p"));
        assert!(!litmatch(b"p", b"P"));
        assert!(litmatch(b"/usr", b"/usr"));
        assert!(!litmatch(b"/usr", b"/usr/"));
    }

    #[test]
    fn test_prematch() {
        assert!(prematch(b"", b""));
        assert!(prematch(b"p", b"p"));
        assert!(!prematch(b"p", b"P"));
        assert!(prematch(b"/usr", b"/usr"));
        assert!(prematch(b"/usr", b"/usr/"));
        assert!(prematch(b"/usr", b"/usr/bin"));
        assert!(!prematch(b"/usr", b"/usra"));
        assert!(!prematch(b"/usr", b"/usra/bin"));
    }

    /// Run every case of the compressed `wildtest.txt.xz` table.
    ///
    /// Only wildmatch result mismatches count as failures; divergence
    /// from fnmatch(3) is printed but does not fail the test.
    #[test]
    fn test_wildmatch() {
        use std::io::BufRead;

        let data = include_bytes!("wildtest.txt.xz");
        let decoder = xz2::read::XzDecoder::new(&data[..]);
        let reader = std::io::BufReader::new(decoder);

        let mut failures = 0;
        let mut test_cnt = 0;

        for (index, line) in reader.lines().enumerate() {
            let line = line.expect("Failed to read line from wildtest.txt.xz");
            let line_bytes = line.as_bytes();
            let line_num = index + 1;

            // Skip comments and blank lines.
            if line_bytes.starts_with(b"#")
                || line_bytes.iter().all(|&b| b == b' ' || b == b'\t')
                || line.is_empty()
            {
                continue;
            }

            if let Some((expected, fnmatch_same, text, pattern)) = parse_test_line(line_bytes) {
                test_cnt += 1;
                if let Err(err) = run_wildtest(line_num, expected, fnmatch_same, text, pattern) {
                    eprintln!("FAIL[{test_cnt}]\t{err}");
                    // fnmatch divergence is informational only.
                    if !err.contains("fnmatch") {
                        failures += 1;
                    }
                }
            } else {
                // Report the file line number, not the running test count,
                // so the malformed entry can be located in the data file.
                unreachable!("BUG: Fix test at line {line_num}: {line}!");
            }
        }

        if failures > 0 {
            panic!("{failures} out of {test_cnt} tests failed.");
        }
    }

    /// Parse a test line without allocating intermediate structures.
    ///
    /// A line consists of four whitespace-separated fields, each of
    /// which may be quoted with `'`, `"` or a backtick.
    /// Returns (expected, fnmatch_same, text, pattern) if valid, None otherwise.
    fn parse_test_line(line: &[u8]) -> Option<(bool, bool, &[u8], &[u8])> {
        let mut parts = [&b""[..]; 4];
        let mut part_idx = 0;
        let mut i = 0;

        while i < line.len() && part_idx < 4 {
            // Skip whitespace
            while i < line.len() && matches!(line[i], b' ' | b'\t') {
                i += 1;
            }
            if i >= line.len() {
                break;
            }

            // Check for quoted section
            if matches!(line[i], b'\'' | b'"' | b'`') {
                let quote = line[i];
                i += 1;
                let start = i;
                while i < line.len() && line[i] != quote {
                    i += 1;
                }
                parts[part_idx] = &line[start..i];
                if i < line.len() {
                    i += 1; // Skip closing quote
                }
            } else {
                // Unquoted section
                let start = i;
                while i < line.len() && !matches!(line[i], b' ' | b'\t') {
                    i += 1;
                }
                parts[part_idx] = &line[start..i];
            }
            part_idx += 1;
        }

        if part_idx >= 4 {
            // The two flag fields are true when they start with '1'.
            let expected = parts[0].first() == Some(&b'1');
            let fnmatch_same = parts[1].first() == Some(&b'1');
            Some((expected, fnmatch_same, parts[2], parts[3]))
        } else {
            None
        }
    }

    /// Check one table entry.
    ///
    /// Fails when wildmatch disagrees with `expected`, or when the
    /// agreement between wildmatch and fnmatch(3) differs from
    /// `fnmatch_same`. The latter message contains "fnmatch".
    fn run_wildtest(
        line: usize,
        expected: bool,
        fnmatch_same: bool,
        text: &[u8],
        pattern: &[u8],
    ) -> Result<(), String> {
        let result = wildmatch(pattern, text);
        if result != expected {
            let text = String::from_utf8_lossy(text);
            let pattern = String::from_utf8_lossy(pattern);
            let msg = format!(
                "[!] Test failed on line {line}: text='{text}', pattern='{pattern}', expected={expected}, got={result}",
            );
            return Err(msg);
        }

        let fn_result = fnmatch(pattern, text);
        let same = fn_result == result;
        if same != fnmatch_same {
            let text = String::from_utf8_lossy(text);
            let pattern = String::from_utf8_lossy(pattern);
            let msg = format!(
                "[!] fnmatch divergence on line {line}: text='{text}', pattern='{pattern}', wildmatch={result}, fnmatch={fn_result}, expected_same={fnmatch_same}",
            );
            return Err(msg);
        }

        Ok(())
    }

    /// Reference matcher: call libc's fnmatch(3) on the same inputs.
    fn fnmatch(pat: &[u8], input: &[u8]) -> bool {
        pat.with_nix_path(|pat_cstr| {
            input.with_nix_path(|input_cstr| {
                let flags = libc::FNM_PATHNAME | libc::FNM_NOESCAPE | libc::FNM_PERIOD;
                // SAFETY: FFI call to fnmatch(3)
                unsafe { libc::fnmatch(pat_cstr.as_ptr(), input_cstr.as_ptr(), flags) == 0 }
            })
        })
        .map(|res| res.unwrap())
        .unwrap()
    }
}
