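// Package patternmatcher matches file paths against lists of glob-style
// patterns, with support for "**" wildcards and "!" exclusion patterns.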
package patternmatcher
|
||
|
|
||
|
import (
|
||
|
"errors"
|
||
|
"os"
|
||
|
"path/filepath"
|
||
|
"regexp"
|
||
|
"strings"
|
||
|
"text/scanner"
|
||
|
"unicode/utf8"
|
||
|
)
|
||
|
|
||
|
// escapeBytes is a bitmap used to check whether a character should be escaped
// when creating the regex. The ASCII character b is recorded as bit b/16 of
// escapeBytes[b%16]. With 16 slots the bit index is at most 7, so every ASCII
// character fits in a byte-sized slot (an 8-slot layout would need bit indexes
// up to 15 and silently drop every character >= 64, such as '|', '{' and '}').
var escapeBytes [16]byte

// shouldEscape reports whether a rune should be escaped as part of the regex.
//
// This only includes characters that require escaping in regex but are also
// NOT valid filepath pattern characters. '\' is deliberately not in the set:
// compile handles it separately, as it is the path separator on Windows.
//
// Negative runes and runes outside the ASCII range are never escaped.
//
// Adapted from regexp::QuoteMeta in go stdlib.
// See https://cs.opensource.google/go/go/+/refs/tags/go1.17.2:src/regexp/regexp.go;l=703-715;drc=refs%2Ftags%2Fgo1.17.2
func shouldEscape(b rune) bool {
	return b >= 0 && b < utf8.RuneSelf && escapeBytes[b%16]&(1<<(b/16)) != 0
}

// init records the characters that shouldEscape reports as needing an escape.
func init() {
	for _, b := range []byte(`.+()|{}$`) {
		escapeBytes[b%16] |= 1 << (b / 16)
	}
}
|
||
|
|
||
|
// PatternMatcher allows checking paths against a list of patterns
|
||
|
type PatternMatcher struct {
|
||
|
patterns []*Pattern
|
||
|
exclusions bool
|
||
|
}
|
||
|
|
||
|
// New creates a new matcher object for specific patterns that can
|
||
|
// be used later to match against patterns against paths
|
||
|
func New(patterns []string) (*PatternMatcher, error) {
|
||
|
pm := &PatternMatcher{
|
||
|
patterns: make([]*Pattern, 0, len(patterns)),
|
||
|
}
|
||
|
for _, p := range patterns {
|
||
|
// Eliminate leading and trailing whitespace.
|
||
|
p = strings.TrimSpace(p)
|
||
|
if p == "" {
|
||
|
continue
|
||
|
}
|
||
|
p = filepath.Clean(p)
|
||
|
newp := &Pattern{}
|
||
|
if p[0] == '!' {
|
||
|
if len(p) == 1 {
|
||
|
return nil, errors.New("illegal exclusion pattern: \"!\"")
|
||
|
}
|
||
|
newp.exclusion = true
|
||
|
p = p[1:]
|
||
|
pm.exclusions = true
|
||
|
}
|
||
|
// Do some syntax checking on the pattern.
|
||
|
// filepath's Match() has some really weird rules that are inconsistent
|
||
|
// so instead of trying to dup their logic, just call Match() for its
|
||
|
// error state and if there is an error in the pattern return it.
|
||
|
// If this becomes an issue we can remove this since its really only
|
||
|
// needed in the error (syntax) case - which isn't really critical.
|
||
|
if _, err := filepath.Match(p, "."); err != nil {
|
||
|
return nil, err
|
||
|
}
|
||
|
newp.cleanedPattern = p
|
||
|
newp.dirs = strings.Split(p, string(os.PathSeparator))
|
||
|
pm.patterns = append(pm.patterns, newp)
|
||
|
}
|
||
|
return pm, nil
|
||
|
}
|
||
|
|
||
|
// Matches returns true if "file" matches any of the patterns
|
||
|
// and isn't excluded by any of the subsequent patterns.
|
||
|
//
|
||
|
// The "file" argument should be a slash-delimited path.
|
||
|
//
|
||
|
// Matches is not safe to call concurrently.
|
||
|
//
|
||
|
// Deprecated: This implementation is buggy (it only checks a single parent dir
|
||
|
// against the pattern) and will be removed soon. Use either
|
||
|
// MatchesOrParentMatches or MatchesUsingParentResults instead.
|
||
|
func (pm *PatternMatcher) Matches(file string) (bool, error) {
|
||
|
matched := false
|
||
|
file = filepath.FromSlash(file)
|
||
|
parentPath := filepath.Dir(file)
|
||
|
parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
|
||
|
|
||
|
for _, pattern := range pm.patterns {
|
||
|
// Skip evaluation if this is an inclusion and the filename
|
||
|
// already matched the pattern, or it's an exclusion and it has
|
||
|
// not matched the pattern yet.
|
||
|
if pattern.exclusion != matched {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
match, err := pattern.match(file)
|
||
|
if err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
if !match && parentPath != "." {
|
||
|
// Check to see if the pattern matches one of our parent dirs.
|
||
|
if len(pattern.dirs) <= len(parentPathDirs) {
|
||
|
match, _ = pattern.match(strings.Join(parentPathDirs[:len(pattern.dirs)], string(os.PathSeparator)))
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if match {
|
||
|
matched = !pattern.exclusion
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return matched, nil
|
||
|
}
|
||
|
|
||
|
// MatchesOrParentMatches returns true if "file" matches any of the patterns
|
||
|
// and isn't excluded by any of the subsequent patterns.
|
||
|
//
|
||
|
// The "file" argument should be a slash-delimited path.
|
||
|
//
|
||
|
// Matches is not safe to call concurrently.
|
||
|
func (pm *PatternMatcher) MatchesOrParentMatches(file string) (bool, error) {
|
||
|
matched := false
|
||
|
file = filepath.FromSlash(file)
|
||
|
parentPath := filepath.Dir(file)
|
||
|
parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
|
||
|
|
||
|
for _, pattern := range pm.patterns {
|
||
|
// Skip evaluation if this is an inclusion and the filename
|
||
|
// already matched the pattern, or it's an exclusion and it has
|
||
|
// not matched the pattern yet.
|
||
|
if pattern.exclusion != matched {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
match, err := pattern.match(file)
|
||
|
if err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
if !match && parentPath != "." {
|
||
|
// Check to see if the pattern matches one of our parent dirs.
|
||
|
for i := range parentPathDirs {
|
||
|
match, _ = pattern.match(strings.Join(parentPathDirs[:i+1], string(os.PathSeparator)))
|
||
|
if match {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if match {
|
||
|
matched = !pattern.exclusion
|
||
|
}
|
||
|
}
|
||
|
|
||
|
return matched, nil
|
||
|
}
|
||
|
|
||
|
// MatchesUsingParentResult returns true if "file" matches any of the patterns
|
||
|
// and isn't excluded by any of the subsequent patterns. The functionality is
|
||
|
// the same as Matches, but as an optimization, the caller keeps track of
|
||
|
// whether the parent directory matched.
|
||
|
//
|
||
|
// The "file" argument should be a slash-delimited path.
|
||
|
//
|
||
|
// MatchesUsingParentResult is not safe to call concurrently.
|
||
|
//
|
||
|
// Deprecated: this function does behave correctly in some cases (see
|
||
|
// https://github.com/docker/buildx/issues/850).
|
||
|
//
|
||
|
// Use MatchesUsingParentResults instead.
|
||
|
func (pm *PatternMatcher) MatchesUsingParentResult(file string, parentMatched bool) (bool, error) {
|
||
|
matched := parentMatched
|
||
|
file = filepath.FromSlash(file)
|
||
|
|
||
|
for _, pattern := range pm.patterns {
|
||
|
// Skip evaluation if this is an inclusion and the filename
|
||
|
// already matched the pattern, or it's an exclusion and it has
|
||
|
// not matched the pattern yet.
|
||
|
if pattern.exclusion != matched {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
match, err := pattern.match(file)
|
||
|
if err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
|
||
|
if match {
|
||
|
matched = !pattern.exclusion
|
||
|
}
|
||
|
}
|
||
|
return matched, nil
|
||
|
}
|
||
|
|
||
|
// MatchInfo tracks information about parent dir matches while traversing a
// filesystem. The zero value means no parent information is available.
type MatchInfo struct {
	// parentMatched holds one result per pattern, in pattern order.
	parentMatched []bool
}
|
||
|
|
||
|
// MatchesUsingParentResults returns true if "file" matches any of the patterns
|
||
|
// and isn't excluded by any of the subsequent patterns. The functionality is
|
||
|
// the same as Matches, but as an optimization, the caller passes in
|
||
|
// intermediate results from matching the parent directory.
|
||
|
//
|
||
|
// The "file" argument should be a slash-delimited path.
|
||
|
//
|
||
|
// MatchesUsingParentResults is not safe to call concurrently.
|
||
|
func (pm *PatternMatcher) MatchesUsingParentResults(file string, parentMatchInfo MatchInfo) (bool, MatchInfo, error) {
|
||
|
parentMatched := parentMatchInfo.parentMatched
|
||
|
if len(parentMatched) != 0 && len(parentMatched) != len(pm.patterns) {
|
||
|
return false, MatchInfo{}, errors.New("wrong number of values in parentMatched")
|
||
|
}
|
||
|
|
||
|
file = filepath.FromSlash(file)
|
||
|
matched := false
|
||
|
|
||
|
matchInfo := MatchInfo{
|
||
|
parentMatched: make([]bool, len(pm.patterns)),
|
||
|
}
|
||
|
for i, pattern := range pm.patterns {
|
||
|
match := false
|
||
|
// If the parent matched this pattern, we don't need to recheck.
|
||
|
if len(parentMatched) != 0 {
|
||
|
match = parentMatched[i]
|
||
|
}
|
||
|
|
||
|
if !match {
|
||
|
// Skip evaluation if this is an inclusion and the filename
|
||
|
// already matched the pattern, or it's an exclusion and it has
|
||
|
// not matched the pattern yet.
|
||
|
if pattern.exclusion != matched {
|
||
|
continue
|
||
|
}
|
||
|
|
||
|
var err error
|
||
|
match, err = pattern.match(file)
|
||
|
if err != nil {
|
||
|
return false, matchInfo, err
|
||
|
}
|
||
|
|
||
|
// If the zero value of MatchInfo was passed in, we don't have
|
||
|
// any information about the parent dir's match results, and we
|
||
|
// apply the same logic as MatchesOrParentMatches.
|
||
|
if !match && len(parentMatched) == 0 {
|
||
|
if parentPath := filepath.Dir(file); parentPath != "." {
|
||
|
parentPathDirs := strings.Split(parentPath, string(os.PathSeparator))
|
||
|
// Check to see if the pattern matches one of our parent dirs.
|
||
|
for i := range parentPathDirs {
|
||
|
match, _ = pattern.match(strings.Join(parentPathDirs[:i+1], string(os.PathSeparator)))
|
||
|
if match {
|
||
|
break
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
matchInfo.parentMatched[i] = match
|
||
|
|
||
|
if match {
|
||
|
matched = !pattern.exclusion
|
||
|
}
|
||
|
}
|
||
|
return matched, matchInfo, nil
|
||
|
}
|
||
|
|
||
|
// Exclusions returns true if any of the patterns define exclusions
|
||
|
func (pm *PatternMatcher) Exclusions() bool {
|
||
|
return pm.exclusions
|
||
|
}
|
||
|
|
||
|
// Patterns returns array of active patterns
|
||
|
func (pm *PatternMatcher) Patterns() []*Pattern {
|
||
|
return pm.patterns
|
||
|
}
|
||
|
|
||
|
// Pattern defines a single regexp used to filter file paths.
|
||
|
type Pattern struct {
|
||
|
matchType matchType
|
||
|
cleanedPattern string
|
||
|
dirs []string
|
||
|
regexp *regexp.Regexp
|
||
|
exclusion bool
|
||
|
}
|
||
|
|
||
|
// matchType identifies the strategy a Pattern uses to test a path.
type matchType int

const (
	// unknownMatch is the zero value: the pattern has not been compiled yet.
	unknownMatch matchType = iota
	// exactMatch compares the path with the pattern text for equality.
	exactMatch
	// prefixMatch is used for patterns ending in "**": the path must start
	// with the text before it.
	prefixMatch
	// suffixMatch is used for patterns starting with "**": the path must end
	// with the text after it.
	suffixMatch
	// regexpMatch evaluates the path against a compiled regular expression.
	regexpMatch
)
|
||
|
|
||
|
func (p *Pattern) String() string {
|
||
|
return p.cleanedPattern
|
||
|
}
|
||
|
|
||
|
// Exclusion returns true if this pattern defines exclusion
|
||
|
func (p *Pattern) Exclusion() bool {
|
||
|
return p.exclusion
|
||
|
}
|
||
|
|
||
|
func (p *Pattern) match(path string) (bool, error) {
|
||
|
if p.matchType == unknownMatch {
|
||
|
if err := p.compile(string(os.PathSeparator)); err != nil {
|
||
|
return false, filepath.ErrBadPattern
|
||
|
}
|
||
|
}
|
||
|
|
||
|
switch p.matchType {
|
||
|
case exactMatch:
|
||
|
return path == p.cleanedPattern, nil
|
||
|
case prefixMatch:
|
||
|
// strip trailing **
|
||
|
return strings.HasPrefix(path, p.cleanedPattern[:len(p.cleanedPattern)-2]), nil
|
||
|
case suffixMatch:
|
||
|
// strip leading **
|
||
|
suffix := p.cleanedPattern[2:]
|
||
|
if strings.HasSuffix(path, suffix) {
|
||
|
return true, nil
|
||
|
}
|
||
|
// **/foo matches "foo"
|
||
|
return suffix[0] == os.PathSeparator && path == suffix[1:], nil
|
||
|
case regexpMatch:
|
||
|
return p.regexp.MatchString(path), nil
|
||
|
}
|
||
|
|
||
|
return false, nil
|
||
|
}
|
||
|
|
||
|
func (p *Pattern) compile(sl string) error {
|
||
|
regStr := "^"
|
||
|
pattern := p.cleanedPattern
|
||
|
// Go through the pattern and convert it to a regexp.
|
||
|
// We use a scanner so we can support utf-8 chars.
|
||
|
var scan scanner.Scanner
|
||
|
scan.Init(strings.NewReader(pattern))
|
||
|
|
||
|
escSL := sl
|
||
|
if sl == `\` {
|
||
|
escSL += `\`
|
||
|
}
|
||
|
|
||
|
p.matchType = exactMatch
|
||
|
for i := 0; scan.Peek() != scanner.EOF; i++ {
|
||
|
ch := scan.Next()
|
||
|
|
||
|
if ch == '*' {
|
||
|
if scan.Peek() == '*' {
|
||
|
// is some flavor of "**"
|
||
|
scan.Next()
|
||
|
|
||
|
// Treat **/ as ** so eat the "/"
|
||
|
if string(scan.Peek()) == sl {
|
||
|
scan.Next()
|
||
|
}
|
||
|
|
||
|
if scan.Peek() == scanner.EOF {
|
||
|
// is "**EOF" - to align with .gitignore just accept all
|
||
|
if p.matchType == exactMatch {
|
||
|
p.matchType = prefixMatch
|
||
|
} else {
|
||
|
regStr += ".*"
|
||
|
p.matchType = regexpMatch
|
||
|
}
|
||
|
} else {
|
||
|
// is "**"
|
||
|
// Note that this allows for any # of /'s (even 0) because
|
||
|
// the .* will eat everything, even /'s
|
||
|
regStr += "(.*" + escSL + ")?"
|
||
|
p.matchType = regexpMatch
|
||
|
}
|
||
|
|
||
|
if i == 0 {
|
||
|
p.matchType = suffixMatch
|
||
|
}
|
||
|
} else {
|
||
|
// is "*" so map it to anything but "/"
|
||
|
regStr += "[^" + escSL + "]*"
|
||
|
p.matchType = regexpMatch
|
||
|
}
|
||
|
} else if ch == '?' {
|
||
|
// "?" is any char except "/"
|
||
|
regStr += "[^" + escSL + "]"
|
||
|
p.matchType = regexpMatch
|
||
|
} else if shouldEscape(ch) {
|
||
|
// Escape some regexp special chars that have no meaning
|
||
|
// in golang's filepath.Match
|
||
|
regStr += `\` + string(ch)
|
||
|
} else if ch == '\\' {
|
||
|
// escape next char. Note that a trailing \ in the pattern
|
||
|
// will be left alone (but need to escape it)
|
||
|
if sl == `\` {
|
||
|
// On windows map "\" to "\\", meaning an escaped backslash,
|
||
|
// and then just continue because filepath.Match on
|
||
|
// Windows doesn't allow escaping at all
|
||
|
regStr += escSL
|
||
|
continue
|
||
|
}
|
||
|
if scan.Peek() != scanner.EOF {
|
||
|
regStr += `\` + string(scan.Next())
|
||
|
p.matchType = regexpMatch
|
||
|
} else {
|
||
|
regStr += `\`
|
||
|
}
|
||
|
} else if ch == '[' || ch == ']' {
|
||
|
regStr += string(ch)
|
||
|
p.matchType = regexpMatch
|
||
|
} else {
|
||
|
regStr += string(ch)
|
||
|
}
|
||
|
}
|
||
|
|
||
|
if p.matchType != regexpMatch {
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
regStr += "$"
|
||
|
|
||
|
re, err := regexp.Compile(regStr)
|
||
|
if err != nil {
|
||
|
return err
|
||
|
}
|
||
|
|
||
|
p.regexp = re
|
||
|
p.matchType = regexpMatch
|
||
|
return nil
|
||
|
}
|
||
|
|
||
|
// Matches returns true if file matches any of the patterns
|
||
|
// and isn't excluded by any of the subsequent patterns.
|
||
|
//
|
||
|
// This implementation is buggy (it only checks a single parent dir against the
|
||
|
// pattern) and will be removed soon. Use MatchesOrParentMatches instead.
|
||
|
func Matches(file string, patterns []string) (bool, error) {
|
||
|
pm, err := New(patterns)
|
||
|
if err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
file = filepath.Clean(file)
|
||
|
|
||
|
if file == "." {
|
||
|
// Don't let them exclude everything, kind of silly.
|
||
|
return false, nil
|
||
|
}
|
||
|
|
||
|
return pm.Matches(file)
|
||
|
}
|
||
|
|
||
|
// MatchesOrParentMatches returns true if file matches any of the patterns
|
||
|
// and isn't excluded by any of the subsequent patterns.
|
||
|
func MatchesOrParentMatches(file string, patterns []string) (bool, error) {
|
||
|
pm, err := New(patterns)
|
||
|
if err != nil {
|
||
|
return false, err
|
||
|
}
|
||
|
file = filepath.Clean(file)
|
||
|
|
||
|
if file == "." {
|
||
|
// Don't let them exclude everything, kind of silly.
|
||
|
return false, nil
|
||
|
}
|
||
|
|
||
|
return pm.MatchesOrParentMatches(file)
|
||
|
}
|