diff --git a/lib/linguist/heuristics.rb b/lib/linguist/heuristics.rb index 876e974dc9..fe680382e5 100644 --- a/lib/linguist/heuristics.rb +++ b/lib/linguist/heuristics.rb @@ -45,20 +45,29 @@ def self.load() exts = disambiguation['extensions'] rules = disambiguation['rules'] rules.map! do |rule| - if !rule['pattern'].nil? - rule['pattern'] = self.to_regex(rule['pattern']) - elsif !rule['negative_pattern'].nil? - pat = self.to_regex(rule['negative_pattern']) - rule['pattern'] = NegativePattern.new(pat) - elsif !rule['named_pattern'].nil? - rule['pattern'] = named_patterns[rule['named_pattern']] - end + rule['pattern'] = self.parse_rule(named_patterns, rule) rule end @heuristics << new(exts, rules) end end + def self.parse_rule(named_patterns, rule) + if !rule['and'].nil? + rules = rule['and'].map { |block| self.parse_rule(named_patterns, block) } + return And.new(rules) + elsif !rule['pattern'].nil? + return self.to_regex(rule['pattern']) + elsif !rule['negative_pattern'].nil? + pat = self.to_regex(rule['negative_pattern']) + return NegativePattern.new(pat) + elsif !rule['named_pattern'].nil? + return named_patterns[rule['named_pattern']] + else + return AlwaysMatch.new() + end + end + # Internal: Converts a string or array of strings to regexp # # str: string or array of strings. If it is an array of strings, @@ -91,8 +100,7 @@ def matches?(filename, candidates) # Internal: Perform the heuristic def call(data) matched = @rules.find do |rule| - m = !rule.key?('pattern') || rule['pattern'].match(data) - m + rule['pattern'].match(data) end if !matched.nil? languages = matched['language'] @@ -106,6 +114,24 @@ def call(data) end + class And + + def initialize(pats) + @pats = pats + end + + def match(input) + return !@pats.any? 
{ |pat| !pat.match(input) } + end + + end + + class AlwaysMatch + def match(input) + return true + end + end + class NegativePattern def initialize(pat) diff --git a/lib/linguist/heuristics.yml b/lib/linguist/heuristics.yml index 24b409f962..79736ab392 100644 --- a/lib/linguist/heuristics.yml +++ b/lib/linguist/heuristics.yml @@ -16,6 +16,8 @@ # Pattern can be a string with a single regular expression # or an array of strings that will be merged in a single # regular expression (with union). +# and - An and block merges multiple rules and checks that all +# of them must match. # negative_pattern - Same as pattern, but checks for absence of matches. # named_pattern - A pattern can be reused by specifying it in the # named_patterns section and referencing it here by its @@ -217,9 +219,9 @@ disambiguations: - language: Roff pattern: '^[.''][A-Za-z]{2}(\s|$)' - language: Unix Assembly - pattern: - - '(?