Skip to content

Commit

Permalink
heuristics: and matches
Browse files Browse the repository at this point in the history
  • Loading branch information
smola committed Sep 3, 2018
1 parent 3abe126 commit 71ab440
Show file tree
Hide file tree
Showing 2 changed files with 44 additions and 14 deletions.
46 changes: 36 additions & 10 deletions lib/linguist/heuristics.rb
Original file line number Diff line number Diff line change
Expand Up @@ -45,20 +45,29 @@ def self.load()
exts = disambiguation['extensions']
rules = disambiguation['rules']
rules.map! do |rule|
if !rule['pattern'].nil?
rule['pattern'] = self.to_regex(rule['pattern'])
elsif !rule['negative_pattern'].nil?
pat = self.to_regex(rule['negative_pattern'])
rule['pattern'] = NegativePattern.new(pat)
elsif !rule['named_pattern'].nil?
rule['pattern'] = named_patterns[rule['named_pattern']]
end
rule['pattern'] = self.parse_rule(named_patterns, rule)
rule
end
@heuristics << new(exts, rules)
end
end

# Internal: Builds the matcher object for a single rule definition.
#
# named_patterns - Collection indexed by pattern name; the entry for
#                  rule['named_pattern'] is returned as-is.
# rule           - Hash describing one rule. The keys 'and', 'pattern',
#                  'negative_pattern' and 'named_pattern' are checked in
#                  that order and the first non-nil one decides the result.
#
# Returns an And wrapping the recursively parsed sub-rules, the result of
# to_regex, a NegativePattern, the named_patterns entry, or an AlwaysMatch
# when none of the keys above is set.
def self.parse_rule(named_patterns, rule)
  unless rule['and'].nil?
    # Every entry of an 'and' block is itself a rule, so parse recursively.
    sub_patterns = rule['and'].map { |block| self.parse_rule(named_patterns, block) }
    return And.new(sub_patterns)
  end

  return self.to_regex(rule['pattern']) unless rule['pattern'].nil?

  unless rule['negative_pattern'].nil?
    return NegativePattern.new(self.to_regex(rule['negative_pattern']))
  end

  return named_patterns[rule['named_pattern']] unless rule['named_pattern'].nil?

  # No pattern-related key at all: the rule applies unconditionally.
  AlwaysMatch.new
end

# Internal: Converts a string or array of strings to regexp
#
# str: string or array of strings. If it is an array of strings,
Expand Down Expand Up @@ -91,8 +100,7 @@ def matches?(filename, candidates)
# Internal: Perform the heuristic
def call(data)
matched = @rules.find do |rule|
m = !rule.key?('pattern') || rule['pattern'].match(data)
m
rule['pattern'].match(data)
end
if !matched.nil?
languages = matched['language']
Expand All @@ -106,6 +114,24 @@ def call(data)

end

# Internal: Composite pattern that matches only when every one of the
# wrapped patterns matches.
class And
  # pats - Array of objects responding to #match(input).
  def initialize(pats)
    @pats = pats
  end

  # input - The data handed to each wrapped pattern's #match.
  #
  # Returns true when all wrapped patterns match (also when there are
  # none), false as soon as one of them does not.
  def match(input)
    @pats.all? { |pat| pat.match(input) }
  end
end

# Internal: Pattern stand-in that matches any input.
class AlwaysMatch
  # input - Ignored.
  #
  # Returns true.
  def match(input)
    true
  end
end

class NegativePattern

def initialize(pat)
Expand Down
12 changes: 8 additions & 4 deletions lib/linguist/heuristics.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
# Pattern can be a string with a single regular expression
# or an array of strings that will be merged in a single
# regular expression (with union).
# and - An and block combines multiple rules and requires all of
# them to match.
# negative_pattern - Same as pattern, but checks for absence of matches.
# named_pattern - A pattern can be reused by specifying it in the
# named_patterns section and referencing it here by its
Expand Down Expand Up @@ -217,9 +219,9 @@ disambiguations:
- language: Roff
pattern: '^[.''][A-Za-z]{2}(\s|$)'
- language: Unix Assembly
pattern:
- '(?<!\S)\.(include|globa?l)\s|(?<!\/\*)(\A|\n)\s*\.[A-Za-z][_A-Za-z0-9]*:'
#TODO: !/\/\*/.match(data) && /^\s*\.(?:include\s|globa?l\s|[A-Za-z][_A-Za-z0-9]*:)/.match(data)
and:
- negative_pattern: '/\*'
- pattern: '^\s*\.(?:include\s|globa?l\s|[A-Za-z][_A-Za-z0-9]*:)'
- language: MAXScript
- extensions: ['.n']
rules:
Expand Down Expand Up @@ -267,7 +269,9 @@ disambiguations:
- language: INI
pattern: 'last_client='
- language: QMake
pattern: '(HEADERS|SOURCES)'
and:
- pattern: HEADERS
- pattern: SOURCES
- language: IDL
pattern: '^\s*function[ \w,]+$'
- extensions: ['.props']
Expand Down

0 comments on commit 71ab440

Please sign in to comment.