From ff9fee2e1a16e195d4f569330462690024a13b58 Mon Sep 17 00:00:00 2001 From: Alexander Bezzubov Date: Wed, 9 Jan 2019 21:13:57 +0100 Subject: [PATCH] gen: fix regexp Or syntax Signed-off-by: Alexander Bezzubov --- data/content.go | 4 ++-- internal/code-generator/generator/heuristics.go | 11 +++++++---- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/data/content.go b/data/content.go index 66f955ae..c595204b 100644 --- a/data/content.go +++ b/data/content.go @@ -195,7 +195,7 @@ var ContentHeuristics = map[string]*Heuristics{ }, &OrRule{ &Languages{[]string{"C++"}}, - regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)> | ^\s*template\s*< | ^[ \t]*try | ^[ \t]*catch\s*\( | ^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+ | ^[ \t]*(private|public|protected):$ | std::\w+`), + regexp.MustCompile(`(?m)^\s*#\s*include <(cstdint|string|vector|map|list|array|bitset|queue|stack|forward_list|unordered_map|unordered_set|(i|o|io)stream)>|^\s*template\s*<|^[ \t]*try|^[ \t]*catch\s*\(|^[ \t]*(class|(using[ \t]+)?namespace)\s+\w+|^[ \t]*(private|public|protected):$|std::\w+`), }, }, ".hh": &Heuristics{ @@ -300,7 +300,7 @@ var ContentHeuristics = map[string]*Heuristics{ ".md": &Heuristics{ &OrRule{ &Languages{[]string{"Markdown"}}, - regexp.MustCompile(`(?m)(^[-A-Za-z0-9=#!\*\[|>])|<\/ | \A\z`), + regexp.MustCompile(`(?m)(^[-A-Za-z0-9=#!\*\[|>])|<\/|\A\z`), }, &OrRule{ &Languages{[]string{"GCC Machine Description"}}, diff --git a/internal/code-generator/generator/heuristics.go b/internal/code-generator/generator/heuristics.go index 3eaabce4..4e75a97f 100644 --- a/internal/code-generator/generator/heuristics.go +++ b/internal/code-generator/generator/heuristics.go @@ -10,6 +10,11 @@ import ( yaml "gopkg.in/yaml.v2" ) +const ( + multilinePrefix = "(?m)" + orPipe = "|" +) + // GenHeuristics generates language identification heuristics in Go. // It is of generator.File type. func GenHeuristics(fileToParse, _, outPath, tmplPath, tmplName, commit string) error { @@ -67,14 +72,14 @@ func loadRule(namedPatterns map[string]StringArray, rule *Rule) *LanguagePattern } result = &LanguagePattern{"And", rule.Languages, "", subPatterns} } else if len(rule.Pattern) != 0 { // OrPattern - conjunction := strings.Join(rule.Pattern, " | ") + conjunction := strings.Join(rule.Pattern, orPipe) pattern := convertToValidRegexp(conjunction) result = &LanguagePattern{"Or", rule.Languages, pattern, nil} } else if rule.NegativePattern != "" { // NotPattern pattern := convertToValidRegexp(rule.NegativePattern) result = &LanguagePattern{"Not", rule.Languages, pattern, nil} } else if rule.NamedPattern != "" { // Named OrPattern - conjunction := strings.Join(namedPatterns[rule.NamedPattern], " | ") + conjunction := strings.Join(namedPatterns[rule.NamedPattern], orPipe) pattern := convertToValidRegexp(conjunction) result = &LanguagePattern{"Or", rule.Languages, pattern, nil} } else { // AlwaysPattern @@ -164,8 +169,6 @@ func isUnsupportedRegexpSyntax(reg string) bool { (strings.HasPrefix(reg, multilinePrefix+`/`) && strings.HasSuffix(reg, `/`)) } -const multilinePrefix = "(?m)" - // convertToValidRegexp converts Ruby regexp syntaxt to RE2 equivalent. // Does not work with Ruby regexp literals. func convertToValidRegexp(rubyRegexp string) string {