Skip to content

Commit

Permalink
Split PHP into two lexers - PHP and PHTML.
Browse files Browse the repository at this point in the history
The former is pure PHP code while the latter is PHP code in <? ?> tags,
within HTML.

Fixes #210.
  • Loading branch information
alecthomas committed Jun 30, 2020
1 parent 1150149 commit 2b9ea60
Show file tree
Hide file tree
Showing 10 changed files with 229 additions and 118 deletions.
5 changes: 5 additions & 0 deletions .golangci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ linters:
- wsl
- gomnd
- gocognit
- goerr113
- nolintlint
- testpackage
- godot
- nestif

linters-settings:
govet:
Expand Down
2 changes: 1 addition & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ go:
- "1.13.x"
script:
- go test -v ./...
- curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s v1.22.2
- curl -sfL https://install.goreleaser.com/github.com/golangci/golangci-lint.sh | bash -s 1.26.0
- ./bin/golangci-lint run
- git clean -fdx .
after_success:
Expand Down
137 changes: 63 additions & 74 deletions lexers/circular/php.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,12 @@
package circular

import (
"strings"

. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/chroma/lexers/h"
"github.com/alecthomas/chroma/lexers/internal"
)

// PHP lexer.
var PHP = internal.Register(DelegatingLexer(h.HTML, MustNewLexer(
// PHP lexer for pure PHP code (not embedded in HTML).
var PHP = internal.Register(MustNewLexer(
&Config{
Name: "PHP",
Aliases: []string{"php", "php3", "php4", "php5"},
Expand All @@ -19,73 +16,65 @@ var PHP = internal.Register(DelegatingLexer(h.HTML, MustNewLexer(
CaseInsensitive: true,
EnsureNL: true,
},
Rules{
"root": {
{`<\?(php)?`, CommentPreproc, Push("php")},
{`[^<]+`, Other, nil},
{`<`, Other, nil},
},
"php": {
{`\?>`, CommentPreproc, Pop(1)},
{`(<<<)([\'"]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)`, ByGroups(LiteralString, LiteralString, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, Punctuation, Text), nil},
{`\s+`, Text, nil},
{`#.*?\n`, CommentSingle, nil},
{`//.*?\n`, CommentSingle, nil},
{`/\*\*/`, CommentMultiline, nil},
{`/\*\*.*?\*/`, LiteralStringDoc, nil},
{`/\*.*?\*/`, CommentMultiline, nil},
{`(->|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Operator, Text, NameAttribute), nil},
{`[~!%^&*+=|:.<>/@-]+`, Operator, nil},
{`\?`, Operator, nil},
{`[\[\]{}();,]+`, Punctuation, nil},
{`(class)(\s+)`, ByGroups(Keyword, Text), Push("classname")},
{`(function)(\s*)(?=\()`, ByGroups(Keyword, Text), nil},
{`(function)(\s+)(&?)(\s*)`, ByGroups(Keyword, Text, Operator, Text), Push("functionname")},
{`(const)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Keyword, Text, NameConstant), nil},
{`(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b`, Keyword, nil},
{`(true|false|null)\b`, KeywordConstant, nil},
Include("magicconstants"),
{`\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}`, NameVariable, nil},
{`\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameVariable, nil},
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameOther, nil},
{`(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil},
{`\d+e[+-]?[0-9]+`, LiteralNumberFloat, nil},
{`0[0-7]+`, LiteralNumberOct, nil},
{`0x[a-f0-9]+`, LiteralNumberHex, nil},
{`\d+`, LiteralNumberInteger, nil},
{`0b[01]+`, LiteralNumberBin, nil},
{`'([^'\\]*(?:\\.[^'\\]*)*)'`, LiteralStringSingle, nil},
{"`([^`\\\\]*(?:\\\\.[^`\\\\]*)*)`", LiteralStringBacktick, nil},
{`"`, LiteralStringDouble, Push("string")},
},
"magicfuncs": {
{Words(``, `\b`, `__construct`, `__destruct`, `__call`, `__callStatic`, `__get`, `__set`, `__isset`, `__unset`, `__sleep`, `__wakeup`, `__toString`, `__invoke`, `__set_state`, `__clone`, `__debugInfo`), NameFunctionMagic, nil},
},
"magicconstants": {
{Words(``, `\b`, `__LINE__`, `__FILE__`, `__DIR__`, `__FUNCTION__`, `__CLASS__`, `__TRAIT__`, `__METHOD__`, `__NAMESPACE__`), NameConstant, nil},
},
"classname": {
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameClass, Pop(1)},
},
"functionname": {
Include("magicfuncs"),
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameFunction, Pop(1)},
Default(Pop(1)),
},
"string": {
{`"`, LiteralStringDouble, Pop(1)},
{`[^{$"\\]+`, LiteralStringDouble, nil},
{`\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})`, LiteralStringEscape, nil},
{`\$(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*(\[\S+?\]|->(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?`, LiteralStringInterpol, nil},
{`(\{\$\{)(.*?)(\}\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\{)(\$.*?)(\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\$\{)(\S+)(\})`, ByGroups(LiteralStringInterpol, NameVariable, LiteralStringInterpol), nil},
{`[${\\]`, LiteralStringDouble, nil},
},
phpCommonRules.Rename("php", "root"),
))

var phpCommonRules = Rules{
"php": {
{`\?>`, CommentPreproc, Pop(1)},
{`(<<<)([\'"]?)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)(\2\n.*?\n\s*)(\3)(;?)(\n)`, ByGroups(LiteralString, LiteralString, LiteralStringDelimiter, LiteralString, LiteralStringDelimiter, Punctuation, Text), nil},
{`\s+`, Text, nil},
{`#.*?\n`, CommentSingle, nil},
{`//.*?\n`, CommentSingle, nil},
{`/\*\*/`, CommentMultiline, nil},
{`/\*\*.*?\*/`, LiteralStringDoc, nil},
{`/\*.*?\*/`, CommentMultiline, nil},
{`(->|::)(\s*)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Operator, Text, NameAttribute), nil},
{`[~!%^&*+=|:.<>/@-]+`, Operator, nil},
{`\?`, Operator, nil},
{`[\[\]{}();,]+`, Punctuation, nil},
{`(class)(\s+)`, ByGroups(Keyword, Text), Push("classname")},
{`(function)(\s*)(?=\()`, ByGroups(Keyword, Text), nil},
{`(function)(\s+)(&?)(\s*)`, ByGroups(Keyword, Text, Operator, Text), Push("functionname")},
{`(const)(\s+)((?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)`, ByGroups(Keyword, Text, NameConstant), nil},
{`(and|E_PARSE|old_function|E_ERROR|or|as|E_WARNING|parent|eval|PHP_OS|break|exit|case|extends|PHP_VERSION|cfunction|FALSE|print|for|require|continue|foreach|require_once|declare|return|default|static|do|switch|die|stdClass|echo|else|TRUE|elseif|var|empty|if|xor|enddeclare|include|virtual|endfor|include_once|while|endforeach|global|endif|list|endswitch|new|endwhile|not|array|E_ALL|NULL|final|php_user_filter|interface|implements|public|private|protected|abstract|clone|try|catch|throw|this|use|namespace|trait|yield|finally)\b`, Keyword, nil},
{`(true|false|null)\b`, KeywordConstant, nil},
Include("magicconstants"),
{`\$\{\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*\}`, NameVariable, nil},
{`\$+(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameVariable, nil},
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameOther, nil},
{`(\d+\.\d*|\d*\.\d+)(e[+-]?[0-9]+)?`, LiteralNumberFloat, nil},
{`\d+e[+-]?[0-9]+`, LiteralNumberFloat, nil},
{`0[0-7]+`, LiteralNumberOct, nil},
{`0x[a-f0-9]+`, LiteralNumberHex, nil},
{`\d+`, LiteralNumberInteger, nil},
{`0b[01]+`, LiteralNumberBin, nil},
{`'([^'\\]*(?:\\.[^'\\]*)*)'`, LiteralStringSingle, nil},
{"`([^`\\\\]*(?:\\\\.[^`\\\\]*)*)`", LiteralStringBacktick, nil},
{`"`, LiteralStringDouble, Push("string")},
},
"magicfuncs": {
{Words(``, `\b`, `__construct`, `__destruct`, `__call`, `__callStatic`, `__get`, `__set`, `__isset`, `__unset`, `__sleep`, `__wakeup`, `__toString`, `__invoke`, `__set_state`, `__clone`, `__debugInfo`), NameFunctionMagic, nil},
},
"magicconstants": {
{Words(``, `\b`, `__LINE__`, `__FILE__`, `__DIR__`, `__FUNCTION__`, `__CLASS__`, `__TRAIT__`, `__METHOD__`, `__NAMESPACE__`), NameConstant, nil},
},
"classname": {
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameClass, Pop(1)},
},
"functionname": {
Include("magicfuncs"),
{`(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*`, NameFunction, Pop(1)},
Default(Pop(1)),
},
"string": {
{`"`, LiteralStringDouble, Pop(1)},
{`[^{$"\\]+`, LiteralStringDouble, nil},
{`\\([nrt"$\\]|[0-7]{1,3}|x[0-9a-f]{1,2})`, LiteralStringEscape, nil},
{`\$(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*(\[\S+?\]|->(?:[\\_a-z]|[^\x00-\x7f])(?:[\\\w]|[^\x00-\x7f])*)?`, LiteralStringInterpol, nil},
{`(\{\$\{)(.*?)(\}\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\{)(\$.*?)(\})`, ByGroups(LiteralStringInterpol, UsingSelf("root"), LiteralStringInterpol), nil},
{`(\$\{)(\S+)(\})`, ByGroups(LiteralStringInterpol, NameVariable, LiteralStringInterpol), nil},
{`[${\\]`, LiteralStringDouble, nil},
},
).SetAnalyser(func(text string) float32 {
if strings.Contains(text, "<?php") {
return 0.5
}
return 0.0
})))
}
34 changes: 34 additions & 0 deletions lexers/circular/phtml.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
package circular

import (
"strings"

. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/chroma/lexers/h"
"github.com/alecthomas/chroma/lexers/internal"
)

// PHTML lexer is PHP in HTML.
var PHTML = internal.Register(DelegatingLexer(h.HTML, MustNewLexer(
&Config{
Name: "PHTML",
Aliases: []string{"phtml"},
Filenames: []string{"*.phtml"},
MimeTypes: []string{"application/x-php", "application/x-httpd-php", "application/x-httpd-php3", "application/x-httpd-php4", "application/x-httpd-php5"},
DotAll: true,
CaseInsensitive: true,
EnsureNL: true,
},
Rules{
"root": {
{`<\?(php)?`, CommentPreproc, Push("php")},
{`[^<]+`, Other, nil},
{`<`, Other, nil},
},
}.Merge(phpCommonRules),
).SetAnalyser(func(text string) float32 {
if strings.Contains(text, "<?php") {
return 0.5
}
return 0.0
})))
2 changes: 1 addition & 1 deletion lexers/lexers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ func TestLexers(t *testing.T) {
if os.Getenv("RECORD") == "true" {
// Update the expected file with the generated output of this lexer
f, err := os.Create(expectedFilename)
defer f.Close()
defer f.Close() // nolint: gosec
assert.NoError(t, err)
assert.NoError(t, formatters.JSON.Format(f, nil, chroma.Literator(actual...)))
} else {
Expand Down
12 changes: 1 addition & 11 deletions lexers/testdata/php.actual
Original file line number Diff line number Diff line change
@@ -1,19 +1,9 @@
<!DOCTYPE html>
<html>
<body>

<h1>My first PHP page</h1>
<?php

$docs = $modx->getIterator('modResource', ["parent" => 84]);

foreach($docs as $doc){
$q=$doc->content;
$doc->set("content", preg_replace("/Some value/i", "Replacement", $q));
print_r($doc->content);
// $doc->save();
}
}
// some comment
?>
</body>
</html>
32 changes: 1 addition & 31 deletions lexers/testdata/php.expected
Original file line number Diff line number Diff line change
@@ -1,24 +1,4 @@
[
{"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"My first PHP page"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"CommentPreproc","value":"\u003c?php"},
{"type":"Text","value":"\n\n"},
{"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
Expand Down Expand Up @@ -81,16 +61,6 @@
{"type":"Text","value":"\n "},
{"type":"CommentSingle","value":"// $doc-\u003esave();\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":" \n"},
{"type":"CommentSingle","value":"// some comment\n"},
{"type":"CommentPreproc","value":"?\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"}
{"type":"CommentSingle","value":"// some comment\n"}
]
19 changes: 19 additions & 0 deletions lexers/testdata/phtml.actual
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
<!DOCTYPE html>
<html>
<body>

<h1>My first PHP page</h1>
<?php

$docs = $modx->getIterator('modResource', ["parent" => 84]);

foreach($docs as $doc){
$q=$doc->content;
$doc->set("content", preg_replace("/Some value/i", "Replacement", $q));
print_r($doc->content);
// $doc->save();
}
// some comment
?>
</body>
</html>
96 changes: 96 additions & 0 deletions lexers/testdata/phtml.expected
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
[
{"type":"CommentPreproc","value":"\u003c!DOCTYPE html\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n\n"},
{"type":"Punctuation","value":"\u003c"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"My first PHP page"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"h1"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"CommentPreproc","value":"\u003c?php"},
{"type":"Text","value":"\n\n"},
{"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "},
{"type":"Operator","value":"="},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$modx"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"getIterator"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringSingle","value":"'modResource'"},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"Punctuation","value":"["},
{"type":"LiteralStringDouble","value":"\"parent\""},
{"type":"Text","value":" "},
{"type":"Operator","value":"=\u003e"},
{"type":"Text","value":" "},
{"type":"LiteralNumberInteger","value":"84"},
{"type":"Punctuation","value":"]);"},
{"type":"Text","value":"\n\n"},
{"type":"Keyword","value":"foreach"},
{"type":"Punctuation","value":"("},
{"type":"NameVariable","value":"$docs"},
{"type":"Text","value":" "},
{"type":"Keyword","value":"as"},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$doc"},
{"type":"Punctuation","value":"){"},
{"type":"Text","value":"\n "},
{"type":"NameVariable","value":"$q"},
{"type":"Operator","value":"="},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"content"},
{"type":"Punctuation","value":";"},
{"type":"Text","value":"\n "},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"set"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringDouble","value":"\"content\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameOther","value":"preg_replace"},
{"type":"Punctuation","value":"("},
{"type":"LiteralStringDouble","value":"\"/Some value/i\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"LiteralStringDouble","value":"\"Replacement\""},
{"type":"Punctuation","value":","},
{"type":"Text","value":" "},
{"type":"NameVariable","value":"$q"},
{"type":"Punctuation","value":"));"},
{"type":"Text","value":"\n "},
{"type":"NameOther","value":"print_r"},
{"type":"Punctuation","value":"("},
{"type":"NameVariable","value":"$doc"},
{"type":"Operator","value":"-\u003e"},
{"type":"NameAttribute","value":"content"},
{"type":"Punctuation","value":");"},
{"type":"Text","value":"\n "},
{"type":"CommentSingle","value":"// $doc-\u003esave();\n"},
{"type":"Punctuation","value":"}"},
{"type":"Text","value":" \n"},
{"type":"CommentSingle","value":"// some comment\n"},
{"type":"CommentPreproc","value":"?\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"body"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"},
{"type":"Punctuation","value":"\u003c/"},
{"type":"NameTag","value":"html"},
{"type":"Punctuation","value":"\u003e"},
{"type":"Text","value":"\n"}
]
Loading

1 comment on commit 2b9ea60

@hopeseekr
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

OMG Thank you!!!

Please sign in to comment.