-
-
Notifications
You must be signed in to change notification settings - Fork 255
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #247 from dbaggerman/refactor-countstats
Refactor CountStats state machine
- Loading branch information
Showing
11 changed files
with
419 additions
and
344 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
Docstrings containing an apostrophe (') are handled incorrectly | ||
The line above is counted as code despite being in the middle of a docstring. | ||
The end of docstring flag seems to be changed to an apostrophe, | ||
which means the next line will not exit the docstring. | ||
""" | ||
# Code containing single quotes will exit the docstring, | ||
# but, presuming the quotes are balanced, the second | ||
# quote will put us in string scanning mode. | ||
if __name__ == '__main__': | ||
print('Hello, World!') | ||
# Not counted as a comment (still in string scanning mode) | ||
|
||
# ^ Not counted as a blank line | ||
# Break out of string scanner with unbalanced single quote: ' | ||
exit(0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package processor | ||
|
||
// StateBlank is the state-machine node used while nothing significant has been
// seen on the current line yet. It carries no data.
type StateBlank struct{}

// String returns the fixed label "blank" for this state.
func (*StateBlank) String() string {
	const label = "blank"
	return label
}
|
||
func (state *StateBlank) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
switch tokenType, offsetJump, endString := lang.Tokens.Match(job.Content[index:]); tokenType { | ||
case TMlcomment: | ||
commentType := lineType | ||
if commentType == LINE_BLANK { | ||
commentType = LINE_COMMENT | ||
} | ||
|
||
index += offsetJump - 1 | ||
return index, commentType, NewStateCommentMulti(endString) | ||
|
||
case TSlcomment: | ||
commentType := lineType | ||
if commentType == LINE_BLANK { | ||
commentType = LINE_COMMENT | ||
} | ||
return index, commentType, &StateCommentSingle{} | ||
|
||
case TString: | ||
index, docString, skipEsc := verifyIgnoreEscape(lang, job, index) | ||
|
||
if docString { | ||
commentType := lineType | ||
if commentType == LINE_BLANK { | ||
commentType = LINE_COMMENT | ||
} | ||
|
||
return index, commentType, &StateDocString{ | ||
End: endString, | ||
SkipEsc: skipEsc, | ||
} | ||
} | ||
|
||
return index, LINE_CODE, &StateString{ | ||
End: endString, | ||
SkipEsc: skipEsc, | ||
} | ||
|
||
case TComplexity: | ||
if index == 0 || isWhitespace(job.Content[index-1]) { | ||
job.Complexity++ | ||
} | ||
return index, LINE_BLANK, state | ||
|
||
default: | ||
return index, LINE_CODE, &StateCode{} | ||
} | ||
} | ||
|
||
func (state *StateBlank) Reset() (LineType, State) { | ||
return LINE_BLANK, state | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
package processor | ||
|
||
// StateCode is the state-machine node used while scanning ordinary code.
// It carries no data.
type StateCode struct{}

// String returns the fixed label "code" for this state.
func (*StateCode) String() string {
	const label = "code"
	return label
}
|
||
func (state *StateCode) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
// Hacky fix to https://github.com/boyter/scc/issues/181 | ||
endPoint := job.EndPoint | ||
if endPoint > len(job.Content) { | ||
endPoint-- | ||
} | ||
|
||
var i int | ||
for i = index; i < endPoint; i++ { | ||
curByte := job.Content[i] | ||
|
||
if curByte == '\n' { | ||
return i, LINE_CODE, state | ||
} | ||
|
||
if isBinary(i, curByte) { | ||
job.Binary = true | ||
return i, LINE_CODE, state | ||
} | ||
|
||
if shouldProcess(curByte, lang.ProcessMask) { | ||
if Duplicates { | ||
// Technically this is wrong because we skip bytes so this is not a true | ||
// hash of the file contents, but for duplicate files it shouldn't matter | ||
// as both will skip the same way | ||
digestible := []byte{job.Content[index]} | ||
job.Hash.Write(digestible) | ||
} | ||
|
||
switch tokenType, offsetJump, endString := lang.Tokens.Match(job.Content[i:]); tokenType { | ||
case TString: | ||
// If we are in string state then check what sort of string so we know if docstring OR ignoreescape string | ||
|
||
// It is safe to -1 here as to enter the code state we need to have | ||
// transitioned from blank to here hence i should always be >= 1 | ||
// This check is to ensure we aren't in a character declaration | ||
// TODO this should use language features | ||
if job.Content[i-1] == '\\' { | ||
break // from switch, not from the loop | ||
} | ||
|
||
i, docString, skipEsc := verifyIgnoreEscape(lang, job, i) | ||
|
||
if docString { | ||
commentType := lineType | ||
if commentType == LINE_BLANK { | ||
commentType = LINE_COMMENT | ||
} | ||
|
||
return i, commentType, &StateDocString{ | ||
End: endString, | ||
SkipEsc: skipEsc, | ||
} | ||
} | ||
|
||
// i += offsetJump - 1 | ||
return i, LINE_CODE, &StateString{ | ||
End: endString, | ||
SkipEsc: skipEsc, | ||
} | ||
|
||
case TSlcomment: | ||
i += offsetJump - 1 | ||
return i, LINE_CODE, &StateCommentSingle{} | ||
|
||
case TMlcomment: | ||
i += offsetJump - 1 | ||
|
||
return i, LINE_CODE, NewStateCommentMulti(endString) | ||
|
||
case TComplexity: | ||
if i == 0 || isWhitespace(job.Content[i-1]) { | ||
job.Complexity++ | ||
} | ||
} | ||
} | ||
} | ||
|
||
return i, LINE_CODE, state | ||
} | ||
|
||
func (state *StateCode) Reset() (LineType, State) { | ||
return LINE_BLANK, &StateBlank{} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package processor | ||
|
||
// StateCommentMulti is the state-machine node used inside a multi-line comment.
type StateCommentMulti struct {
	// Stack holds the end tokens of the currently open comments; the last
	// element is the one that must be matched next.
	Stack [][]byte
}

// String returns the fixed label "multiline-comment" for this state.
func (*StateCommentMulti) String() string {
	const label = "multiline-comment"
	return label
}
|
||
func NewStateCommentMulti(token []byte) *StateCommentMulti { | ||
return &StateCommentMulti{ | ||
Stack: [][]byte{token}, | ||
} | ||
} | ||
|
||
func (state *StateCommentMulti) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
var i int | ||
for i = index; i < job.EndPoint; i++ { | ||
curByte := job.Content[i] | ||
|
||
if curByte == '\n' { | ||
break | ||
} | ||
|
||
endToken := state.peek() | ||
if checkForMatchSingle(curByte, i, job.EndPoint, endToken, job) { | ||
// set offset jump here | ||
i += len(endToken) - 1 | ||
|
||
if len(state.Stack) == 1 { | ||
return i, lineType, &StateBlank{} | ||
} else { | ||
state.pop() | ||
return i, lineType, state | ||
} | ||
} | ||
|
||
// Check if we are entering another multiline comment | ||
// This should come below check for match single as it speeds up processing | ||
if lang.Nested { | ||
if ok, offsetJump, endString := lang.MultiLineComments.Match(job.Content[i:]); ok != 0 { | ||
i += offsetJump - 1 | ||
state.push(endString) | ||
return i, lineType, state | ||
} | ||
} | ||
} | ||
|
||
return i, lineType, state | ||
} | ||
|
||
func (state *StateCommentMulti) Reset() (LineType, State) { | ||
return LINE_COMMENT, state | ||
} | ||
|
||
func (state *StateCommentMulti) peek() []byte { | ||
i := len(state.Stack) - 1 | ||
return state.Stack[i] | ||
} | ||
|
||
func (state *StateCommentMulti) push(token []byte) { | ||
state.Stack = append(state.Stack, token) | ||
} | ||
|
||
func (state *StateCommentMulti) pop() { | ||
i := len(state.Stack) - 1 | ||
|
||
state.Stack = state.Stack[:i] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
package processor | ||
|
||
// StateCommentSingle is the state-machine node used inside a single-line
// comment. It carries no data.
type StateCommentSingle struct{}

// String returns the fixed label "comment" for this state.
func (*StateCommentSingle) String() string {
	const label = "comment"
	return label
}
|
||
func (state *StateCommentSingle) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
var i int | ||
for i = index; i < job.EndPoint; i++ { | ||
curByte := job.Content[i] | ||
|
||
if curByte == '\n' { | ||
break | ||
} | ||
} | ||
|
||
return i, lineType, state | ||
} | ||
|
||
func (state *StateCommentSingle) Reset() (LineType, State) { | ||
return LINE_BLANK, &StateBlank{} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,53 @@ | ||
package processor | ||
|
||
import ( | ||
"fmt" | ||
) | ||
|
||
// StateDocString is the state-machine node used inside a docstring.
type StateDocString struct {
	// End is the token that terminates the docstring.
	End []byte
	// SkipEsc is the ignore-escape flag reported when the string was opened.
	SkipEsc bool
}

// String returns the fixed label "docstring" for this state.
func (*StateDocString) String() string {
	const label = "docstring"
	return label
}
|
||
func (state *StateDocString) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
var i int | ||
for i = index; i < job.EndPoint; i++ { | ||
if job.Content[i] == '\n' { | ||
return i, lineType, state | ||
} | ||
|
||
if job.Content[i-1] != '\\' { | ||
if checkForMatchSingle(job.Content[i], i, job.EndPoint, state.End, job) { | ||
// So we have hit end of docstring at this point in which case check if only whitespace characters till the next | ||
// newline and if so we change to a comment otherwise to code | ||
// need to start the loop after ending definition of docstring, therefore adding the length of the string to | ||
// the index | ||
for j := i + len(state.End); j <= job.EndPoint; j++ { | ||
if job.Content[j] == '\n' { | ||
if Debug { | ||
printDebug("Found newline so docstring is comment") | ||
} | ||
return j, LINE_COMMENT, &StateBlank{} | ||
} | ||
|
||
if !isWhitespace(job.Content[j]) { | ||
if Debug { | ||
printDebug(fmt.Sprintf("Found something not whitespace so is code: %s", string(job.Content[j]))) | ||
} | ||
return j, LINE_CODE, &StateBlank{} | ||
} | ||
} | ||
} | ||
} | ||
} | ||
|
||
return i, lineType, state | ||
} | ||
|
||
func (state *StateDocString) Reset() (LineType, State) { | ||
return LINE_COMMENT, state | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package processor | ||
|
||
import "fmt" | ||
|
||
// StateString is the state-machine node used inside a string literal.
type StateString struct {
	// End is the token that terminates the string.
	End []byte
	// SkipEsc disables the backslash-escape check when looking for End.
	SkipEsc bool
}

// String renders the state together with its end token and escape flag,
// in the form string[end=<End>,skipesc=<SkipEsc>].
func (state *StateString) String() string {
	const layout = "string[end=%s,skipesc=%v]"
	return fmt.Sprintf(layout, state.End, state.SkipEsc)
}
|
||
func (state *StateString) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
var i int | ||
for i = index; i < job.EndPoint; i++ { | ||
// If we hit a newline, return because we want to count the stats but keep | ||
// the current state so we end up back in this loop when the outer | ||
// one calls again | ||
if job.Content[i] == '\n' { | ||
return i, LINE_CODE, state | ||
} | ||
|
||
// If we are in a literal string we want to ignore the \ check OR we aren't checking for special ones | ||
if state.SkipEsc || job.Content[i-1] != '\\' { | ||
if checkForMatchSingle(job.Content[i], i, job.EndPoint, state.End, job) { | ||
return i, LINE_CODE, &StateCode{} | ||
} | ||
} | ||
} | ||
|
||
return i, LINE_CODE, state | ||
} | ||
|
||
func (state *StateString) Reset() (LineType, State) { | ||
return LINE_CODE, state | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,6 @@ | ||
package processor | ||
|
||
type State interface { | ||
Process(*FileJob, *LanguageFeature, int, LineType) (int, LineType, State) | ||
Reset() (LineType, State) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.