-
-
Notifications
You must be signed in to change notification settings - Fork 266
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #1 from boyter/master
merge updated code
- Loading branch information
Showing
15 changed files
with
486 additions
and
363 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
#!/usr/bin/env python3 | ||
|
||
""" | ||
Docstrings containing an apostrophe (') are handled incorrectly | ||
The line above is counted as code despite being in the middle of a docstring. | ||
The end of docstring flag seems to be changed to an apostrophe, | ||
which means the next line will not exit the docstring. | ||
""" | ||
# Code containing single quotes will exit the docstring, | ||
# but presuming the quotes are balanced the second | ||
# quote will put us in string scanning mode. | ||
if __name__ == '__main__': | ||
print('Hello, World!') | ||
# Not counted as a comment | ||
|
||
# ^ Not counted as a blank line | ||
# Break out of string scanner with unbalanced single quote: ' | ||
exit(0) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,37 @@ | ||
package processor | ||
|
||
import "math/rand" | ||
|
||
var BloomTable [256]uint64 | ||
|
||
func init() { | ||
for i := range BloomTable { | ||
BloomTable[i] = BloomHash(byte(i)) | ||
} | ||
} | ||
|
||
// BloomHash derives a 64-bit bloom filter mask from a single byte.
//
// The result has between one and three bits set: three bit positions are
// drawn from a pseudo-random value seeded with b, and positions that happen
// to coincide collapse into a single bit. The same input always yields the
// same mask because the generator is seeded deterministically.
func BloomHash(b byte) uint64 {
	const (
		// Number of bit positions drawn per hash. Fewer makes the hash not
		// unique enough, more leads to overcrowding the bloom filter.
		bitsPerHash = 3
		// Keeps the low six bits of a value, i.e. a position in 0-63.
		positionMask uint64 = 0x3f
	)

	// The input is mostly ASCII (and mostly lower case), so raw byte values
	// are poorly spread over 0-255. Seeding math/rand with the byte gives a
	// better distributed 64-bit value to pick bit positions from.
	generator := rand.New(rand.NewSource(int64(b)))
	k := generator.Uint64()

	// Take one position from each of the lowest three bytes of k and set the
	// corresponding bit in the output.
	var hash uint64
	for shift := uint(0); shift < bitsPerHash*8; shift += 8 {
		position := (k >> shift) & positionMask
		hash |= uint64(1) << position
	}

	return hash
}
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,60 @@ | ||
package processor | ||
|
||
// StateBlank is the scanner state named "blank". It carries no data.
type StateBlank struct{}

// String returns the human-readable name of the state.
func (state *StateBlank) String() string {
	return "blank"
}
|
||
func (state *StateBlank) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
switch tokenType, offsetJump, endString := lang.Tokens.Match(job.Content[index:]); tokenType { | ||
case TMlcomment: | ||
commentType := lineType | ||
if commentType == LINE_BLANK { | ||
commentType = LINE_COMMENT | ||
} | ||
|
||
index += offsetJump - 1 | ||
return index, commentType, NewStateCommentMulti(endString) | ||
|
||
case TSlcomment: | ||
commentType := lineType | ||
if commentType == LINE_BLANK { | ||
commentType = LINE_COMMENT | ||
} | ||
return index, commentType, &StateCommentSingle{} | ||
|
||
case TString: | ||
index, docString, skipEsc := verifyIgnoreEscape(lang, job, index) | ||
|
||
if docString { | ||
commentType := lineType | ||
if commentType == LINE_BLANK { | ||
commentType = LINE_COMMENT | ||
} | ||
|
||
return index, commentType, &StateDocString{ | ||
End: endString, | ||
SkipEsc: skipEsc, | ||
} | ||
} | ||
|
||
return index, LINE_CODE, &StateString{ | ||
End: endString, | ||
SkipEsc: skipEsc, | ||
} | ||
|
||
case TComplexity: | ||
if index == 0 || isWhitespace(job.Content[index-1]) { | ||
job.Complexity++ | ||
} | ||
return index, LINE_BLANK, state | ||
|
||
default: | ||
return index, LINE_CODE, &StateCode{} | ||
} | ||
} | ||
|
||
func (state *StateBlank) Reset() (LineType, State) { | ||
return LINE_BLANK, state | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,92 @@ | ||
package processor | ||
|
||
// StateCode is the scanner state named "code". It carries no data.
type StateCode struct{}

// String returns the human-readable name of the state.
func (state *StateCode) String() string {
	return "code"
}
|
||
func (state *StateCode) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
// Hacky fix to https://github.com/boyter/scc/issues/181 | ||
endPoint := job.EndPoint | ||
if endPoint > len(job.Content) { | ||
endPoint-- | ||
} | ||
|
||
var i int | ||
for i = index; i < endPoint; i++ { | ||
curByte := job.Content[i] | ||
|
||
if curByte == '\n' { | ||
return i, LINE_CODE, state | ||
} | ||
|
||
if isBinary(i, curByte) { | ||
job.Binary = true | ||
return i, LINE_CODE, state | ||
} | ||
|
||
if shouldProcess(curByte, lang.ProcessMask) { | ||
if Duplicates { | ||
// Technically this is wrong because we skip bytes so this is not a true | ||
// hash of the file contents, but for duplicate files it shouldn't matter | ||
// as both will skip the same way | ||
digestible := []byte{job.Content[index]} | ||
job.Hash.Write(digestible) | ||
} | ||
|
||
switch tokenType, offsetJump, endString := lang.Tokens.Match(job.Content[i:]); tokenType { | ||
case TString: | ||
// If we are in string state then check what sort of string so we know if docstring OR ignoreescape string | ||
|
||
// It is safe to -1 here as to enter the code state we need to have | ||
// transitioned from blank to here hence i should always be >= 1 | ||
// This check is to ensure we aren't in a character declaration | ||
// TODO this should use language features | ||
if job.Content[i-1] == '\\' { | ||
break // from switch, not from the loop | ||
} | ||
|
||
i, docString, skipEsc := verifyIgnoreEscape(lang, job, i) | ||
|
||
if docString { | ||
commentType := lineType | ||
if commentType == LINE_BLANK { | ||
commentType = LINE_COMMENT | ||
} | ||
|
||
return i, commentType, &StateDocString{ | ||
End: endString, | ||
SkipEsc: skipEsc, | ||
} | ||
} | ||
|
||
// i += offsetJump - 1 | ||
return i, LINE_CODE, &StateString{ | ||
End: endString, | ||
SkipEsc: skipEsc, | ||
} | ||
|
||
case TSlcomment: | ||
i += offsetJump - 1 | ||
return i, LINE_CODE, &StateCommentSingle{} | ||
|
||
case TMlcomment: | ||
i += offsetJump - 1 | ||
|
||
return i, LINE_CODE, NewStateCommentMulti(endString) | ||
|
||
case TComplexity: | ||
if i == 0 || isWhitespace(job.Content[i-1]) { | ||
job.Complexity++ | ||
} | ||
} | ||
} | ||
} | ||
|
||
return i, LINE_CODE, state | ||
} | ||
|
||
func (state *StateCode) Reset() (LineType, State) { | ||
return LINE_BLANK, &StateBlank{} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,70 @@ | ||
package processor | ||
|
||
// StateCommentMulti is the scanner state named "multiline-comment".
type StateCommentMulti struct {
	// Stack holds the end tokens of the multi-line comments currently open;
	// the last element belongs to the most recently opened comment.
	Stack [][]byte
}

// String returns the human-readable name of the state.
func (state *StateCommentMulti) String() string {
	return "multiline-comment"
}

// NewStateCommentMulti returns a state whose stack contains exactly one
// entry: token, the end token of the comment that was just opened.
func NewStateCommentMulti(token []byte) *StateCommentMulti {
	st := new(StateCommentMulti)
	st.Stack = append(st.Stack, token)
	return st
}
|
||
func (state *StateCommentMulti) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
var i int | ||
for i = index; i < job.EndPoint; i++ { | ||
curByte := job.Content[i] | ||
|
||
if curByte == '\n' { | ||
break | ||
} | ||
|
||
endToken := state.peek() | ||
if checkForMatchSingle(curByte, i, job.EndPoint, endToken, job) { | ||
// set offset jump here | ||
i += len(endToken) - 1 | ||
|
||
if len(state.Stack) == 1 { | ||
return i, lineType, &StateBlank{} | ||
} else { | ||
state.pop() | ||
return i, lineType, state | ||
} | ||
} | ||
|
||
// Check if we are entering another multiline comment | ||
// This should come below check for match single as it speeds up processing | ||
if lang.Nested { | ||
if ok, offsetJump, endString := lang.MultiLineComments.Match(job.Content[i:]); ok != 0 { | ||
i += offsetJump - 1 | ||
state.push(endString) | ||
return i, lineType, state | ||
} | ||
} | ||
} | ||
|
||
return i, lineType, state | ||
} | ||
|
||
func (state *StateCommentMulti) Reset() (LineType, State) { | ||
return LINE_COMMENT, state | ||
} | ||
|
||
func (state *StateCommentMulti) peek() []byte { | ||
i := len(state.Stack) - 1 | ||
return state.Stack[i] | ||
} | ||
|
||
func (state *StateCommentMulti) push(token []byte) { | ||
state.Stack = append(state.Stack, token) | ||
} | ||
|
||
func (state *StateCommentMulti) pop() { | ||
i := len(state.Stack) - 1 | ||
|
||
state.Stack = state.Stack[:i] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,24 @@ | ||
package processor | ||
|
||
// StateCommentSingle is the scanner state named "comment". It carries no
// data.
type StateCommentSingle struct{}

// String returns the human-readable name of the state.
func (state *StateCommentSingle) String() string {
	return "comment"
}
|
||
func (state *StateCommentSingle) Process(job *FileJob, lang *LanguageFeature, index int, lineType LineType) (int, LineType, State) { | ||
var i int | ||
for i = index; i < job.EndPoint; i++ { | ||
curByte := job.Content[i] | ||
|
||
if curByte == '\n' { | ||
break | ||
} | ||
} | ||
|
||
return i, lineType, state | ||
} | ||
|
||
func (state *StateCommentSingle) Reset() (LineType, State) { | ||
return LINE_BLANK, &StateBlank{} | ||
} |
Oops, something went wrong.