Use the Go standard library implementation of Boyer-Moore string search from the strings package for preamble searching.
bemasher · Dec 17, 2015 · cb511be · cb511be
1 parent 40470d4
commit cb511be
Show file tree

Hide file tree

Showing 2 changed files with 145 additions and 11 deletions.
diff --git a/decode/decode.go b/decode/decode.go
@@ -17,7 +17,6 @@
 package decode
 
 import (
-	"bytes"
 	"fmt"
 	"log"
 	"math"
@@ -109,6 +108,8 @@ type Decoder struct {
 	preamble []byte
 	slices   [][]byte
 
+	preambleFinder *byteFinder
+
 	pkt []byte
 }
 
@@ -162,6 +163,8 @@ func NewDecoder(cfg PacketConfig, decimation int) (d Decoder) {
 		d.slices[symbolOffset] = flat[lower:upper]
 	}
 
+	d.preambleFinder = makeByteFinder(d.preamble)
+
 	// Signal up to the final stage is 1-bit per byte. Allocate a buffer to
 	// store packed version 8-bits per byte.
 	d.pkt = make([]byte, (d.DecCfg.PacketSymbols+7)>>3)
@@ -194,10 +197,10 @@ func (d Decoder) Decode(input []byte) []int {
 	Quantize(filterBlock, d.Quantized[d.DecCfg.PacketLength-d.DecCfg.SymbolLength2:])
 
 	// Pack the quantized signal into slices for searching.
-	d.Pack(d.Quantized[:d.DecCfg.BlockSize2], d.slices)
+	d.Pack(d.Quantized[:d.DecCfg.BlockSize2])
 
 	// Return a list of indexes the preamble exists at.
-	return d.Search(d.slices, d.preamble)
+	return d.Search()
 }
 
 // A Demodulator knows how to demodulate an array of uint8 IQ samples into an
@@ -270,8 +273,8 @@ func Quantize(input []float64, output []byte) {
 // <12345678><12345678><12345678><12345678><12345678><12345678><12345678><12345678>
 // to:
 // <11111111><22222222><33333333><44444444><55555555><66666666><77777777><88888888>
-func (d Decoder) Pack(input []byte, slices [][]byte) {
-	for symbolOffset, slice := range slices {
+func (d *Decoder) Pack(input []byte) {
+	for symbolOffset, slice := range d.slices {
 		for symbolIdx := range slice {
 			slice[symbolIdx] = input[symbolIdx*d.DecCfg.SymbolLength2+symbolOffset]
 		}
@@ -283,12 +286,17 @@ func (d Decoder) Pack(input []byte, slices [][]byte) {
 // For each sample offset look for the preamble. Return a list of indexes the
 // preamble is found at. Indexes are absolute in the unsliced quantized
 // buffer.
-func (d Decoder) Search(slices [][]byte, preamble []byte) (indexes []int) {
-	preambleLength := len(preamble)
-	for symbolOffset, slice := range slices {
-		for symbolIdx := range slice[:len(slice)-preambleLength] {
-			if bytes.Equal(preamble, slice[symbolIdx:][:preambleLength]) {
-				indexes = append(indexes, symbolIdx*d.DecCfg.SymbolLength2+symbolOffset)
+func (d *Decoder) Search() (indexes []int) {
+	for symbolOffset, slice := range d.slices {
+		offset := 0
+		idx := 0
+		for {
+			idx = d.preambleFinder.next(slice[offset:])
+			if idx != -1 {
+				indexes = append(indexes, (offset+idx)*d.Cfg.SymbolLength2+symbolOffset)
+				offset += idx + 1
+			} else {
+				break
 			}
 		}
 	}

diff --git a/decode/search.go b/decode/search.go
@@ -0,0 +1,126 @@
+// Copyright 2012 The Go Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style
+// license that can be found in the LICENSE file.
+
+package decode
+
+import "bytes"
+
+// byteFinder efficiently finds a byte pattern in a source text. It's
+// implemented using the Boyer-Moore string search algorithm:
+// http://en.wikipedia.org/wiki/Boyer-Moore_string_search_algorithm
+// http://www.cs.utexas.edu/~moore/publications/fstrpos.pdf (note: this aged
+// document uses 1-based indexing)
+type byteFinder struct {
+	// pattern is the byte sequence that we are searching for in the text.
+	pattern []byte
+
+	// badCharSkip[b] contains the distance between the last byte of pattern
+	// and the rightmost occurrence of b in pattern, not counting the last
+	// byte itself. If b does not occur there, badCharSkip[b] is len(pattern).
+	//
+	// Whenever a mismatch is found with byte b in the text, we can safely
+	// shift the matching frame at least badCharSkip[b] until the next time
+	// the matching byte could be in alignment.
+	badCharSkip [256]int
+
+	// goodSuffixSkip[i] defines how far we can shift the matching frame given
+	// that the suffix pattern[i+1:] matches, but the byte pattern[i] does
+	// not. There are two cases to consider:
+	//
+	// 1. The matched suffix occurs elsewhere in pattern (with a different
+	// byte preceding it that we might possibly match). In this case, we can
+	// shift the matching frame to align with the next suffix chunk. For
+	// example, the pattern "mississi" has the suffix "issi" next occurring
+	// (in right-to-left order) at index 1, so goodSuffixSkip[3] ==
+	// shift+len(suffix) == 3+4 == 7.
+	//
+	// 2. If the matched suffix does not occur elsewhere in pattern, then the
+	// matching frame may share part of its prefix with the end of the
+	// matching suffix. In this case, goodSuffixSkip[i] will contain how far
+	// to shift the frame to align this portion of the prefix to the
+	// suffix. For example, in the pattern "abcxxxabc", when the first
+	// mismatch from the back is found to be in position 3, the matching
+	// suffix "xxabc" is not found elsewhere in the pattern. However, its
+	// rightmost "abc" (at position 6) is a prefix of the whole pattern, so
+	// goodSuffixSkip[3] == shift+len(suffix) == 6+5 == 11.
+	goodSuffixSkip []int
+}
+
+// makeByteFinder returns a byteFinder for pattern with its badCharSkip and
+// goodSuffixSkip tables precomputed. The finder keeps a reference to pattern
+// rather than copying it.
+func makeByteFinder(pattern []byte) *byteFinder {
+	f := &byteFinder{
+		pattern:        pattern,
+		goodSuffixSkip: make([]int, len(pattern)),
+	}
+	// last is the index of the last byte in the pattern.
+	last := len(pattern) - 1
+
+	// Build bad character table.
+	// Bytes not in the pattern can skip one pattern's length.
+	for i := range f.badCharSkip {
+		f.badCharSkip[i] = len(pattern)
+	}
+	// The loop condition is < instead of <= so that the last byte does not
+	// have a zero distance to itself. Finding this byte out of place implies
+	// that it is not in the last position.
+	for i := 0; i < last; i++ {
+		f.badCharSkip[pattern[i]] = last - i
+	}
+
+	// Build good suffix table.
+	// First pass: set each value to the next index which starts a prefix of
+	// pattern.
+	lastPrefix := last
+	for i := last; i >= 0; i-- {
+		if bytes.HasPrefix(pattern, pattern[i+1:]) {
+			lastPrefix = i + 1
+		}
+		// lastPrefix is the shift, and (last-i) is len(suffix).
+		f.goodSuffixSkip[i] = lastPrefix + last - i
+	}
+	// Second pass: find repeats of pattern's suffix starting from the front.
+	for i := 0; i < last; i++ {
+		lenSuffix := longestCommonSuffix(pattern, pattern[1:i+1])
+		if pattern[i-lenSuffix] != pattern[last-lenSuffix] {
+			// (last-i) is the shift, and lenSuffix is len(suffix).
+			f.goodSuffixSkip[last-lenSuffix] = lenSuffix + last - i
+		}
+	}
+
+	return f
+}
+
+// longestCommonSuffix returns the number of trailing bytes that a and b have
+// in common.
+func longestCommonSuffix(a, b []byte) (i int) {
+	for ; i < len(a) && i < len(b); i++ {
+		// Compare the bytes i positions back from the end of each slice.
+		if a[len(a)-1-i] != b[len(b)-1-i] {
+			break
+		}
+	}
+	return
+}
+
+// next returns the index in text of the first occurrence of the pattern. If
+// the pattern is not found, it returns -1.
+func (f *byteFinder) next(text []byte) int {
+	i := len(f.pattern) - 1 // i indexes text; start aligned with the pattern's last byte
+	for i < len(text) {
+		// Compare backwards from the end until the first unmatching byte.
+		j := len(f.pattern) - 1
+		for j >= 0 && text[i] == f.pattern[j] {
+			i--
+			j--
+		}
+		if j < 0 {
+			return i + 1 // match: i stopped one position before the match start
+		}
+		i += max(f.badCharSkip[text[i]], f.goodSuffixSkip[j]) // mismatch: take the larger shift
+	}
+	return -1
+}
+
+// max returns the larger of a and b.
+func max(a, b int) int {
+	if a > b {
+		return a
+	}
+	return b
+}