Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(crit): add SearchPattern method on MemoryReader #163

Merged
merged 2 commits into from
Jul 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 99 additions & 0 deletions crit/mempages.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,10 @@
"bytes"
"errors"
"fmt"
"io"
"os"
"path/filepath"
"regexp"

"github.com/checkpoint-restore/go-criu/v7/crit/images/mm"
"github.com/checkpoint-restore/go-criu/v7/crit/images/pagemap"
Expand Down Expand Up @@ -193,3 +195,100 @@

return size, nil
}

// PatternMatch describes a single hit reported by MemoryReader.SearchPattern.
type PatternMatch struct {
	// Vaddr is the virtual address of the first byte of the match.
	Vaddr uint64
	// Length is the size of the match in bytes; Context is the number of
	// surrounding bytes that was requested on each side of it.
	Length, Context int
	// Match holds the matched bytes together with the captured context,
	// as taken from the sanitized search buffer.
	Match string
}

// SearchPattern searches for a pattern in the process memory pages.
func (mr *MemoryReader) SearchPattern(pattern string, escapeRegExpCharacters bool, context, chunkSize int) ([]PatternMatch, error) {
if context < 0 {
return nil, errors.New("context size cannot be negative")
}

// Set a default chunk size of 10MB to be read at a time
if chunkSize <= 0 {
chunkSize = 10 * 1024 * 1024
}

// Escape regular expression characters in the pattern
if escapeRegExpCharacters {
pattern = regexp.QuoteMeta(pattern)
}

regexPattern, err := regexp.Compile(pattern)
rst0git marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err

Check warning on line 225 in crit/mempages.go

View check run for this annotation

Codecov / codecov/patch

crit/mempages.go#L225

Added line #L225 was not covered by tests
}

var results []PatternMatch

f, err := os.Open(filepath.Join(mr.checkpointDir, fmt.Sprintf("pages-%d.img", mr.pagesID)))
rst0git marked this conversation as resolved.
Show resolved Hide resolved
if err != nil {
return nil, err

Check warning on line 232 in crit/mempages.go

View check run for this annotation

Codecov / codecov/patch

crit/mempages.go#L232

Added line #L232 was not covered by tests
}
defer f.Close()

for _, entry := range mr.pagemapEntries {
startAddr := entry.GetVaddr()
endAddr := startAddr + uint64(entry.GetNrPages())*uint64(mr.pageSize)

initialOffset := uint64(0)
for _, e := range mr.pagemapEntries {
if e == entry {
break
}
initialOffset += uint64(e.GetNrPages()) * uint64(mr.pageSize)
}

for offset := uint64(0); offset < endAddr-startAddr; offset += uint64(chunkSize) {
readSize := chunkSize
if endAddr-startAddr-offset < uint64(chunkSize) {
readSize = int(endAddr - startAddr - offset)
}

buff := make([]byte, readSize)
if _, err := f.ReadAt(buff, int64(initialOffset+offset)); err != nil {
if err == io.EOF {
break

Check warning on line 257 in crit/mempages.go

View check run for this annotation

Codecov / codecov/patch

crit/mempages.go#L256-L257

Added lines #L256 - L257 were not covered by tests
}
return nil, err

Check warning on line 259 in crit/mempages.go

View check run for this annotation

Codecov / codecov/patch

crit/mempages.go#L259

Added line #L259 was not covered by tests
}

// Replace non-printable ASCII characters in the buffer with a question mark (0x3f) to prevent unexpected behavior
// during regex matching. Non-printable characters might cause incorrect interpretation or premature
// termination of strings, leading to inaccuracies in pattern matching.
for i := range buff {
if buff[i] < 32 || buff[i] >= 127 {
buff[i] = 0x3F
}
}

indexes := regexPattern.FindAllIndex(buff, -1)
for _, index := range indexes {
startContext := index[0] - context
if startContext < 0 {
startContext = 0
}

endContext := index[1] + context
if endContext > len(buff) {
endContext = len(buff)
}

results = append(results, PatternMatch{
Vaddr: startAddr + offset + uint64(index[0]),
Length: index[1] - index[0],
Context: context,
Match: string(buff[startContext:endContext]),
})
}
}
}

return results, nil
}
93 changes: 85 additions & 8 deletions crit/mempages_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -248,7 +248,7 @@ func TestGetPsArgsAndEnvVars(t *testing.T) {
}
}

func TestGetShmemSize(t *testing.T) {
func TestSearchPattern(t *testing.T) {
pid, err := getTestImgPID()
if err != nil {
t.Fatal(err)
Expand All @@ -259,15 +259,92 @@ func TestGetShmemSize(t *testing.T) {
t.Fatal(err)
}

size, err := mr.GetShmemSize()
if err != nil {
t.Fatal(err)
testCases := []struct {
name string
pattern string
context int
escapeRegExpCharacters bool
shouldMatch bool
expectedError error
}{
{
name: "PATH environment variable",
pattern: "PATH=",
shouldMatch: true,
},
{
name: "PATH environment variable regex",
pattern: `\bPATH=([^\s]+)\b`,
shouldMatch: true,
},
{
name: "PATH environment variable regex with 10 bytes context",
pattern: `\bPATH=([^\s]+)\b`,
context: 10,
shouldMatch: true,
},
{
name: "PATH environment variable regex with a negative context",
pattern: `\bPATH=([^\s]+)\b`,
context: -1,
expectedError: errors.New("context size cannot be negative"),
},
{
name: "PATH environment variable regex with a large context",
pattern: `\bPATH=([^\s]+)\b`,
context: 100000,
shouldMatch: true,
},
{
name: "Non-existent pattern",
pattern: "NON_EXISTENT_PATTERN",
},
{
name: "PASSWORD environment variable value as regex",
pattern: "123 Hello.*?",
shouldMatch: true,
},
{
name: "PASSWORD environment variable value with regex metacharacters to escape",
pattern: `123 Hello.*?[^]@WORLD(|x)`,
escapeRegExpCharacters: true,
shouldMatch: true,
},
}

// Verify that the shared memory size is as expected (0)
expectedSize := int64(0)
if size != expectedSize {
t.Fatalf("Expected shared memory size: %d, but got: %d", expectedSize, size)
for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
matches, err := mr.SearchPattern(tc.pattern, tc.escapeRegExpCharacters, tc.context, 0)
if err != nil && tc.expectedError == nil {
t.Errorf("Unexpected error for pattern %s: %v", tc.pattern, err)
} else if err == nil && tc.expectedError != nil {
t.Errorf("Expected error for pattern %s: %v", tc.pattern, tc.expectedError)
}

if tc.shouldMatch && len(matches) == 0 {
t.Errorf("Expected to find a match for pattern \"%s\"", tc.pattern)
} else if !tc.shouldMatch && len(matches) > 0 {
t.Errorf("Expected not to find any match for pattern \"%s\"", tc.pattern)
}

for _, match := range matches {
content, err := mr.GetMemPages(match.Vaddr, match.Vaddr+uint64(match.Length))
if err != nil {
t.Fatalf("Failed to get memory pages: %v", err)
}

buff := content.Bytes()
for i := range buff {
if buff[i] < 32 || buff[i] >= 127 {
buff[i] = 0x3F
}
}

if !strings.Contains(match.Match, content.String()) {
t.Errorf("Expected to find %s in matched pattern %s", content.String(), match.Match)
}
}
})
}
}

Expand Down
7 changes: 7 additions & 0 deletions test/loop/loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,13 @@ int main(void)
int res = EXIT_FAILURE;
int start_pipe[2];

// Set a PASSWORD environment variable to test the search pattern
// within process memory pages using regex metacharacters.
if (setenv("PASSWORD", "123 Hello.*?[^]@WORLD(|x)", 1) != 0) {
perror("setenv");
return 1;
}

if (pipe(start_pipe)) {
perror("pipe failed!");
goto out;
Expand Down
Loading