Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Enqueue FFIS email downloads #53

Closed
wants to merge 10 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -51,3 +51,6 @@ cover.html
# Environment variable files
.env
.envrc

# Localstack
volume/
8 changes: 8 additions & 0 deletions Taskfile.yml
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,7 @@ tasks:
deps:
- build-DownloadGrantsGovDB
- build-SplitGrantsGovXMLDB
- build-EnqueueFFISDownload

build-DownloadGrantsGovDB:
desc: Compiles DownloadGrantsGovDB
Expand All @@ -123,3 +124,10 @@ tasks:
- task: build-lambda
vars:
LAMBDA_CMD: SplitGrantsGovXMLDB

build-EnqueueFFISDownload:
desc: Compiles EnqueueFFISDownload
cmds:
- task: build-lambda
vars:
LAMBDA_CMD: EnqueueFFISDownload
22 changes: 22 additions & 0 deletions cmd/EnqueueFFISDownload/fixtures/good.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
MIME-Version: 1.0
Date: Sat, 22 Apr 2023 14:55:26 -0500
Message-ID: <CAJZ0yfPKN1Q@mail.gmail.com>
Subject:
From: FFIS <ffis@ffis.org>
To: Team <team@usdigitalresponse.org>
Content-Type: multipart/alternative; boundary="0000000000008e64aa05f9f22750"

--0000000000008e64aa05f9f22750
Content-Type: text/plain; charset="UTF-8"

Click here to download competitive grant update
<https://mcusercontent.com/123456/files/file-01.xlsx>

-FFIS

--0000000000008e64aa05f9f22750
Content-Type: text/html; charset="UTF-8"

<div dir="ltr"><a href="https://mcusercontent.com/123456/files/file-01.xlsx">Click here to download competitive grant update</a><br clear="all"><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><br>-FFIS</div></div></div>

--0000000000008e64aa05f9f22750--
22 changes: 22 additions & 0 deletions cmd/EnqueueFFISDownload/fixtures/missing.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
MIME-Version: 1.0
Date: Sat, 22 Apr 2023 14:55:26 -0500
Message-ID: <CAJZ0yfPKN1Q@mail.gmail.com>
Subject:
From: FFIS <ffis@ffis.org>
To: Team <team@usdigitalresponse.org>
Content-Type: multipart/alternative; boundary="0000000000008e64aa05f9f22750"

--0000000000008e64aa05f9f22750
Content-Type: text/plain; charset="UTF-8"

Click here to download competitive grant update
<https://usdigitalresponse.org>

-FFIS

--0000000000008e64aa05f9f22750
Content-Type: text/html; charset="UTF-8"

<div dir="ltr"><a href="https://mcusercontent.com/123456/files/file-01.xlsx">Click here to download competitive grant update</a><br clear="all"><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><br>-FFIS</div></div></div>

--0000000000008e64aa05f9f22750--
25 changes: 25 additions & 0 deletions cmd/EnqueueFFISDownload/fixtures/multiple.eml
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
MIME-Version: 1.0
Date: Sat, 22 Apr 2023 14:55:26 -0500
Message-ID: <CAJZ0yfPKN1Q@mail.gmail.com>
Subject:
From: FFIS <ffis@ffis.org>
To: Team <team@usdigitalresponse.org>
Content-Type: multipart/alternative; boundary="0000000000008e64aa05f9f22750"

--0000000000008e64aa05f9f22750
Content-Type: text/plain; charset="UTF-8"

Click here to download competitive grant update
<https://mcusercontent.com/123456/files/file-01.xlsx>

Click here to download competitive grant update
<https://mcusercontent.com/123456/files/file-01.xlsx>

-FFIS

--0000000000008e64aa05f9f22750
Content-Type: text/html; charset="UTF-8"

<div dir="ltr"><a href="https://mcusercontent.com/123456/files/file-01.xlsx">Click here to download competitive grant update</a><br clear="all"><div><div dir="ltr" class="gmail_signature" data-smartmail="gmail_signature"><br>-FFIS</div></div></div>

--0000000000008e64aa05f9f22750--
135 changes: 135 additions & 0 deletions cmd/EnqueueFFISDownload/handler.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
package main

import (
"bytes"
"context"
"fmt"
"io"
"mime"
"mime/multipart"
"net/mail"
"regexp"
"strings"

"github.com/aws/aws-lambda-go/events"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/sqs"
"github.com/usdigitalresponse/grants-ingest/internal/log"
)

// SQSAPI is the one SQS operation this handler depends on: sending a single
// message. Any client value exposing this method can be passed to the handler.
type SQSAPI interface {
	SendMessage(ctx context.Context, params *sqs.SendMessageInput, optFns ...func(*sqs.Options)) (*sqs.SendMessageOutput, error)
}

// S3API is the one S3 operation this handler depends on: fetching a single
// object. Any client value exposing this method can be passed to the handler.
type S3API interface {
	GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error)
}

// handleS3Event processes an S3 notification for a stored email. It reads the
// email object from S3, extracts the text/plain MIME part, locates the
// download URL in that text, and sends the URL to the destination SQS queue.
//
// The first failing step aborts processing and its error is returned; nothing
// is enqueued unless a URL was parsed successfully.
//
// NOTE(review): log.Info is called with the logger as its first argument while
// the log.Errorf calls below are not — confirm log.Errorf's signature.
func handleS3Event(ctx context.Context, s3Event events.S3Event, s3client S3API, sqsclient SQSAPI) error {
	emailBody, err := getEmailFromS3Event(s3client, s3Event, ctx)
	if err != nil {
		return log.Errorf("Error reading email from S3", err)
	}
	defer emailBody.Close()

	plaintext, err := plaintextMIMEFromEmailBody(emailBody)
	if err != nil {
		return log.Errorf("Missing plaintext mime part from email body", err)
	}

	// Parse the URL from the email body
	downloadURL, err := parseURLFromEmailBody(plaintext)
	if err != nil {
		return log.Errorf("Download URL could not be located in email plaintext", err)
	}
	log.Info(logger, "Parsed URL from email body", "url", downloadURL)

	// Enqueue the URL for download. enqueueURLForDownload is declared as
	// (ctx, url, client), so the arguments must be passed in that order.
	if err := enqueueURLForDownload(ctx, downloadURL, sqsclient); err != nil {
		return log.Errorf("Failed to enqueue parsed URL", err)
	}

	return nil
}

// plaintextMIMEFromEmailBody parses email as a mail message whose top-level
// Content-Type is multipart/alternative and returns the body of the first
// part whose Content-Type header starts with "text/plain".
//
// An error is returned when the message cannot be parsed, when the
// Content-Type header is missing or malformed, when the media type is not
// multipart/alternative, when a part cannot be read, or when no text/plain
// part exists.
func plaintextMIMEFromEmailBody(email io.Reader) (string, error) {
	msg, err := mail.ReadMessage(email)
	if err != nil {
		return "", err
	}

	// Check the parse error before looking at mediaType: when parsing fails
	// mediaType is typically empty, and reporting "expected
	// multipart/alternative, got " would hide the real cause.
	mediaType, params, err := mime.ParseMediaType(msg.Header.Get("Content-Type"))
	if err != nil {
		return "", fmt.Errorf("parsing Content-Type header: %w", err)
	}
	if mediaType != "multipart/alternative" {
		return "", fmt.Errorf("expected multipart/alternative, got %s", mediaType)
	}

	mr := multipart.NewReader(msg.Body, params["boundary"])
	for {
		part, err := mr.NextPart()
		if err == io.EOF {
			// All parts consumed without finding text/plain.
			break
		}
		if err != nil {
			return "", err
		}
		if !strings.HasPrefix(part.Header.Get("Content-Type"), "text/plain") {
			continue
		}
		buf := new(bytes.Buffer)
		if _, err := buf.ReadFrom(part); err != nil {
			return "", err
		}
		return buf.String(), nil
	}

	return "", fmt.Errorf("no text/plain part found")
}

// parseURLFromEmailBody returns the single substring of plaintext that matches
// the regular expression held in env.URLPattern.
//
// It returns an error when the pattern does not compile, when nothing
// matches, or when more than one non-overlapping match exists (in which case
// there is no way to tell which match is the intended download URL).
func parseURLFromEmailBody(plaintext string) (string, error) {
	// env.URLPattern is a runtime value, so a bad pattern is reported as an
	// error rather than panicking via MustCompile.
	pattern, err := regexp.Compile(env.URLPattern)
	if err != nil {
		return "", fmt.Errorf("compiling URL pattern %q: %w", env.URLPattern, err)
	}

	// FindAllString yields one entry per match. FindStringSubmatch only
	// describes the first match and its capture groups, so its length says
	// nothing about how many times the pattern occurs in the text.
	matches := pattern.FindAllString(plaintext, -1)
	switch len(matches) {
	case 0:
		return "", fmt.Errorf("no matches found")
	case 1:
		return matches[0], nil
	default:
		return "", fmt.Errorf("multiple matches found")
	}
}

// enqueueURLForDownload sends url as the body of a single SQS message to the
// queue identified by env.DestinationQueueURL and logs the resulting message
// ID. Any error from SendMessage is returned unchanged.
func enqueueURLForDownload(ctx context.Context, url string, client SQSAPI) error {
	output, err := client.SendMessage(ctx, &sqs.SendMessageInput{
		MessageBody: aws.String(url),
		QueueUrl:    aws.String(env.DestinationQueueURL),
	})
	if err != nil {
		return err
	}

	// aws.ToString yields "" for a nil pointer, so a response without a
	// message ID cannot cause a nil-pointer panic while logging.
	log.Info(logger, "Sent SQS message", "messageId", aws.ToString(output.MessageId))
	return nil
}

// getEmailFromS3Event fetches the object named by the first record of s3Event
// and returns its body. The caller is responsible for closing the returned
// reader.
//
// Only the first record is read; any further records in the event are
// ignored. An error is returned when the event has no records or when
// GetObject fails.
//
// NOTE(review): object keys in S3 event notifications may be URL-encoded —
// confirm whether the key needs unescaping before it is passed to GetObject.
func getEmailFromS3Event(s3client S3API, s3Event events.S3Event, ctx context.Context) (io.ReadCloser, error) {
	// Guard the index below: an event without records would otherwise panic.
	if len(s3Event.Records) == 0 {
		return nil, fmt.Errorf("S3 event contains no records")
	}
	record := s3Event.Records[0]
	bucket := record.S3.Bucket.Name
	key := record.S3.Object.Key

	// Shadow the package logger so both log lines carry bucket and key.
	logger := log.With(logger, "bucket", bucket, "key", key)
	log.Debug(logger, "Reading from bucket")

	// Get the email body
	resp, err := s3client.GetObject(ctx, &s3.GetObjectInput{
		Bucket: aws.String(bucket),
		Key:    aws.String(key),
	})
	if err != nil {
		return nil, err
	}
	log.Info(logger, "Retrieved new email file")
	return resp.Body, nil
}
103 changes: 103 additions & 0 deletions cmd/EnqueueFFISDownload/handler_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,103 @@
package main

import (
"bytes"
"context"
"io"
"os"
"testing"

"github.com/aws/aws-lambda-go/events"
"github.com/aws/aws-sdk-go-v2/aws"
"github.com/aws/aws-sdk-go-v2/service/s3"
"github.com/aws/aws-sdk-go-v2/service/sqs"
"github.com/go-kit/log"
)

// MockS3 is a test stand-in for an S3 client. Its GetObject method serves
// content as the body of every requested object.
type MockS3 struct {
// content is the text returned as the object body.
content string
}

// GetObject ignores params and optFns and returns an output whose Body reads
// the mock's content and whose ContentLength is that content's byte length.
// The returned error is always nil.
func (m *MockS3) GetObject(ctx context.Context, params *s3.GetObjectInput, optFns ...func(*s3.Options)) (*s3.GetObjectOutput, error) {
	payload := []byte(m.content)
	out := s3.GetObjectOutput{
		Body:          io.NopCloser(bytes.NewReader(payload)),
		ContentLength: int64(len(payload)),
	}
	return &out, nil
}

// MockSQS is a test stand-in for an SQS client. Its SendMessage method
// records the body of the most recent message instead of sending it.
type MockSQS struct {
// message is the body of the last message passed to SendMessage; it stays
// nil until SendMessage has been called.
message *string
}

// SendMessage stores params.MessageBody on the mock and reports success with
// a fixed, fake message ID. The returned error is always nil.
func (m *MockSQS) SendMessage(ctx context.Context, params *sqs.SendMessageInput, optFns ...func(*sqs.Options)) (*sqs.SendMessageOutput, error) {
	m.message = params.MessageBody
	return &sqs.SendMessageOutput{
		MessageId: aws.String("123456789012345678901234567890"),
	}, nil
}

func TestHandleS3Event(t *testing.T) {
logger = log.NewNopLogger()
env.URLPattern = "https://mcusercontent.com/.+\\.xlsx"
var tests = []struct {
emailFixture, expectedURL string
}{
{"good.eml", "https://mcusercontent.com/123456/files/file-01.xlsx"},
{"missing.eml", ""},
{"multiple.eml", ""},
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Despite this test cases's fixture looking correct as far as I can tell, I don't see any coverage on the if len(matches) > 1 branch of parseURLFromEmailBody(). Would you be able to look into that?

Hint: if you have Taskfile installed, you can run task test to generate an HTML coverage report.

}

for _, test := range tests {
t.Run(test.emailFixture, func(t *testing.T) {
content, err := os.ReadFile("./fixtures/" + test.emailFixture)
if err != nil {
t.Errorf("Error opening file: %v", err)
}
mocks3, mocksqs := getMockClients()
mocks3.content = string(content)
ctx := context.Background()
s3Event := events.S3Event{
Records: []events.S3EventRecord{
{
S3: events.S3Entity{
Bucket: events.S3Bucket{
Name: "test-bucket",
},
Object: events.S3Object{
Key: "test/email/file.eml",
},
},
},
},
}

err = handleS3Event(ctx, s3Event, mocks3, mocksqs)

if test.expectedURL != "" {
if err != nil {
t.Errorf("Error parsing S3 event: %v", err)
}
if *mocksqs.message != test.expectedURL {
t.Errorf("Expected message %v, got %v", test.expectedURL, mocksqs.message)
}
} else {
// parse expected bad message
if mocksqs.message == nil && test.expectedURL != "" {
t.Errorf("Expected message for %s to be empty", test.emailFixture)
}
}
})
}
}

// getMockClients builds a fresh pair of mocks for one test case: a MockS3
// preloaded with the placeholder content "test" and a MockSQS that has not
// recorded any message.
func getMockClients() (*MockS3, *MockSQS) {
	return &MockS3{content: "test"}, &MockSQS{}
}
Loading