Skip to content

Commit

Permalink
SplitFFISSpreadsheet: logging, cleanup
Browse files Browse the repository at this point in the history
  • Loading branch information
pearkes committed May 15, 2023
1 parent c931499 commit 9e6536b
Show file tree
Hide file tree
Showing 4 changed files with 59 additions and 38 deletions.
9 changes: 6 additions & 3 deletions cmd/SplitFFISSpreadsheet/handler.go
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,10 @@ func handleS3EventWithConfig(cfg aws.Config, ctx context.Context, s3Event events

defer resp.Body.Close()

parsedOpportunities, err := parseXLSXFile(resp.Body)
log.Info(logger, "Parsing excel file")

parsedOpportunities, err := parseXLSXFile(resp.Body, logger)

if err != nil {
log.Error(logger, "Error parsing excel file: ", err)
return err
Expand Down Expand Up @@ -123,7 +126,7 @@ func handleS3EventWithConfig(cfg aws.Config, ctx context.Context, s3Event events
return nil
}

// processOpportunities
// processOpportunities consumes opportunities from the channel and uploads them to S3.
func processOpportunities(ctx context.Context, svc *s3.Client, ch <-chan opportunity) (errs error) {
span, ctx := tracer.StartSpanFromContext(ctx, "processing.worker")

Expand Down Expand Up @@ -166,7 +169,7 @@ func processOpportunities(ctx context.Context, svc *s3.Client, ch <-chan opportu
}
}

// processOpportunity
// processOpportunity marshals the opportunity to JSON and uploads it to S3.
func processOpportunity(ctx context.Context, svc S3ReadWriteObjectAPI, opp opportunity) error {
logger := log.With(logger,
"opportunity_id", opp.GrantID, "opportunity_number", opp.OppNumber)
Expand Down
54 changes: 36 additions & 18 deletions cmd/SplitFFISSpreadsheet/sheet.go
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
package main

import (
"fmt"
"io"
"net/url"
"regexp"
"strconv"
"time"

"github.com/usdigitalresponse/grants-ingest/internal/log"
"github.com/usdigitalresponse/grants-ingest/pkg/grantsSchemas/ffis"
"github.com/xuri/excelize/v2"
)
Expand All @@ -17,11 +17,21 @@ func parseEligibility(value string) bool {
return value == "X"
}

// parseXLSXFile takes a filepath to an xlsx file and parses it into a slice of
// ffis.FFISFundingOpportunity. It will only return opportunities that have a valid
// opportunity number, and will not return an error on individual cell parsing
// issues.
func parseXLSXFile(r io.Reader) ([]ffis.FFISFundingOpportunity, error) {
// parseXLSXFile is a function that reads and processes an Excel file stream, converting the data into a slice
// of ffis.FFISFundingOpportunity objects. The file is expected to be provided as an io.Reader.
// The function filters and retains only those funding opportunities that possess a valid grant ID.
//
// Any errors encountered during the parsing of individual cells within the Excel file are not returned as function errors,
// but are instead logged at the WARN level, accompanied by the associated row and column indices for easy identification.
//
// Parameters:
// r: The io.Reader providing the Excel file stream to be parsed.
// logger: The log.Logger used to log any parsing errors at the WARN level.
//
// Returns:
// A slice of ffis.FFISFundingOpportunity objects representing the parsed funding opportunities from the Excel file.
// An error is returned if the parsing process fails at a level beyond individual cell parsing.
func parseXLSXFile(r io.Reader, logger log.Logger) ([]ffis.FFISFundingOpportunity, error) {
xlFile, err := excelize.OpenReader(r)

if err != nil {
Expand All @@ -30,25 +40,30 @@ func parseXLSXFile(r io.Reader) ([]ffis.FFISFundingOpportunity, error) {

defer func() {
if err := xlFile.Close(); err != nil {
fmt.Println(err)
log.Error(logger, "Error closing excel file", err)
}
}()

// We assume the excel file only has one sheet
sheet := "Sheet1"

// Returns all rows in the sheet. Note this assumes the sheet is somewhat limited in
// size, and will not scale to extremely large worksheets (memory overhead)
rows, err := xlFile.GetRows(sheet)
if err != nil {
return nil, err
}

var opportunities []ffis.FFISFundingOpportunity

// Tracks if the iterator has found headers for the sheet
// Tracks if the iterator has found headers for the sheet. A header
// is a column header, like "CFDA", "Opportunity Title", etc.
foundHeaders := false

// The sheet has rows as categories that we want to assign to
// each opportunity underneath that category, so we re-assign this
// as we iterate through the rows
// as we iterate through the rows. This could be something like
// "Inflation Reduction Act".
//
// TODO: Is this the right name for this metadata?
opportunityCategory := ""
Expand All @@ -58,6 +73,8 @@ rowLoop:
opportunity := ffis.FFISFundingOpportunity{}

for colIndex, cell := range row {
// Tag per-cell log entries with the numeric row index; passing the row
// slice itself would attach every cell value of the row to each log line.
logger := log.With(logger, "row_index", rowIndex, "column_index", colIndex)

// We assume the first column header is "CFDA", if it is,
// we're in the headers row, so skip it and set the flag that
// the content follows
Expand Down Expand Up @@ -87,8 +104,9 @@ rowLoop:
if err != nil {
return nil, err
}
// If we don't match a CFDA number, we assume it's a opportunity category
// and continue

// If we don't match a CFDA number and the row isn't blank, we
// assume it's an opportunity category and continue
if !r.MatchString(cell) {
opportunityCategory = cell
continue rowLoop
Expand All @@ -113,7 +131,7 @@ rowLoop:
num, err := strconv.ParseInt(cell, 10, 64)
// If we can't parse the funding amount, just skip the column
if err != nil {
fmt.Println("Error parsing estimated funding: ", err)
log.Warn(logger, "Error parsing estimated funding", err)
continue
}
opportunity.EstimatedFunding = num
Expand All @@ -126,14 +144,14 @@ rowLoop:
// need to increment the index because Excel is not zero-indexed
cellAxis, err := excelize.CoordinatesToCellName(colIndex+1, rowIndex+1)
if err != nil {
fmt.Println("Error parsing cell axis for grant ID: ", err)
log.Warn(logger, "Error parsing cell axis for grant ID", err)
continue
}

hasLink, target, err := xlFile.GetCellHyperLink(sheet, cellAxis)
if err != nil {
// log this, it is not worth aborting the whole extraction for
fmt.Println("Error getting cell hyperlink for grant ID: ", err)
log.Warn(logger, "Error getting cell hyperlink for grant ID", err)
continue
}

Expand All @@ -142,14 +160,14 @@ rowLoop:
if hasLink {
url, err := url.Parse(target)
if err != nil {
fmt.Println("Error parsing link for grant ID: ", err)
log.Warn(logger, "Error parsing link for grant ID", err)
continue
}

// The opportunity ID should be a < 20 digit numeric value
oppID, err := strconv.ParseInt(url.Query().Get("oppId"), 10, 64)
if err != nil {
fmt.Println("Error parsing opportunity ID: ", err)
log.Warn(logger, "Error parsing opportunity ID", err)
continue
}

Expand All @@ -172,7 +190,7 @@ rowLoop:
// and not the whole row
t, err := time.Parse("1-2-06", cell)
if err != nil {
fmt.Println("Error parsing date: ", err)
log.Warn(logger, "Error parsing date", err)
continue
}
opportunity.DueDate = t
Expand All @@ -185,7 +203,7 @@ rowLoop:
}

// Only add valid opportunities
if opportunity.CFDA != "" {
if opportunity.GrantID > 0 {
opportunities = append(opportunities, opportunity)
}
}
Expand Down
6 changes: 5 additions & 1 deletion cmd/SplitFFISSpreadsheet/sheet_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"testing"
"time"

"github.com/go-kit/log"
"github.com/stretchr/testify/assert"
"github.com/usdigitalresponse/grants-ingest/pkg/grantsSchemas/ffis"
)
Expand All @@ -13,7 +14,10 @@ func TestParseXLSXFile_good(t *testing.T) {
excelFixture, err := os.Open("fixtures/example_spreadsheet.xlsx")
assert.NoError(t, err, "Error opening spreadsheet fixture")

opportunities, err := parseXLSXFile(excelFixture)
// Ignore logging in this test
logger = log.NewNopLogger()

opportunities, err := parseXLSXFile(excelFixture, logger)
assert.NoError(t, err)
assert.NotNil(t, opportunities)

Expand Down
28 changes: 12 additions & 16 deletions pkg/grantsSchemas/ffis/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -10,29 +10,25 @@ type FFISMessageDownload struct {

// Represents a funding opportunity sourced from an FFIS spreadsheet
type FFISFundingOpportunity struct {
CFDA string `json:"cfda"` // eg. 11.525
OppTitle string `json:"opportunity_title"` // eg. "FY 2020 Community Connect Grant Program"

// In the FFIS spreadsheet, this is the header row for a group of opportunities
OppCategory string `json:"opportunity_category"` // eg. Inflation Reduction Act

Agency string `json:"opportunity_agency"` // eg. Forest Service
EstimatedFunding int64 `json:"estimated_funding"` // eg. $25,000,000
ExpectedAwards string `json:"expected_awards"` // eg. 10 or N/A
OppNumber string `json:"opportunity_number"` // eg. USDA-FS-2020-01
GrantID int64 `json:"grant_id"` // eg. 347509
Eligibility FFISFundingEligibility `json:"eligibility"`
CFDA string `json:"cfda"` // eg. 11.525
DueDate time.Time `json:"due_date"`

Match bool `json:"match"`
Eligibility FFISFundingEligibility `json:"eligibility"`
EstimatedFunding int64 `json:"estimated_funding"` // eg. $25,000,000
ExpectedAwards string `json:"expected_awards"` // eg. 10 or N/A
GrantID int64 `json:"grant_id"` // eg. 347509
Match bool `json:"match"`
OppCategory string `json:"opportunity_category"` // eg. Inflation Reduction Act
OppNumber string `json:"opportunity_number"` // eg. USDA-FS-2020-01
OppTitle string `json:"opportunity_title"` // eg. "FY 2020 Community Connect Grant Program"
}

// Eligibility for FFIS funding opportunities as presented in FFIS spreadsheets
type FFISFundingEligibility struct {
State bool `json:"state"` // State Governments
Local bool `json:"local"` // Local Governments
Tribal bool `json:"tribal"` // Tribal Governments
HigherEducation bool `json:"higher_education"` // Institutions of Higher Education
Local bool `json:"local"` // Local Governments
NonProfits bool `json:"non_profits"` // Non-profits
Other bool `json:"other"` // Other/see announcement
State bool `json:"state"` // State Governments
Tribal bool `json:"tribal"` // Tribal Governments
}

0 comments on commit 9e6536b

Please sign in to comment.