Skip to content

Commit

Permalink
fix: potential goroutine leaks on ReadRows (#57)
Browse files Browse the repository at this point in the history
* fix: potential goroutine leaks on ReadRows

* tests: add unit-test to catch goroutine leaks on ReadRows
  • Loading branch information
muktihari authored May 31, 2024
1 parent f3dd3ae commit e3df699
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 15 deletions.
7 changes: 7 additions & 0 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"bytes"
"fmt"
"io"
"sync"
)

// XlsxFile defines a populated XLSX file struct.
Expand All @@ -14,12 +15,16 @@ type XlsxFile struct {
sheetFiles map[string]*zip.File
sharedStrings []string
dateStyles map[int]bool

doneCh chan struct{} // doneCh serves as a signal to abort unfinished operations.
}

// XlsxFileCloser wraps XlsxFile to be able to close an open file
type XlsxFileCloser struct {
zipReadCloser *zip.ReadCloser
XlsxFile

once sync.Once // once performs actions exactly once, e.g. closing a channel.
}

// getFileForName finds and returns a *zip.File by it's display name from within an archive.
Expand Down Expand Up @@ -65,6 +70,7 @@ func (xl *XlsxFileCloser) Close() error {
if xl == nil {
return nil
}
xl.once.Do(func() { close(xl.doneCh) })
return xl.zipReadCloser.Close()
}

Expand Down Expand Up @@ -159,6 +165,7 @@ func (x *XlsxFile) init(zipReader *zip.Reader) error {
x.Sheets = sheets
x.sheetFiles = *sheetFiles
x.dateStyles = *dateStyles
x.doneCh = make(chan struct{})

return nil
}
23 changes: 23 additions & 0 deletions file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"archive/zip"
"io/ioutil"
"os"
"runtime"
"testing"

"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -142,3 +143,25 @@ func TestGetSheetFileForSheetName(t *testing.T) {
})
}
}

func TestGoroutineLeaksOnReadRows(t *testing.T) {
f, err := OpenFile("test/test-small.xlsx")
require.NoError(t, err)

usedByTest := runtime.NumGoroutine()

rowChannel := f.ReadRows(f.Sheets[0])
n := runtime.NumGoroutine() - usedByTest
require.Equal(t, 1, n, "goroutine spawned more than once")

<-rowChannel // pull one row

f.Close() // Close XlsxFile
runtime.GC() // Force GC to run

n = runtime.NumGoroutine() - usedByTest
require.Equal(t, 0, n, "goroutine leaks found")

_, ok := <-rowChannel
require.Equal(t, false, ok, "channel should be closed")
}
36 changes: 21 additions & 15 deletions rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package xlsxreader
import (
"encoding/xml"
"fmt"
"io"
"strconv"
"strings"
)
Expand Down Expand Up @@ -258,18 +259,11 @@ func (x *XlsxFile) getCellType(r rawCell) CellType {
func (x *XlsxFile) readSheetRows(sheet string, ch chan<- Row) {
defer close(ch)

file, ok := x.sheetFiles[sheet]
if !ok {
ch <- Row{
Error: fmt.Errorf("unable to open sheet %s", sheet),
}
return
}

xmlFile, err := file.Open()
xmlFile, err := x.openSheetFile(sheet)
if err != nil {
ch <- Row{
Error: err,
select {
case <-x.doneCh:
case ch <- Row{Error: err}:
}
return
}
Expand All @@ -289,12 +283,24 @@ func (x *XlsxFile) readSheetRows(sheet string, ch chan<- Row) {
if len(row.Cells) < 1 && row.Error == nil {
continue
}
ch <- row
select {
case <-x.doneCh:
return
case ch <- row:
}
}
}
}
}

func (x *XlsxFile) openSheetFile(sheet string) (io.ReadCloser, error) {
file, ok := x.sheetFiles[sheet]
if !ok {
return nil, fmt.Errorf("unable to open sheet %s", sheet)
}
return file.Open()
}

// parseRow parses the raw XML of a row element into a consumable Row struct.
// The Row struct returned will contain any errors that occurred either in
// interrogating values, or in parsing the XML.
Expand Down Expand Up @@ -353,9 +359,9 @@ func (x *XlsxFile) parseRawCells(rawCells []rawCell, index int) ([]Cell, error)
// In order to provide a simplistic interface, this method returns a channel that can be
// range-d over.
//
// This method has one notable drawback however - the entire file must be consumed before
// the channel will be closed. Reading only some of the values will leave an orphaned
// goroutine and channel behind.
// If you want to read only some of the values, please ensure that the Close() method is
// called after processing the entire file to stop all active goroutines and prevent any
// potential goroutine leaks.
//
// Notes:
// Xlsx sheets may omit cells which are empty, meaning a row may not have continuous cell
Expand Down

0 comments on commit e3df699

Please sign in to comment.