Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: potential goroutine leaks on ReadRows #57

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions file.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"bytes"
"fmt"
"io"
"sync"
)

// XlsxFile defines a populated XLSX file struct.
Expand All @@ -14,12 +15,16 @@ type XlsxFile struct {
sheetFiles map[string]*zip.File
sharedStrings []string
dateStyles map[int]bool

doneCh chan struct{} // doneCh serves as a signal to abort unfinished operations.
}

// XlsxFileCloser wraps XlsxFile to be able to close an open file
type XlsxFileCloser struct {
zipReadCloser *zip.ReadCloser
XlsxFile

once sync.Once // once performs actions exactly once, e.g. closing a channel.
}

// getFileForName finds and returns a *zip.File by it's display name from within an archive.
Expand Down Expand Up @@ -65,6 +70,7 @@ func (xl *XlsxFileCloser) Close() error {
if xl == nil {
return nil
}
xl.once.Do(func() { close(xl.doneCh) })
return xl.zipReadCloser.Close()
}

Expand Down Expand Up @@ -159,6 +165,7 @@ func (x *XlsxFile) init(zipReader *zip.Reader) error {
x.Sheets = sheets
x.sheetFiles = *sheetFiles
x.dateStyles = *dateStyles
x.doneCh = make(chan struct{})

return nil
}
23 changes: 23 additions & 0 deletions file_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"archive/zip"
"io/ioutil"
"os"
"runtime"
"testing"

"github.com/stretchr/testify/require"
Expand Down Expand Up @@ -142,3 +143,25 @@ func TestGetSheetFileForSheetName(t *testing.T) {
})
}
}

func TestGoroutineLeaksOnReadRows(t *testing.T) {
f, err := OpenFile("test/test-small.xlsx")
require.NoError(t, err)

usedByTest := runtime.NumGoroutine()

rowChannel := f.ReadRows(f.Sheets[0])
n := runtime.NumGoroutine() - usedByTest
require.Equal(t, 1, n, "goroutine spawned more than once")

<-rowChannel // pull one row

f.Close() // Close XlsxFile
runtime.GC() // Force GC to run

n = runtime.NumGoroutine() - usedByTest
require.Equal(t, 0, n, "goroutine leaks found")

_, ok := <-rowChannel
require.Equal(t, false, ok, "channel should be closed")
}
36 changes: 21 additions & 15 deletions rows.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ package xlsxreader
import (
"encoding/xml"
"fmt"
"io"
"strconv"
"strings"
)
Expand Down Expand Up @@ -258,18 +259,11 @@ func (x *XlsxFile) getCellType(r rawCell) CellType {
func (x *XlsxFile) readSheetRows(sheet string, ch chan<- Row) {
defer close(ch)

file, ok := x.sheetFiles[sheet]
if !ok {
ch <- Row{
Error: fmt.Errorf("unable to open sheet %s", sheet),
}
return
}

xmlFile, err := file.Open()
xmlFile, err := x.openSheetFile(sheet)
if err != nil {
ch <- Row{
Error: err,
select {
case <-x.doneCh:
case ch <- Row{Error: err}:
}
return
}
Expand All @@ -289,12 +283,24 @@ func (x *XlsxFile) readSheetRows(sheet string, ch chan<- Row) {
if len(row.Cells) < 1 && row.Error == nil {
continue
}
ch <- row
select {
case <-x.doneCh:
return
case ch <- row:
}
}
}
}
}

func (x *XlsxFile) openSheetFile(sheet string) (io.ReadCloser, error) {
file, ok := x.sheetFiles[sheet]
if !ok {
return nil, fmt.Errorf("unable to open sheet %s", sheet)
}
return file.Open()
}

// parseRow parses the raw XML of a row element into a consumable Row struct.
// The Row struct returned will contain any errors that occurred either in
// interrogating values, or in parsing the XML.
Expand Down Expand Up @@ -353,9 +359,9 @@ func (x *XlsxFile) parseRawCells(rawCells []rawCell, index int) ([]Cell, error)
// In order to provide a simplistic interface, this method returns a channel that can be
// range-d over.
//
// This method has one notable drawback however - the entire file must be consumed before
// the channel will be closed. Reading only some of the values will leave an orphaned
// goroutine and channel behind.
// If you want to read only some of the values, please ensure that the Close() method is
// called after processing the entire file to stop all active goroutines and prevent any
// potential goroutine leaks.
//
// Notes:
// Xlsx sheets may omit cells which are empty, meaning a row may not have continuous cell
Expand Down