Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Allow option to tolerate PixelData VL and expected length mismatches. #245

Merged
merged 8 commits into from
Sep 7, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 12 additions & 6 deletions cmd/dicomutil/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,10 +21,11 @@ import (
var GitVersion = "unknown"

var (
version = flag.Bool("version", false, "print current version and exit")
filepath = flag.String("path", "", "path")
extractImagesStream = flag.Bool("extract-images-stream", false, "Extract images using frame streaming capability")
printJSON = flag.Bool("json", false, "Print dataset as JSON")
version = flag.Bool("version", false, "print current version and exit")
filepath = flag.String("path", "", "path")
extractImagesStream = flag.Bool("extract-images-stream", false, "Extract images using frame streaming capability")
printJSON = flag.Bool("json", false, "Print dataset as JSON")
allowPixelDataVLMismatch = flag.Bool("allow-pixel-data-mismatch", false, "Allows the pixel data mismatch")
)

// FrameBufferSize represents the size of the *Frame buffered channel for streaming calls
Expand Down Expand Up @@ -57,7 +58,11 @@ func main() {
if *extractImagesStream {
ds = parseWithStreaming(f, info.Size())
} else {
data, err := dicom.Parse(f, info.Size(), nil)
var opts []dicom.ParseOption
if *allowPixelDataVLMismatch {
opts = append(opts, dicom.AllowMismatchPixelDataLength())
}
data, err := dicom.Parse(f, info.Size(), nil, opts...)
if err != nil {
log.Fatalf("error parsing data: %v", err)
}
Expand Down Expand Up @@ -131,7 +136,8 @@ func writeStreamingFrames(frameChan chan *frame.Frame, doneWG *sync.WaitGroup) {
func generateImage(fr *frame.Frame, frameIndex int, frameSuffix string, wg *sync.WaitGroup) {
i, err := fr.GetImage()
if err != nil {
log.Fatal("Error while getting image")
fmt.Printf("Error while getting image: %v\n", err)
return
}

ext := ".jpg"
Expand Down
4 changes: 3 additions & 1 deletion dataset.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@ var ErrorElementNotFound = errors.New("element not found")
// within this Dataset (including Elements nested within Sequences).
type Dataset struct {
Elements []*Element `json:"elements"`

opts parseOptSet
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can take care of this in another PR, but just putting a note for myself: I think if we're going to add this set of options to Dataset, it makes sense that any person who holds a Dataset should be able to inspect the opts ...ParseOption the Dataset was parsed with. Typically I don't like just tacking on options to another struct to "pass it through" but in this scenario I think this makes sense because it seems useful for anyone who has a Dataset to see the options it was parsed with.

}

// FindElementByTag searches through the dataset and returns a pointer to the matching element.
Expand Down Expand Up @@ -183,7 +185,7 @@ func (d *Dataset) String() string {
b.WriteString(fmt.Sprintf("%s VR: %s\n", tabs, elem.e.ValueRepresentation))
b.WriteString(fmt.Sprintf("%s VR Raw: %s\n", tabs, elem.e.RawValueRepresentation))
b.WriteString(fmt.Sprintf("%s VL: %d\n", tabs, elem.e.ValueLength))
b.WriteString(fmt.Sprintf("%s Value: %d\n", tabs, elem.e.Value))
b.WriteString(fmt.Sprintf("%s Value: %s\n", tabs, elem.e.Value.String()))
b.WriteString(fmt.Sprintf("%s]\n\n", tabs))
}
return b.String()
Expand Down
4 changes: 2 additions & 2 deletions dataset_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ func ExampleDataset_String() {
// VR: VRInt32List
// VR Raw: UL
// VL: 0
// Value: &{[100]}
// Value: [100]
// ]
//
// [
Expand All @@ -277,7 +277,7 @@ func ExampleDataset_String() {
// VR: VRInt32List
// VR Raw: UL
// VL: 0
// Value: &{[200]}
// Value: [200]
// ]

}
20 changes: 16 additions & 4 deletions element.go
Original file line number Diff line number Diff line change
Expand Up @@ -296,8 +296,12 @@ func (s *sequencesValue) MarshalJSON() ([]byte, error) {

// PixelDataInfo is a representation of DICOM PixelData.
type PixelDataInfo struct {
Frames []frame.Frame
IsEncapsulated bool `json:"isEncapsulated"`
Frames []frame.Frame
// ParseErr indicates if there was an error when reading this Frame from the DICOM.
// If this is set, this means fallback behavior was triggered to blindly write the PixelData bytes to an encapsulated frame.
// The ParseErr will contain details about the specific error encountered.
ParseErr error `json:"parseErr"`
IsEncapsulated bool `json:"isEncapsulated"`
Offsets []uint32
}

Expand All @@ -310,8 +314,16 @@ func (e *pixelDataValue) isElementValue() {}
func (e *pixelDataValue) ValueType() ValueType { return PixelData }
func (e *pixelDataValue) GetValue() interface{} { return e.PixelDataInfo }
func (e *pixelDataValue) String() string {
// TODO: consider adding more sophisticated formatting
return ""
if len(e.Frames) == 0 {
return "empty pixel data"
}
if e.IsEncapsulated {
return fmt.Sprintf("encapsulated FramesLength=%d Frame[0] size=%d", len(e.Frames), len(e.Frames[0].EncapsulatedData.Data))
}
if e.ParseErr != nil {
return fmt.Sprintf("parseErr err=%s FramesLength=%d Frame[0] size=%d", e.ParseErr.Error(), len(e.Frames), len(e.Frames[0].EncapsulatedData.Data))
}
return fmt.Sprintf("FramesLength=%d FrameSize rows=%d cols=%d", len(e.Frames), e.Frames[0].NativeData.Rows, e.Frames[0].NativeData.Cols)
}

func (e *pixelDataValue) MarshalJSON() ([]byte, error) {
Expand Down
27 changes: 19 additions & 8 deletions parse.go
Original file line number Diff line number Diff line change
Expand Up @@ -50,12 +50,15 @@ var (
// has been fully parsed. Users using one of the other Parse APIs should not
// need to use this.
ErrorEndOfDICOM = errors.New("this indicates to the caller of Next() that the DICOM has been fully parsed")

// ErrorMismatchPixelDataLength indicates that the size calculated from DICOM mismatch the VL.
ErrorMismatchPixelDataLength = errors.New("the size calculated from DICOM elements and the PixelData element's VL are mismatched")
)

// Parse parses the entire DICOM at the input io.Reader into a Dataset of DICOM Elements. Use this if you are
// looking to parse the DICOM all at once, instead of element-by-element.
func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame) (Dataset, error) {
p, err := NewParser(in, bytesToRead, frameChan)
func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame, opts ...ParseOption) (Dataset, error) {
p, err := NewParser(in, bytesToRead, frameChan, opts...)
if err != nil {
return Dataset{}, err
}
Expand All @@ -76,7 +79,7 @@ func Parse(in io.Reader, bytesToRead int64, frameChan chan *frame.Frame) (Datase

// ParseFile parses the entire DICOM at the given filepath. See dicom.Parse as
// well for a more generic io.Reader based API.
func ParseFile(filepath string, frameChan chan *frame.Frame) (Dataset, error) {
func ParseFile(filepath string, frameChan chan *frame.Frame, opts ...ParseOption) (Dataset, error) {
f, err := os.Open(filepath)
if err != nil {
return Dataset{}, err
Expand All @@ -88,7 +91,7 @@ func ParseFile(filepath string, frameChan chan *frame.Frame) (Dataset, error) {
return Dataset{}, err
}

return Parse(f, info.Size(), frameChan)
return Parse(f, info.Size(), frameChan, opts...)
}

// Parser is a struct that allows a user to parse Elements from a DICOM element-by-element using Next(), which may be
Expand Down Expand Up @@ -131,7 +134,7 @@ func NewParser(in io.Reader, bytesToRead int64, frameChannel chan *frame.Frame,
debug.Log("NewParser: readHeader complete")
}

p.dataset = Dataset{Elements: elems}
p.dataset = Dataset{Elements: elems, opts: optSet}
// TODO(suyashkumar): avoid storing the metadata pointers twice (though not that expensive)
p.metadata = Dataset{Elements: elems}

Expand Down Expand Up @@ -258,16 +261,24 @@ type ParseOption func(*parseOptSet)
// parseOptSet represents the flattened option set after all ParseOptions have been applied.
type parseOptSet struct {
skipMetadataReadOnNewParserInit bool
allowMismatchPixelDataLength bool
}

func toParseOptSet(opts ...ParseOption) *parseOptSet {
optSet := &parseOptSet{}
func toParseOptSet(opts ...ParseOption) parseOptSet {
optSet := parseOptSet{}
for _, opt := range opts {
opt(optSet)
opt(&optSet)
}
return optSet
}

// AllowMismatchPixelDataLength allows parser to ignore an error when the length calculated from elements do not match with value length.
func AllowMismatchPixelDataLength() ParseOption {
return func(set *parseOptSet) {
set.allowMismatchPixelDataLength = true
}
}

// SkipMetadataReadOnNewParserInit makes NewParser skip trying to parse metadata. This will make the Parser default to implicit little endian byte order.
// Any metatata tags found in the dataset will still be available when parsing.
func SkipMetadataReadOnNewParserInit() ParseOption {
Expand Down
103 changes: 72 additions & 31 deletions read.go
Original file line number Diff line number Diff line change
Expand Up @@ -110,9 +110,9 @@ func readValue(r dicomio.Reader, t tag.Tag, vr string, vl uint32, isImplicit boo
case tag.VRUInt16List, tag.VRUInt32List, tag.VRInt16List, tag.VRInt32List, tag.VRTagList:
return readInt(r, t, vr, vl)
case tag.VRSequence:
return readSequence(r, t, vr, vl)
return readSequence(r, t, vr, vl, d)
case tag.VRItem:
return readSequenceItem(r, t, vr, vl)
return readSequenceItem(r, t, vr, vl, d)
case tag.VRPixelData:
return readPixelData(r, t, vr, vl, d, fc)
case tag.VRFloat32List, tag.VRFloat64List:
Expand Down Expand Up @@ -221,14 +221,33 @@ func fillBufferSingleBitAllocated(pixelData []int, d dicomio.Reader, bo binary.B
return nil
}

// readNativeFrames reads NativeData frames from a Decoder based on already parsed pixel information
// that should be available in parsedData (elements like NumberOfFrames, rows, columns, etc)
func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Frame, vl uint32) (pixelData *PixelDataInfo,
bytesRead int, err error) {
func makeErrorPixelData(reader io.Reader, vl uint32, fc chan<- *frame.Frame, parseErr error) (*PixelDataInfo, error) {
data := make([]byte, vl)
_, err := io.ReadFull(reader, data)
if err != nil {
return nil, fmt.Errorf("makeErrorPixelData: read pixelData: %w", err)
}

f := frame.Frame{
EncapsulatedData: frame.EncapsulatedFrame{
Data: data,
},
}

if fc != nil {
fc <- &f
}
image := PixelDataInfo{
IsEncapsulated: false,
ParseErr: parseErr,
Frames: []frame.Frame{f},
}
return &image, nil
}

// readNativeFrames reads NativeData frames from a Decoder based on already parsed pixel information
// that should be available in parsedData (elements like NumberOfFrames, rows, columns, etc)
func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Frame, vl uint32) (pixelData *PixelDataInfo,
bytesToRead int, err error) {
// Parse information from previously parsed attributes that are needed to parse NativeData Frames:
rows, err := parsedData.FindElementByTag(tag.Rows)
if err != nil {
Expand Down Expand Up @@ -269,10 +288,38 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr

debug.Logf("readNativeFrames:\nRows: %d\nCols:%d\nFrames::%d\nBitsAlloc:%d\nSamplesPerPixel:%d", MustGetInts(rows.Value)[0], MustGetInts(cols.Value)[0], nFrames, bitsAllocated, samplesPerPixel)

bytesAllocated := bitsAllocated / 8
bytesToRead = bytesAllocated * samplesPerPixel * pixelsPerFrame * nFrames
if bitsAllocated == 1 {
bytesToRead = pixelsPerFrame * samplesPerPixel / 8 * nFrames
}

skipFinalPaddingByte := false
if uint32(bytesToRead) != vl {
switch {
case uint32(bytesToRead) == vl-1 && vl%2 == 0:
skipFinalPaddingByte = true
case uint32(bytesToRead) == vl-1 && vl%2 != 0:
return nil, 0, fmt.Errorf("vl=%d: %w", vl, ErrorExpectedEvenLength)
default:
// calculated bytesToRead and actual VL mismatch
if !parsedData.opts.allowMismatchPixelDataLength {
return nil, 0, fmt.Errorf("expected_vl=%d actual_vl=%d %w", bytesToRead, vl, ErrorMismatchPixelDataLength)
}
image, err := makeErrorPixelData(d, vl, fc, ErrorMismatchPixelDataLength)
if err != nil {
return nil, 0, err
}
return image, int(vl), nil
}
}

// Parse the pixels:
image := PixelDataInfo{
IsEncapsulated: false,
}
image.Frames = make([]frame.Frame, nFrames)
bo := d.ByteOrder()
bytesAllocated := bitsAllocated / 8
pixelBuf := make([]byte, bytesAllocated)
for frameIdx := 0; frameIdx < nFrames; frameIdx++ {
// Init current frame
Expand All @@ -282,36 +329,35 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr
BitsPerSample: bitsAllocated,
Rows: MustGetInts(rows.Value)[0],
Cols: MustGetInts(cols.Value)[0],
Data: make([][]int, int(pixelsPerFrame)),
Data: make([][]int, pixelsPerFrame),
},
}
buf := make([]int, int(pixelsPerFrame)*samplesPerPixel)
buf := make([]int, pixelsPerFrame*samplesPerPixel)
if bitsAllocated == 1 {
if err := fillBufferSingleBitAllocated(buf, d, bo); err != nil {
return nil, bytesRead, err
return nil, bytesToRead, err
}
for pixel := 0; pixel < int(pixelsPerFrame); pixel++ {
for pixel := 0; pixel < pixelsPerFrame; pixel++ {
for value := 0; value < samplesPerPixel; value++ {
currentFrame.NativeData.Data[pixel] = buf[pixel*samplesPerPixel : (pixel+1)*samplesPerPixel]
}
}
} else {
for pixel := 0; pixel < int(pixelsPerFrame); pixel++ {
for pixel := 0; pixel < pixelsPerFrame; pixel++ {
for value := 0; value < samplesPerPixel; value++ {
_, err := io.ReadFull(d, pixelBuf)
if err != nil {
return nil, bytesRead,
return nil, bytesToRead,
fmt.Errorf("could not read uint%d from input: %w", bitsAllocated, err)
}

if bitsAllocated == 8 {
buf[(pixel*samplesPerPixel)+value] = int(pixelBuf[0])
} else if bitsAllocated == 16 {
buf[(pixel*samplesPerPixel)+value] = int(bo.Uint16(pixelBuf))
} else if bitsAllocated == 32 {
buf[(pixel*samplesPerPixel)+value] = int(bo.Uint32(pixelBuf))
} else {
return nil, bytesRead, fmt.Errorf("unsupported BitsAllocated value of: %d : %w", bitsAllocated, ErrorUnsupportedBitsAllocated)
return nil, bytesToRead, fmt.Errorf("bitsAllocated=%d : %w", bitsAllocated, ErrorUnsupportedBitsAllocated)
}
}
currentFrame.NativeData.Data[pixel] = buf[pixel*samplesPerPixel : (pixel+1)*samplesPerPixel]
Expand All @@ -322,31 +368,26 @@ func readNativeFrames(d dicomio.Reader, parsedData *Dataset, fc chan<- *frame.Fr
fc <- &currentFrame // write the current frame to the frame channel
}
}

bytesRead = bytesAllocated * samplesPerPixel * pixelsPerFrame * nFrames
if vl > 0 && uint32(bytesRead) == vl-1 {
if vl%2 != 0 {
// this error should never happen if the file conforms to the DICOM spec
return nil, bytesRead, fmt.Errorf("odd number of bytes specified for PixelData violates DICOM spec: %d : %w", vl, ErrorExpectedEvenLength)
}
if skipFinalPaddingByte {
err := d.Skip(1)
if err != nil {
return nil, bytesRead, fmt.Errorf("could not read padding byte: %w", err)
return nil, bytesToRead, fmt.Errorf("could not read padding byte: %w", err)
}
bytesRead++
bytesToRead++
}
return &image, bytesRead, nil
return &image, bytesToRead, nil
}

// readSequence reads a sequence element (VR = SQ) that contains a subset of Items. Each item contains
// a set of Elements.
// See http://dicom.nema.org/medical/dicom/current/output/chtml/part05/sect_7.5.2.html#table_7.5-1
func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, error) {
func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32, d *Dataset) (Value, error) {
var sequences sequencesValue

seqElements := &Dataset{opts: d.opts}
if vl == tag.VLUndefinedLength {
for {
subElement, err := readElement(r, nil, nil)
subElement, err := readElement(r, seqElements, nil)
if err != nil {
// Stop reading due to error
log.Println("error reading subitem, ", err)
Expand All @@ -373,7 +414,7 @@ func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, err
return nil, err
}
for !r.IsLimitExhausted() {
subElement, err := readElement(r, nil, nil)
subElement, err := readElement(r, seqElements, nil)
if err != nil {
// TODO: option to ignore errors parsing subelements?
return nil, err
Expand All @@ -390,12 +431,12 @@ func readSequence(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, err

// readSequenceItem reads an item component of a sequence dicom element and returns an Element
// with a SequenceItem value.
func readSequenceItem(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, error) {
func readSequenceItem(r dicomio.Reader, t tag.Tag, vr string, vl uint32, d *Dataset) (Value, error) {
var sequenceItem SequenceItemValue

// seqElements holds items read so far.
// TODO: deduplicate with sequenceItem above
var seqElements Dataset
seqElements := Dataset{opts: d.opts}

if vl == tag.VLUndefinedLength {
for {
Expand Down
Loading