From c6727a0a4e3cb85c95467e57dd0b25b040104e35 Mon Sep 17 00:00:00 2001 From: jabillings Date: Sun, 9 Jun 2024 15:02:23 -0700 Subject: [PATCH] Handle unknowns completely (#235) * Interpret "" DICOM Character Set as iso-8859-1 (#219) * Treat Unknown Tags with defined VL as OW (#232) This change ensures that unknown tags with a defined VL are read as bytes (OW). This should fix #231. Previously they would have been read as strings by default. * Initial commit to default reading VR=UN as SQ when reading with an implicit transfer syntax and undefined length * add todo * handle all forms of unknown data elements * Update go version in build * update other go build versions --------- Co-authored-by: Yoshiyuki Harada <70787551+marineotter@users.noreply.github.com> Co-authored-by: Suyash Kumar --- .github/workflows/bench_pr.yml | 4 +-- .github/workflows/bench_push.yml | 4 +-- .github/workflows/go.yml | 4 +-- dataset_test.go | 28 +++++++++------ element_test.go | 2 +- pkg/charset/charset.go | 1 + read.go | 16 +++++++-- read_test.go | 9 ++++- write.go | 4 ++- write_test.go | 59 ++++++++++++++++++++++++++++---- 10 files changed, 104 insertions(+), 27 deletions(-) diff --git a/.github/workflows/bench_pr.yml b/.github/workflows/bench_pr.yml index bbfc8a7e..8c1a5319 100644 --- a/.github/workflows/bench_pr.yml +++ b/.github/workflows/bench_pr.yml @@ -6,10 +6,10 @@ jobs: runs-on: ubuntu-latest steps: - - name: Set up Go 1.15 + - name: Set up Go 1.18 uses: actions/setup-go@v2 with: - go-version: 1.15 + go-version: 1.18 id: go - name: Check out code diff --git a/.github/workflows/bench_push.yml b/.github/workflows/bench_push.yml index 22c037af..8e9de188 100644 --- a/.github/workflows/bench_push.yml +++ b/.github/workflows/bench_push.yml @@ -5,10 +5,10 @@ jobs: name: Benchmark (push) runs-on: ubuntu-latest steps: - - name: Set up Go 1.15 + - name: Set up Go 1.18 uses: actions/setup-go@v2 with: - go-version: 1.15 + go-version: 1.18 id: go - name: Check out code diff --git a/.github/workflows/go.yml b/.github/workflows/go.yml index ff8f59ad..698668e2 100644 --- a/.github/workflows/go.yml +++ b/.github/workflows/go.yml @@ -7,10 +7,10 @@ jobs: runs-on: ubuntu-latest steps: - - name: Set up Go 1.15 + - name: Set up Go 1.18 uses: actions/setup-go@v2 with: - go-version: 1.15 + go-version: 1.18 id: go - name: Check out code diff --git a/dataset_test.go b/dataset_test.go index 0607e21e..361fbab2 100644 --- a/dataset_test.go +++ b/dataset_test.go @@ -8,13 +8,13 @@ import ( "github.com/suyashkumar/dicom/pkg/tag" ) -func makeSequenceElement(tg tag.Tag, items [][]*Element) *Element { +func makeSequenceElement(tg tag.Tag, items [][]*Element, unknown bool) *Element { sequenceItems := make([]*SequenceItemValue, 0, len(items)) for _, item := range items { sequenceItems = append(sequenceItems, &SequenceItemValue{elements: item}) } - return &Element{ + e := &Element{ Tag: tg, ValueRepresentation: tag.VRSequence, RawValueRepresentation: "SQ", @@ -22,6 +22,14 @@ func makeSequenceElement(tg tag.Tag, items [][]*Element) *Element { value: sequenceItems, }, } + + if unknown { + e.ValueRepresentation = tag.VRUnknown + e.RawValueRepresentation = "UN" + e.ValueLength = tag.VLUndefinedLength + } + + return e } func TestDataset_FindElementByTag(t *testing.T) { @@ -83,9 +91,9 @@ func TestDataset_FlatStatefulIterator(t *testing.T) { { mustNewElement(tag.PatientName, []string{"Bob", "Smith"}), }, - }), + }, false), }, - }), + }, false), }}, expectedFlatElements: []*Element{ // First, expect the entire SQ element @@ -98,9 +106,9 @@ func TestDataset_FlatStatefulIterator(t *testing.T) { { mustNewElement(tag.PatientName, []string{"Bob", "Smith"}), }, - }), + }, false), }, - }), + }, false), // Then expect the inner elements mustNewElement(tag.PatientName, []string{"Bob", "Jones"}), // Inner SQ element @@ -108,7 +116,7 @@ func TestDataset_FlatStatefulIterator(t *testing.T) { { mustNewElement(tag.PatientName, []string{"Bob", "Smith"}), }, - }), + }, false), // Inner element of the inner SQ mustNewElement(tag.PatientName, []string{"Bob", "Smith"}), }, @@ -139,7 +147,7 @@ func ExampleDataset_FlatIterator() { Elements: []*Element{ mustNewElement(tag.Rows, []int{100}), mustNewElement(tag.Columns, []int{100}), - makeSequenceElement(tag.AddOtherSequence, nestedData), + makeSequenceElement(tag.AddOtherSequence, nestedData, false), }, } @@ -172,7 +180,7 @@ func ExampleDataset_FlatIteratorWithExhaustAllElements() { Elements: []*Element{ mustNewElement(tag.Rows, []int{100}), mustNewElement(tag.Columns, []int{100}), - makeSequenceElement(tag.AddOtherSequence, nestedData), + makeSequenceElement(tag.AddOtherSequence, nestedData, false), }, } @@ -220,7 +228,7 @@ func ExampleDataset_FlatStatefulIterator() { value: []int{200}, }, }, - makeSequenceElement(tag.AddOtherSequence, nestedData), + makeSequenceElement(tag.AddOtherSequence, nestedData, false), }, } diff --git a/element_test.go b/element_test.go index a5d88a41..13f45604 100644 --- a/element_test.go +++ b/element_test.go @@ -21,7 +21,7 @@ func TestElement_MarshalJSON_NestedElements(t *testing.T) { }, }, } - seqElement := makeSequenceElement(tag.AddOtherSequence, nestedData) + seqElement := makeSequenceElement(tag.AddOtherSequence, nestedData, false) want := `{"tag":{"Group":70,"Element":258},"VR":9,"rawVR":"SQ","valueLength":0,"value":[[{"tag":{"Group":16,"Element":16},"VR":2,"rawVR":"","valueLength":0,"value":["Bob"]}]]}` diff --git a/pkg/charset/charset.go b/pkg/charset/charset.go index a0dca827..9b4dcaa6 100644 --- a/pkg/charset/charset.go +++ b/pkg/charset/charset.go @@ -40,6 +40,7 @@ const ( // htmlEncodingNames represents a mapping of DICOM charset name to golang encoding/htmlindex name. "" means // 7bit ascii. var htmlEncodingNames = map[string]string{ + "": "iso-8859-1", "ISO_IR 6": "iso-8859-1", "ISO 2022 IR 6": "iso-8859-1", "ISO_IR 13": "shift_jis", diff --git a/read.go b/read.go index 7b6282bc..a158db4d 100644 --- a/read.go +++ b/read.go @@ -127,6 +127,19 @@ func readValue(r dicomio.Reader, t tag.Tag, vr string, vl uint32, isImplicit boo return readPixelData(r, t, vr, vl, d, fc) case tag.VRFloat32List, tag.VRFloat64List: return readFloat(r, t, vr, vl) + // More details on how we treat Unknown VRs can be found at + // https://github.com/suyashkumar/dicom/issues/220 + // and + // https://github.com/suyashkumar/dicom/issues/231. + // It remains to be seen if this fits most DICOMs we see in the wild. + // TODO(suyashkumar): consider replacing UN VRs with SQ earlier on if they + // meet this criteria, so users of the Dataset can interact with it + // correctly. + case tag.VRUnknown: + if isImplicit && vl == tag.VLUndefinedLength { + return readSequence(r, t, vr, vl) + } + return readBytes(r, t, vr, vl) default: return readString(r, t, vr, vl) } @@ -433,7 +446,7 @@ func readSequenceItem(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, func readBytes(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, error) { // TODO: add special handling of PixelData - if vr == vrraw.OtherByte { + if vr == vrraw.OtherByte || vr == vrraw.Unknown { data := make([]byte, vl) _, err := io.ReadFull(r, data) return &bytesValue{value: data}, err @@ -477,7 +490,6 @@ func readString(r dicomio.Reader, t tag.Tag, vr string, vl uint32) (Value, error // Split multiple strings strs := strings.Split(str, "\\") - return &stringsValue{value: strs}, err } diff --git a/read_test.go b/read_test.go index 715ccf17..0d9820d4 100644 --- a/read_test.go +++ b/read_test.go @@ -168,12 +168,19 @@ func TestReadOWBytes(t *testing.T) { expectedErr error }{ { - name: "even-number bytes", + name: "OW VR with even-number bytes", bytes: []byte{0x1, 0x2, 0x3, 0x4}, VR: vrraw.OtherWord, want: &bytesValue{value: []byte{0x1, 0x2, 0x3, 0x4}}, expectedErr: nil, }, + { + name: "UN VR even-number bytes", + bytes: []byte{0x1, 0x2, 0x3, 0x4}, + VR: vrraw.Unknown, + want: &bytesValue{value: []byte{0x1, 0x2, 0x3, 0x4}}, + expectedErr: nil, + }, { name: "error on odd-number bytes", bytes: []byte{0x1, 0x2, 0x3}, diff --git a/write.go b/write.go index 4b148ff5..758d2662 100644 --- a/write.go +++ b/write.go @@ -321,6 +321,8 @@ func verifyValueType(t tag.Tag, value Value, vr string) error { } case vrraw.FloatingPointSingle, vrraw.FloatingPointDouble: ok = valueType == Floats + case vrraw.Unknown: + ok = (valueType == Bytes || valueType == Sequences) default: ok = valueType == Strings } @@ -488,7 +490,7 @@ func writeStrings(w dicomio.Writer, values []string, vr string) error { func writeBytes(w dicomio.Writer, values []byte, vr string) error { var err error switch vr { - case vrraw.OtherWord: + case vrraw.OtherWord, vrraw.Unknown: err = writeOtherWordString(w, values) case vrraw.OtherByte: err = writeOtherByteString(w, values) diff --git a/write_test.go b/write_test.go index ae87fade..6aee7b97 100644 --- a/write_test.go +++ b/write_test.go @@ -47,6 +47,17 @@ func TestWrite(t *testing.T) { mustNewElement(tag.FloatingPointValue, []float64{128.10}), mustNewElement(tag.DimensionIndexPointer, []int{32, 36950}), mustNewElement(tag.RedPaletteColorLookupTableData, []byte{0x1, 0x2, 0x3, 0x4}), + mustNewElement(tag.SelectorSLValue, []int{-20}), + // Some tag with an unknown VR. + { + Tag: tag.Tag{0x0019, 0x1027}, + ValueRepresentation: tag.VRUnknown, + RawValueRepresentation: "UN", + ValueLength: 4, + Value: &bytesValue{ + value: []byte{0x1, 0x2, 0x3, 0x4}, + }, + }, }}, expectedError: nil, }, @@ -108,7 +119,7 @@ func TestWrite(t *testing.T) { }, }, }, - }), + }, false), }}, expectedError: nil, }, @@ -158,7 +169,7 @@ func TestWrite(t *testing.T) { }, }, }, - }), + }, false), }}, expectedError: nil, opts: []WriteOption{SkipVRVerification()}, @@ -193,9 +204,45 @@ func TestWrite(t *testing.T) { }, }, }, - }), + }, false), }, - }), + }, false), + }}, + expectedError: nil, + }, + { + name: "nested unknown sequences", + dataset: Dataset{Elements: []*Element{ + mustNewElement(tag.MediaStorageSOPClassUID, []string{"1.2.840.10008.5.1.4.1.1.1.13"}), + mustNewElement(tag.MediaStorageSOPInstanceUID, []string{"1.2.3.4.5.6.7"}), + mustNewElement(tag.TransferSyntaxUID, []string{uid.ImplicitVRLittleEndian}), + mustNewElement(tag.PatientName, []string{"Bob", "Jones"}), + makeSequenceElement(tag.Tag{0x0019, 0x1027}, [][]*Element{ + // Item 1. + { + { + Tag: tag.Tag{0x0019, 0x1028}, + ValueRepresentation: tag.VRUnknown, + RawValueRepresentation: "UN", + Value: &bytesValue{ + value: []byte{0x1, 0x2, 0x3, 0x4}, + }, + }, + // Nested Sequence. + makeSequenceElement(tag.Tag{0x0019, 0x1029}, [][]*Element{ + { + { + Tag: tag.PatientName, + ValueRepresentation: tag.VRStringList, + RawValueRepresentation: "PN", + Value: &stringsValue{ + value: []string{"Bob", "Jones"}, + }, + }, + }, + }, true), + }, + }, true), }}, expectedError: nil, }, @@ -229,9 +276,9 @@ func TestWrite(t *testing.T) { }, }, }, - }), + }, false), }, - }), + }, false), }}, expectedError: nil, opts: []WriteOption{SkipVRVerification()},