Skip to content

Commit

Permalink
Enable reading of format type from a DROID sig
Browse files Browse the repository at this point in the history
Changes to DROID signature file parsing to enable reading of file
format types. The changes currently assume the format type is stored
in an XML element; this can easily be changed, and the tests here
will still apply.
  • Loading branch information
ross-spencer committed Nov 5, 2023
1 parent ccff91e commit f2f643d
Show file tree
Hide file tree
Showing 6 changed files with 80 additions and 25 deletions.
47 changes: 36 additions & 11 deletions cmd/sf/pronom_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -128,17 +128,9 @@ var pronomIDs = []pronomIdentificationTests{
},
}

// TestPronom looks to see if PRONOM identification results for a
// minimized PRONOM dataset are correct and contain the information we
// anticipate.
func TestPronom(t *testing.T) {
sf := siegfried.New()
config.SetHome(DataPath)
identifier, err := pronom.New(config.SetLimit(minimalPronom))
if err != nil {
t.Errorf("Error creating new PRONOM identifier: %s", err)
}
sf.Add(identifier)
// runIdentificationWithSF provides a number of tests that can be run
// against a Siegfried.
func runIdentificationWithSF(sf *siegfried.Siegfried, t *testing.T) {
makeSkeletons()
skeletonFS := fstest.MapFS(skeletons)
testDirListing, err := skeletonFS.ReadDir(".")
Expand Down Expand Up @@ -182,5 +174,38 @@ func TestPronom(t *testing.T) {
t.Errorf("Results not equal for %s; expected %v; got %v", res.puid, pronomIDs[idx], res)
}
}
}

// TestPronom looks to see if PRONOM identification results for a
// minimized PRONOM dataset are correct and contain the information we
// anticipate.
func TestPronom(t *testing.T) {
sf := siegfried.New()
config.SetHome(DataPath)
identifier, err := pronom.New(config.SetLimit(minimalPronom))
if err != nil {
t.Errorf("Error creating new PRONOM identifier: %s", err)
}
sf.Add(identifier)
runIdentificationWithSF(sf, t)
config.Clear()()
}

// TestPronomNoReports performs the same tests as TestPronom, but
// against a Siegfried created purely from a signature file.
func TestPronomNoReports(t *testing.T) {
sf := siegfried.New()
config.SetHome(DataPath)
config.SetNoContainer()()
config.SetNoReports()()
if config.Reports() != "" {
t.Errorf("pronon.reports should be unset, not: %s", config.Reports())
}
identifier, err := pronom.New(config.SetLimit(minimalPronom))
if err != nil {
t.Errorf("Error creating new PRONOM identifier: %s", err)
}
sf.Add(identifier)
runIdentificationWithSF(sf, t)
config.Clear()()
}
1 change: 1 addition & 0 deletions pkg/config/identifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,6 +316,7 @@ func IsArchive(id string) Archive {
// Clear clears loc and mimeinfo details to avoid pollution when creating multiple identifiers in same session
func Clear() func() private {
return func() private {
identifier.noContainer = false
identifier.name = ""
identifier.extend = nil
identifier.limit = nil
Expand Down
2 changes: 1 addition & 1 deletion pkg/pronom/identifier.go
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ func New(opts ...config.Option) (core.Identifier, error) {
pronom = identifier.ApplyConfig(pronom)
id := &Identifier{
Base: identifier.New(pronom, config.ZipPuid()),
hasClass: config.Reports() != "" && !config.NoClass(),
hasClass: !config.NoClass(),
infos: infos(pronom.Infos()),
}
if id.Multi() == config.DROID {
Expand Down
1 change: 1 addition & 0 deletions pkg/pronom/internal/mappings/droid.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type FileFormat struct {
Name string `xml:",attr"`
Version string `xml:",attr"`
MIMEType string `xml:",attr"`
Types string `xml:"FormatTypes"`
Extensions []string `xml:"Extension"`
Signatures []int `xml:"InternalSignatureID"`
Priorities []int `xml:"HasPriorityOverFileFormatID"`
Expand Down
1 change: 1 addition & 0 deletions pkg/pronom/parseable.go
Original file line number Diff line number Diff line change
Expand Up @@ -233,6 +233,7 @@ func (d *droid) Infos() map[string]identifier.FormatInfo {
name: strings.TrimSpace(v.Name),
version: strings.TrimSpace(v.Version),
mimeType: strings.TrimSpace(v.MIMEType),
class: strings.TrimSpace(v.Types),
}
}
return infos
Expand Down
53 changes: 40 additions & 13 deletions pkg/pronom/pronom_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"sort"
"testing"

"github.com/richardlehane/siegfried/internal/identifier"
"github.com/richardlehane/siegfried/pkg/config"
)

Expand All @@ -21,15 +22,9 @@ func TestNew(t *testing.T) {
}
}

// TestFormatInfos inspects the values loaded into a PRONOM identifier
// from a minimal PRONOM dataset, i.e. fewer than loading all of PRONOM.
func TestFormatInfos(t *testing.T) {
config.SetHome(dataPath)
config.SetLimit(minimalPronom)()
i, err := NewPronom()
if err != nil {
t.Error(err)
}
// verifyIdentifier provides a number of tests that can be run against
// a PRONOM identifier.
func verifyIdentifier(i identifier.Parseable, t *testing.T) {
const minReports int = 5
if len(i.Infos()) != minReports {
t.Error("Unexpected number of reports for PRONOM minimal tests")
Expand Down Expand Up @@ -84,7 +79,7 @@ func TestFormatInfos(t *testing.T) {
sort.Strings(puids)
sort.Strings(expectedPuids)
if !reflect.DeepEqual(puids, expectedPuids) {
t.Error("PUIDs from minimal PRONOM set do not match expected values")
t.Errorf("PUIDs from minimal PRONOM set do not match expected values; expected %v; got %v", puids, expectedPuids)
}
sort.Strings(names)
sort.Strings(expectedNames)
Expand All @@ -94,17 +89,49 @@ func TestFormatInfos(t *testing.T) {
sort.Strings(versions)
sort.Strings(expectedVersions)
if !reflect.DeepEqual(versions, expectedVersions) {
t.Error("Format versions from minimal PRONOM set do not match expected values")
t.Errorf("Format versions from minimal PRONOM set do not match expected values; expected %v; got %v", versions, expectedVersions)
}
sort.Strings(mimes)
sort.Strings(expectedMimes)
if !reflect.DeepEqual(mimes, expectedMimes) {
t.Error("MIMETypes from minimal PRONOM set do not match expected values")
t.Errorf("MIMETypes from minimal PRONOM set do not match expected values; expected %v; got %v", mimes, expectedMimes)
}
sort.Strings(types)
sort.Strings(expectedTypes)
if !reflect.DeepEqual(types, expectedTypes) {
t.Error("Format types from minimal PRONOM set do not match expected values")
t.Errorf("Format types from minimal PRONOM set do not match expected values; expected %v; got %v", types, expectedTypes)
}
}

// TestFormatInfosDefault inspects the values loaded into a PRONOM
// identifier from a minimal PRONOM dataset, i.e. fewer than loading
// all of PRONOM.
//
// The identifier is built by NewPronom with the configuration limited
// to minimalPronom and is then checked by verifyIdentifier.
func TestFormatInfosDefault(t *testing.T) {
	// Reset the shared identifier configuration however the test exits
	// (including an early t.Fatal) so later tests are not polluted.
	defer config.Clear()()
	config.SetHome(dataPath)
	config.SetLimit(minimalPronom)()
	i, err := NewPronom()
	if err != nil {
		// verifyIdentifier calls methods on i, so a failed construction
		// must stop the test instead of falling through.
		t.Fatal(err)
	}
	verifyIdentifier(i, t)
}

// TestFormatInfosNoReports performs the same tests as TestFormatInfosDefault
// but does so without loading PRONOM reports, preferring to create an
// identifier using a signature file only.
//
// Container signatures and reports are switched off through the config
// package, and the test asserts that config.Reports() is empty before
// the identifier is built.
func TestFormatInfosNoReports(t *testing.T) {
	// Reset the shared identifier configuration however the test exits
	// (including an early t.Fatal) so later tests are not polluted.
	defer config.Clear()()
	config.SetHome(dataPath)
	config.SetLimit(minimalPronom)()
	config.SetNoContainer()()
	config.SetNoReports()()
	if config.Reports() != "" {
		t.Errorf("pronom.reports should be unset, not: %s", config.Reports())
	}
	i, err := NewPronom()
	if err != nil {
		// verifyIdentifier calls methods on i, so a failed construction
		// must stop the test instead of falling through.
		t.Fatal(err)
	}
	verifyIdentifier(i, t)
}

0 comments on commit f2f643d

Please sign in to comment.