-
Notifications
You must be signed in to change notification settings - Fork 30
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
commit 64bf4da Author: Ross Spencer <all.along.the.watchtower2001@gmail.com> Date: Mon Mar 20 15:12:56 2023 +0000 miscellaneous edit to prompt a merge check commit 706209d Merge: dcb15c2 eb6f061 Author: Richard Lehane <richard.lehane@gmail.com> Date: Mon Mar 20 12:42:20 2023 +0100 Merge branch 'develop' into dev/add-pronom-type commit dcb15c2 Author: Richard Lehane <richard@itforarchivists.com> Date: Mon Mar 20 08:12:50 2023 +0100 fix indexes used by droid writer commit c95e02d Author: Richard Lehane <richard@itforarchivists.com> Date: Sun Mar 19 22:58:45 2023 +0100 add "noclass" flag to allow omitting format class commit b958528 Author: Richard Lehane <richard@itforarchivists.com> Date: Sun Mar 19 13:22:00 2023 +0100 use Limit commit 957c2e7 Author: Ross Spencer <all.along.the.watchtower2001@gmail.com> Date: Sun Feb 5 21:07:14 2023 +0100 Add test for DROID CSV header output Ensures that the DROID header doesn't change in code unless it is explicitly made to do so. commit 9f94a77 Author: Ross Spencer <all.along.the.watchtower2001@gmail.com> Date: Wed Jan 4 16:50:29 2023 +0100 Create in-memory filesystem for PRONOM skeletons We can avoid writing to disk and make the tests here more portable by reading from an in-memory filesystem. The skeletons themselves are small and so can be easily stored in-line as strings and then turned into byte objects. Given the refactor to in-memory objects, we also take the opportunity to add a file that won't identify with the minimal PRONOM signature file and PRONOM reports. Type should be a nil-string as with many of the other fields. commit e27bb70 Author: Ross Spencer <all.along.the.watchtower2001@gmail.com> Date: Wed Dec 28 12:47:55 2022 +0100 Linting fixes PRONOM identifier related linting fixes for the different source files touched by the PRONOM types additions. 
commit 2bdc899 Author: Ross Spencer <all.along.the.watchtower2001@gmail.com> Date: Tue Dec 27 18:33:22 2022 +0100 Add tests for PRONOM types work Tests are added for the PRONOM types work along with new helper functions for making Siegfried tests more discrete and maintainable. commit 0b02110 Author: Ross Spencer <all.along.the.watchtower2001@gmail.com> Date: Tue Dec 27 17:45:28 2022 +0100 Add format type to Siegfried PRONOM output
- Loading branch information
1 parent
457c7b3
commit 98516b1
Showing
15 changed files
with
520 additions
and
68 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,186 @@ | ||
package main | ||
|
||
import ( | ||
"encoding/hex" | ||
"path/filepath" | ||
"reflect" | ||
"sort" | ||
"testing" | ||
"testing/fstest" | ||
|
||
"github.com/richardlehane/siegfried" | ||
"github.com/richardlehane/siegfried/pkg/config" | ||
"github.com/richardlehane/siegfried/pkg/pronom" | ||
) | ||
|
||
// DataPath is the relative path to the roy data directory; TestPronom
// passes it to config.SetHome before building the identifier.
var DataPath = filepath.Join("..", "..", "cmd", "roy", "data")
|
||
// pronomIdentificationTests is one row of the table driven tests. Its
// fields follow the order in which TestPronom copies the values out of
// an identification result, so rows can be written as positional
// literals.
type pronomIdentificationTests struct {
	// Name of the identifier that produced the result, e.g. "pronom".
	// NOTE(review): the field name misspells "identifier"; it is left
	// unchanged so that any existing reference keeps compiling.
	identiifer string
	// PUID (or "UNKNOWN"), format label, format version and MIME type.
	puid, label, version, mime string
	// Format type(s), e.g. "Image (Raster)".
	types string
	// Basis for the match, and the error text (e.g. "no match").
	details, error string
}
|
||
// Fixtures shared by the PRONOM identification tests.
var (
	// skeletons maps a skeleton file name to its in-memory contents. It
	// starts empty and is filled by makeSkeletons.
	skeletons = map[string]*fstest.MapFile{}

	// minimalPronom lists the PUIDs handed to config.SetLimit when the
	// test identifier is built.
	minimalPronom = []string{"fmt/1", "fmt/3", "fmt/5", "fmt/11", "fmt/14"}
)
|
||
// Populate the global skeletons map from string-based byte-sequences to | ||
// save having to store skeletons on disk and read from them. | ||
func makeSkeletons() { | ||
var files = make(map[string]string) | ||
files["fmt-11-signature-id-58.png"] = "89504e470d0a1a0a0000000d494844520000000049454e44ae426082" | ||
files["fmt-14-signature-id-123.pdf"] = "255044462d312e302525454f46" | ||
files["fmt-1-signature-id-1032.wav"] = ("" + | ||
"524946460000000057415645000000000000000000000000000000000000" + | ||
"000062657874000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"000000000000000000000000000000000000000000000000000000000000" + | ||
"00000000000000000000000000000000000000000000000000000000" + | ||
"") | ||
files["fmt-5-signature-id-51.avi"] = ("" + | ||
"524946460000000041564920000000000000000000000000000000000000" + | ||
"00004c495354000000006864726c61766968000000000000000000000000" + | ||
"00000000000000004c495354000000006d6f7669" + | ||
"") | ||
files["fmt-3-signature-id-18.gif"] = "4749463837613b" | ||
files["badf00d.unknown"] = "badf00d" | ||
for key, val := range files { | ||
data, _ := hex.DecodeString(val) | ||
skeletons[key] = &fstest.MapFile{Data: []byte(data)} | ||
} | ||
} | ||
|
||
var pronomIDs = []pronomIdentificationTests{ | ||
{ | ||
"pronom", | ||
"UNKNOWN", | ||
"", | ||
"", | ||
"", | ||
"", | ||
"", | ||
"no match", | ||
}, | ||
{ | ||
"pronom", | ||
"fmt/1", | ||
"Broadcast WAVE", | ||
"0 Generic", | ||
"audio/x-wav", | ||
"Audio", | ||
"extension match wav; byte match at [[0 12] [32 356]]", | ||
"", | ||
}, | ||
{ | ||
"pronom", | ||
"fmt/11", | ||
"Portable Network Graphics", | ||
"1.0", | ||
"image/png", | ||
"Image (Raster)", | ||
"extension match png; byte match at [[0 16] [16 12]]", | ||
"", | ||
}, | ||
{ | ||
"pronom", | ||
"fmt/14", | ||
"Acrobat PDF 1.0 - Portable Document Format", | ||
"1.0", | ||
"application/pdf", | ||
"Page Description", | ||
"extension match pdf; byte match at [[0 8] [8 5]]", | ||
"", | ||
}, | ||
{ | ||
"pronom", | ||
"fmt/3", | ||
"Graphics Interchange Format", | ||
"87a", | ||
"image/gif", | ||
"Image (Raster)", | ||
"extension match gif; byte match at [[0 6] [6 1]]", | ||
"", | ||
}, | ||
{ | ||
"pronom", | ||
"fmt/5", | ||
"Audio/Video Interleaved Format", | ||
"", | ||
"video/x-msvideo", | ||
"Audio, Video", | ||
"extension match avi; byte match at [[0 12] [32 16] [68 12]]", | ||
"", | ||
}, | ||
} | ||
|
||
// TestPronom looks to see if PRONOM identification results for a | ||
// minimized PRONOM dataset are correct and contain the information we | ||
// anticipate. | ||
func TestPronom(t *testing.T) { | ||
sf := siegfried.New() | ||
config.SetHome(DataPath) | ||
identifier, err := pronom.New(config.SetLimit(minimalPronom)) | ||
if err != nil { | ||
t.Errorf("Error creating new PRONOM identifier: %s", err) | ||
} | ||
sf.Add(identifier) | ||
makeSkeletons() | ||
skeletonFS := fstest.MapFS(skeletons) | ||
testDirListing, err := skeletonFS.ReadDir(".") | ||
if err != nil { | ||
t.Fatalf("Error reading test files directory: %s", err) | ||
} | ||
const resultLen int = 8 | ||
results := make([]pronomIdentificationTests, 0) | ||
for _, val := range testDirListing { | ||
testFilePath := filepath.Join(".", val.Name()) | ||
reader, _ := skeletonFS.Open(val.Name()) | ||
res, _ := sf.Identify(reader, testFilePath, "") | ||
result := res[0].Values() | ||
if len(result) != resultLen { | ||
t.Errorf("Result len: %d not %d", len(result), resultLen) | ||
} | ||
idResult := pronomIdentificationTests{ | ||
result[0], // identifier | ||
result[1], // PUID | ||
result[2], // label | ||
result[3], // version | ||
result[4], // mime | ||
result[5], // types | ||
result[6], // details | ||
result[7], // error | ||
} | ||
results = append(results, idResult) | ||
} | ||
// Sort expected results and received results to make them | ||
// comparable. | ||
sort.Slice(pronomIDs, func(i, j int) bool { | ||
return pronomIDs[i].puid < pronomIDs[j].puid | ||
}) | ||
sort.Slice(results, func(i, j int) bool { | ||
return results[i].puid < results[j].puid | ||
}) | ||
// Compare results on a result by result basis. | ||
for idx, res := range results { | ||
//t.Error(res) | ||
if !reflect.DeepEqual(res, pronomIDs[idx]) { | ||
t.Errorf("Results not equal for %s; expected %v; got %v", res.puid, pronomIDs[idx], res) | ||
} | ||
} | ||
config.Clear()() | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.