-
Notifications
You must be signed in to change notification settings - Fork 1
/
schema.js
200 lines (177 loc) · 10 KB
/
schema.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
const anythingPattern = /[\s\S]*/su;
// https://www.loc.gov/marc/specifications/specrecstruc.html
// tag. A three character string used to identify or label an associated variable field.
// The tag may consist of ASCII numeric characters (decimal integers 0-9) and/or ASCII alphabetic characters (uppercase or lowercase, but not both).
// https://www.loc.gov/marc/specifications/specrecstruc.html
// control field. A variable field containing information useful or required for the processing of the record.
// Control fields are assigned tags beginning with two zeroes. Control fields with fixed length data elements are restricted to ASCII graphics.
// NOTE: Aleph uses also some other controlfields with non-numeric tags (FMT, LDR if its handled as a controlfield)
// ASCII - all printable/graphic: 32-126 (\x20 - \x7E)
const controlFieldTagPattern = /^(?:[0A-Z][0A-Z][0-9A-Z]|[0a-z][0a-z][0-9a-z])$/u;
const controlFieldValuePattern = /^[\x20-\x7E]*$/u;
// https://www.loc.gov/marc/specifications/specrecstruc.html
// data field. A variable field containing bibliographic or other data. Data fields are assigned tags beginning with characters other than two zeroes.
// Data fields contain data in any MARC 21 character set unless a field-specific restriction applies.
const dataFieldTagPattern = /^(?:[1-9A-Z][0-9A-Z][0-9A-Z]|[0-9A-Z][1-9A-Z][0-9A-Z]|[1-9a-z][0-9a-z][0-9a-z]|[0-9a-z][1-9a-z][0-9a-z])$/u;
// https://www.loc.gov/marc/specifications/specrecstruc.html
// data element identifier: A one-character code used to identify individual data elements within a variable field.
// The data element may be any ASCII lowercase alphabetic, numeric, or graphic symbol except blank.
//
// http://oeis.org/wiki/ASCII#ASCII_graphic.2Fnongraphic_characters
// "Among the ninety-five ASCII printable characters, there are the ninety-four [visible] ASCII graphic characters
// (of which the space is not) and the [invisible] ASCII nongraphic character, namely the space character."
// ASCII - all printable/graphic: 32-126 (\x20 - \x7E)
// ASCII - blank/space: 32 (\x20)
// ASCII - uppercase alphabetic: 65-90 (\x41 - \x5A)
// eslint-disable-next-line no-control-regex
const subfieldCodePattern = /^[\x21-\x40\x5B-\x7E]$/u;
// https://www.loc.gov/marc/specifications/specrecstruc.html:
// ... An indicator may be any ASCII lowercase alphabetic, numeric, or blank .
const indicatorPattern = /^[0-9a-z ]$/u;
// Option to not allow ASCII control characters in subfield values
// eslint-disable-next-line no-control-regex
const dataFieldValuePatternNoControlCharacters = /^[^\x00-\x1F\x7F]*$/u;
// Match anything - no restrictions
const dataFieldValuePattern = /.*/u;
// https://www.loc.gov/marc/specifications/specrecstruc.html
// ... MARC 21 sets the length of the length of field portion of the entry at four characters, thus a field may contain a maximum of 9999 octets.
// Note: We're limiting controlField value length and sibfieldValue length with this parameter, records can be too long before single field hitting
// this restriction
const maximumFieldLength = 9999;
// DEVELOP: Can we somehow check actual field length in addition to single field/subfield value
// 9999 includes indicators + subfield separators + subfield coded in datafields, these could be subtracted
// DEVELOP: Can we check the record length (maximum 99999 octets) ?
// https://www.loc.gov/marc/specifications/specrecstruc.html
// Record length (character positions 00-04), contains a five-character ASCII numeric string equal to the length of the entire record,
// including itself and the record terminator. The five-character numeric string is right justified and unused positions contain zeroes (zero fill).
// The maximum length of a record is 99999 octets.
// DEVELOP: We could add a checker for MARC21 hardcoded codes in leader
// https://www.loc.gov/marc/specifications/specrecstruc.html#leader
// ...
// * Indicator count (character position 10), contains one ASCII numeric character specifying the number of indicators
// occurring in each variable data field. In MARC 21 records, the indicator count is always 2.
// * Subfield code length (character position 11), contains one ASCII numeric character specifying the sum of the lengths
// of the delimiter and the data element identifier used in the record. In MARC 21 records,
// the subfield code length is always 2. The ANSI Z39.2 and ISO 2709 name for this data element is identifier length .
// * Entry map (character positions 20-23), contains four single digit ASCII numeric characters that specify the structure of the entries in the directory.
// ** Length of length-of-field (character position 20): specifies the length of that part of each directory entry; in MARC 21 records, it is always set to 4.
// ** Length of starting-character-position (character position 21): specifies the length of that part of each directory entry; in MARC 21 records, it is always set to 5.
// ** Length of implementation-defined (character position 22): specifies that part of each directory entry; in MARC 21 records, a directory entry does not contain an implementation-defined portion, therefore this position is always set to 0.
// ** Undefined (character position 23): this character position is undefined; it is always set to 0.
// Default setting for validationOptions:
// strict: false // All validationOptions below are set to true
//
// fields: true, // Do not allow record without fields
// subfields: true, // Do not allow empty subfields
// subfieldValues: true, // Do not allow subfields without value
// controlFieldValues: true // Do not allow controlFields without value
// leader: false, // Do not allow record without leader, with empty leader or with leader with length != 24
// characters: false // Do not allow erronous characters in tags, indicators and subfield codes
// noControlCharacters: false, // Do not allow ASCII control characters in field/subfield values
// noAdditionalProperties: false // Do not allow additional properties in fields
export default function ({strict = false, fields = true, subfields = true, subfieldValues = true, controlFieldValues = true, leader = false, characters = false, noControlCharacters = false, noAdditionalProperties = false}) {
if (strict) {
return schema({fields: true, subfields: true, subfieldValues: true, controlFieldValues: true, leader: true, characters: true, noControlCharacters: true, noAdditionalProperties: true});
}
return schema({fields, subfields, subfieldValues, controlFieldValues, leader, characters, noControlCharacters, noAdditionalProperties});
}
function schema({fields = true, subfields = true, subfieldValues = true, controlFieldValues = true, leader = false, characters = false, noControlCharacters = false, noAdditionalProperties = false}) {
return {
type: 'object',
properties: {
leader: {
type: 'string',
minLength: leader ? 24 : 0,
maxLength: leader ? 24 : maximumFieldLength,
pattern: characters ? controlFieldValuePattern : anythingPattern,
maxOccurence: 1
},
fields: {
type: 'array',
minItems: fields ? 1 : 0,
items: {
anyOf: [
{
type: 'object',
properties: {
tag: {
type: 'string',
minLength: 3,
maxLength: 3,
pattern: characters ? controlFieldTagPattern : anythingPattern
},
value: {
type: 'string',
minLength: controlFieldValues ? 1 : 0,
maxLength: maximumFieldLength,
pattern: characters ? controlFieldValuePattern : anythingPattern
},
ind1: false,
ind2: false,
subfields: false
},
required: controlFieldValues ? ['tag', 'value'] : ['tag'],
additionalProperties: !noAdditionalProperties
},
{
type: 'object',
properties: {
tag: {
type: 'string',
minLength: 3,
maxLength: 3,
pattern: characters ? dataFieldTagPattern : anythingPattern
},
ind1: {
type: 'string',
minLength: 1,
maxLength: 1,
pattern: characters ? indicatorPattern : anythingPattern
},
ind2: {
type: 'string',
minLength: 1,
maxLength: 1,
pattern: characters ? indicatorPattern : anythingPattern
},
subfields: {
type: 'array',
minItems: subfields ? 1 : 0,
items: {
type: 'object',
properties: {
code: {
type: 'string',
minLength: 1,
maxLength: 1,
pattern: characters ? subfieldCodePattern : anythingPattern
},
value: {
type: 'string',
maxLength: maximumFieldLength,
minLength: subfieldValues ? 1 : 0,
pattern: noControlCharacters ? dataFieldValuePatternNoControlCharacters : dataFieldValuePattern
}
},
required: subfieldValues ? ['code', 'value'] : ['code'],
additionalProperties: !noAdditionalProperties
}
},
value: false,
additionalProperties: !noAdditionalProperties
},
required: [
'tag',
'ind1',
'ind2',
'subfields'
],
additionalProperties: !noAdditionalProperties
}
]
}
}
},
required: leader ? ['leader', 'fields'] : ['fields']
};
}