-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
59 changed files
with
1,523 additions
and
40 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,117 @@ | ||
/** | ||
* | ||
* @licstart The following is the entire license notice for the JavaScript code in this file. | ||
* | ||
* Merge MARC records | ||
* | ||
* Copyright (C) 2015-2019 University Of Helsinki (The National Library Of Finland) | ||
* | ||
* This file is part of marc-record-merge-js | ||
* marc-record-merge-js program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License as | ||
* published by the Free Software Foundation, either version 3 of the | ||
* License, or (at your option) any later version. | ||
* | ||
* marc-record-merge-js is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
* | ||
* @licend The above is the entire license notice | ||
* for the JavaScript code in this file. | ||
* | ||
*/ | ||
import {normalizeSync} from 'normalize-diacritics'; | ||
|
||
/**
 * Default subfield comparator: two subfields are equal only when both
 * their codes and their values are identical.
 *
 * @param {{code: string, value: string}} subfieldA - First subfield.
 * @param {{code: string, value: string}} subfieldB - Second subfield.
 * @returns {boolean} True when code and value both match exactly.
 */
export function strictEquality(subfieldA, subfieldB) {
  return subfieldA.code === subfieldB.code &&
    subfieldA.value === subfieldB.value;
}
|
||
/**
 * Lenient subfield comparator: two subfields are equal when their codes
 * match and either value is a substring of the other.
 *
 * Note: an empty value is a substring of every string, so a subfield with
 * an empty value matches any subfield that has the same code.
 *
 * @param {{code: string, value: string}} subfieldA - First subfield.
 * @param {{code: string, value: string}} subfieldB - Second subfield.
 * @returns {boolean} True when codes match and one value contains the other.
 */
export function subsetEquality(subfieldA, subfieldB) {
  return subfieldA.code === subfieldB.code &&
    (subfieldA.value.includes(subfieldB.value) || subfieldB.value.includes(subfieldA.value));
}
|
||
/**
 * Create a "select" reducer: given a base and a source record, keep the base
 * field matched by `pattern` or replace it in place with the source field
 * when the source field is judged the better version.
 *
 * The source field replaces the base field when either:
 *  - both fields have the same number of subfields, the same number of
 *    subfields on each side found a counterpart, and the source's normalized
 *    subfield values are longer in total; or
 *  - the source has more subfields and every base subfield has a counterpart
 *    in the source.
 * In every other case the base record is returned untouched.
 *
 * @param {Object} options
 * @param {RegExp} options.pattern - Passed to `record.get()` to pick the fields to compare.
 * @param {Function} [options.equalityFunction=strictEquality] - Comparator called with two
 *   normalized `{code, value}` subfields; returns true when they count as equal.
 * @returns {Function} Reducer `(base, source) => base`. Note that `base` is mutated
 *   when a replacement happens.
 * @throws {Error} When a field matched by `pattern` has a `value` property
 *   (i.e. it is a control field rather than a data field).
 */
export default ({pattern, equalityFunction = strictEquality}) => (base, source) => {
  const baseFields = base.get(pattern);
  const sourceFields = source.get(pattern);

  checkFieldType(baseFields);
  checkFieldType(sourceFields);

  // Only an unambiguous one-to-one comparison is supported. This also covers
  // the case where either record has no matching field at all, which would
  // otherwise crash below when reading `.tag` of undefined.
  if (baseFields.length !== 1 || sourceFields.length !== 1) {
    return base;
  }
  const [baseField] = baseFields;
  const [sourceField] = sourceFields;

  // The pattern may match several tags; only compare fields of the same tag.
  if (baseField.tag !== sourceField.tag) {
    return base;
  }
  const baseSubs = baseField.subfields;
  const sourceSubs = sourceField.subfields;

  const baseSubsNormalized = baseSubs
    .map(({code, value}) => ({code, value: normalizeSubfieldValue(value)}));

  const sourceSubsNormalized = sourceSubs
    .map(({code, value}) => ({code, value: normalizeSubfieldValue(value)}));

  // Base subfields that have at least one equal counterpart in the source.
  const equalSubfieldsBase = baseSubsNormalized
    .filter(baseSubfield => sourceSubsNormalized
      .some(sourceSubfield => equalityFunction(baseSubfield, sourceSubfield)));

  // Source subfields that have at least one equal counterpart in the base.
  const equalSubfieldsSource = sourceSubsNormalized
    .filter(sourceSubfield => baseSubsNormalized
      .some(baseSubfield => equalityFunction(sourceSubfield, baseSubfield)));

  // Same size but some base subfield has no counterpart: the fields differ, keep base.
  if (baseSubs.length === sourceSubs.length && equalSubfieldsBase.length < baseSubs.length) {
    return base;
  }

  // Same size and symmetric match: prefer whichever carries more text.
  if (baseSubs.length === sourceSubs.length && equalSubfieldsBase.length === equalSubfieldsSource.length) {
    // Initial value 0 keeps reduce() from throwing on an empty subfield list.
    const totalSubfieldLengthBase = baseSubsNormalized
      .map(({value}) => value.length)
      .reduce((acc, value) => acc + value, 0);
    const totalSubfieldLengthSource = sourceSubsNormalized
      .map(({value}) => value.length)
      .reduce((acc, value) => acc + value, 0);

    if (totalSubfieldLengthSource > totalSubfieldLengthBase) {
      return replaceBasefieldWithSourcefield(base);
    }
  }

  // Source is a superset of the base field: take the source field.
  if (sourceSubs.length > baseSubs.length && equalSubfieldsBase.length === baseSubs.length) {
    return replaceBasefieldWithSourcefield(base);
  }

  return base;

  // Swap the compared base field for the source field, in place.
  function replaceBasefieldWithSourcefield(record) {
    const index = record.fields.indexOf(baseField);
    // splice(-1, ...) would silently overwrite the LAST field, so skip the
    // replacement if the base field cannot be located in the record.
    if (index === -1) {
      return record;
    }
    record.fields.splice(index, 1, sourceField); // eslint-disable-line functional/immutable-data
    return record;
  }

  // Reject control fields: anything carrying a `value` property is not a data field.
  function checkFieldType(fields) {
    fields.forEach(field => {
      if ('value' in field) { // eslint-disable-line functional/no-conditional-statement
        throw new Error('Invalid control field, expected data field');
      }
    });
  }

  // Strip diacritics, lowercase, drop punctuation and collapse whitespace so
  // that cosmetic differences do not affect the comparison.
  function normalizeSubfieldValue(value) {
    // Regexp options: g: global search, u: unicode
    const punctuation = /[.,\-/#!$%^&*;:{}=_`~()[\]]/gu;
    return normalizeSync(value).toLowerCase().replace(punctuation, '').replace(/\s+/gu, ' ').trim();
  }
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,64 @@ | ||
/** | ||
* | ||
* @licstart The following is the entire license notice for the JavaScript code in this file. | ||
* | ||
* Merge MARC records | ||
* | ||
* Copyright (C) 2015-2019 University Of Helsinki (The National Library Of Finland) | ||
* | ||
* This file is part of marc-record-merge-js | ||
* marc-record-merge-js program is free software: you can redistribute it and/or modify | ||
* it under the terms of the GNU Lesser General Public License as | ||
* published by the Free Software Foundation, either version 3 of the | ||
* License, or (at your option) any later version. | ||
* | ||
* marc-record-merge-js is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the | ||
* GNU Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public License | ||
* along with this program. If not, see <http://www.gnu.org/licenses/>. | ||
* | ||
* @licend The above is the entire license notice | ||
* for the JavaScript code in this file. | ||
* | ||
*/ | ||
import chai from 'chai'; | ||
import fs from 'fs'; | ||
import path from 'path'; | ||
import {MarcRecord} from '@natlibfi/marc-record'; | ||
import createReducer, {subsetEquality} from './select'; | ||
import fixturesFactory, {READERS} from '@natlibfi/fixura'; | ||
|
||
MarcRecord.setValidationOptions({subfieldValues: false}); | ||
|
||
// Fixture-driven tests: every sub-directory of test-fixtures/reducers/select
// is one test case. Each case reads base.json, source.json and pattern.txt,
// plus optionally merged.json (expected result), expected-error.txt (the case
// must throw) and equalityFunction.txt (comparator name).
describe('reducers/select', () => {
  const {expect} = chai;
  const fixturesPath = path.join(__dirname, '..', '..', 'test-fixtures', 'reducers', 'select');

  fs.readdirSync(fixturesPath).forEach(subDir => {
    // NOTE(review): failWhenNotFound: false presumably makes getFixture return
    // undefined for missing optional files — confirm against fixura docs.
    const {getFixture} = fixturesFactory({root: [fixturesPath, subDir], reader: READERS.JSON, failWhenNotFound: false});
    it(subDir, () => {
      const baseTest = new MarcRecord(getFixture('base.json'));
      const sourceTest = new MarcRecord(getFixture('source.json'));
      const patternTest = new RegExp(getFixture({components: ['pattern.txt'], reader: READERS.TEXT}), 'u');
      const expectedRecord = getFixture('merged.json');
      const expectedError = getFixture({components: ['expected-error.txt'], reader: READERS.TEXT});
      const equalityFunction = getEqualityFunction();
      const reducer = createReducer({pattern: patternTest, equalityFunction});

      if (expectedError) {
        // The reducer call must happen INSIDE the function handed to expect(),
        // and .to.throw must be chained on expect() itself; otherwise nothing
        // is executed and the test passes vacuously.
        expect(() => reducer(baseTest, sourceTest)).to.throw(Error, 'control field');
        return;
      }
      const mergedRecord = reducer(baseTest, sourceTest);
      expect(mergedRecord.toObject()).to.eql(expectedRecord);

      // Falls back to undefined so the reducer uses its default comparator.
      function getEqualityFunction() {
        const functionName = getFixture({components: ['equalityFunction.txt'], reader: READERS.TEXT});
        return functionName === 'subsetEquality' ? subsetEquality : undefined;
      }
    });
  });
});
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
{ | ||
"leader": "01331cam a22003494i 4500", | ||
"fields": [ | ||
{ | ||
"tag": "001", | ||
"value": "007346734" | ||
}, | ||
{ | ||
"tag": "010", | ||
"value": "this is wrong", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "a", | ||
"value": "hello world" | ||
} | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
Invalid control field, expected data field |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"leader": "01331cam a22003494i 4500", | ||
"fields": [ | ||
{ | ||
"tag": "001", | ||
"value": "007346734" | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "a", | ||
"value": "hello world" | ||
} | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
^010$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"leader": "01331cam a22003494i 4500", | ||
"fields": [ | ||
{ | ||
"tag": "001", | ||
"value": "007346734" | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "a", | ||
"value": "hello world" | ||
} | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
{ | ||
"leader": "01331cam a22003494i 4500", | ||
"fields": [ | ||
{ | ||
"tag": "001", | ||
"value": "007346734" | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "a", | ||
"value": "Hello world" | ||
} | ||
] | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "b", | ||
"value": "goodbye world" | ||
} | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
{ | ||
"leader": "01331cam a22003494i 4500", | ||
"fields": [ | ||
{ | ||
"tag": "001", | ||
"value": "007346734" | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "a", | ||
"value": "Hello world" | ||
} | ||
] | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "b", | ||
"value": "goodbye world" | ||
} | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1 @@ | ||
^010$ |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,42 @@ | ||
{ | ||
"leader": "01331cam a22003494i 4500", | ||
"fields": [ | ||
{ | ||
"tag": "001", | ||
"value": "007346734" | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "a", | ||
"value": "hello world" | ||
} | ||
] | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "b", | ||
"value": "goodbye world" | ||
} | ||
] | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "c", | ||
"value": "have a nice day" | ||
} | ||
] | ||
} | ||
] | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
{ | ||
"leader": "01331cam a22003494i 4500", | ||
"fields": [ | ||
{ | ||
"tag": "001", | ||
"value": "007346734" | ||
}, | ||
{ | ||
"tag": "010", | ||
"ind1": " ", | ||
"ind2": " ", | ||
"subfields": [ | ||
{ | ||
"code": "a", | ||
"value": "Hello world" | ||
} | ||
] | ||
} | ||
] | ||
} |
Oops, something went wrong.