Skip to content

Commit

Permalink
Add select reducer (#58)
Browse files Browse the repository at this point in the history
  • Loading branch information
ssilvonen authored Oct 8, 2020
1 parent 5fbadc6 commit 11798cd
Show file tree
Hide file tree
Showing 59 changed files with 1,523 additions and 40 deletions.
404 changes: 364 additions & 40 deletions package-lock.json

Large diffs are not rendered by default.

117 changes: 117 additions & 0 deletions src/reducers/select.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/**
*
* @licstart The following is the entire license notice for the JavaScript code in this file.
*
* Merge MARC records
*
* Copyright (C) 2015-2019 University Of Helsinki (The National Library Of Finland)
*
* This file is part of marc-record-merge-js
* marc-record-merge-js program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* marc-record-merge-js is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @licend The above is the entire license notice
* for the JavaScript code in this file.
*
*/
import {normalizeSync} from 'normalize-diacritics';

/**
 * Equality function that treats two subfields as equal only when both
 * their codes and their values are strictly identical.
 *
 * @param {Object} subfieldA - Subfield with `code` and `value` properties.
 * @param {Object} subfieldB - Subfield with `code` and `value` properties.
 * @returns {boolean} True when code and value both match with `===`.
 */
export function strictEquality(subfieldA, subfieldB) {
  if (subfieldA.code !== subfieldB.code) {
    return false;
  }
  return subfieldA.value === subfieldB.value;
}

/**
 * Equality function that treats two subfields as equal when their codes
 * match and one value is contained in the other (in either direction).
 *
 * @param {Object} subfieldA - Subfield with `code` and `value` properties.
 * @param {Object} subfieldB - Subfield with `code` and `value` properties.
 * @returns {boolean} True when the codes are identical and either value
 * occurs inside the other.
 */
export function subsetEquality(subfieldA, subfieldB) {
  if (subfieldA.code !== subfieldB.code) {
    return false;
  }

  const {value: valueA} = subfieldA;
  const {value: valueB} = subfieldB;

  if (valueA.indexOf(valueB) > -1) {
    return true;
  }
  return valueB.indexOf(valueA) > -1;
}

/**
 * Creates a reducer that decides whether the single base field matching
 * `pattern` should be kept or replaced by the single matching source field.
 *
 * Decision rules (subfield values are compared after normalization):
 * - Base is returned unchanged unless both records have exactly one field
 *   matching `pattern` and those two fields share the same tag.
 * - Equal subfield counts, but some base subfield has no equal in source:
 *   base is kept.
 * - Equal subfield counts and all subfields pair up: source replaces base
 *   only when the total length of its normalized values is greater.
 * - Source has more subfields and every base subfield has an equal in
 *   source: source replaces base.
 * - Anything else: base is kept.
 *
 * The replacement is done in place on `base.fields`.
 *
 * @param {Object} options - Reducer options.
 * @param {RegExp|string} options.pattern - Query passed to `record.get()` to pick the fields.
 * @param {Function} [options.equalityFunction=strictEquality] - Called with two
 * normalized `{code, value}` subfields; returns true when they count as equal.
 * @returns {Function} Reducer `(base, source) => base`.
 * @throws {Error} When a matched field has a `value` property (control field).
 */
export default ({pattern, equalityFunction = strictEquality}) => (base, source) => {
  const baseFields = base.get(pattern);
  const sourceFields = source.get(pattern);

  checkFieldType(baseFields);
  checkFieldType(sourceFields);

  // Selection needs exactly one candidate on each side. This also covers the
  // case where either record has no matching field, which previously crashed
  // on `baseField.tag` because the destructured field was undefined.
  if (baseFields.length !== 1 || sourceFields.length !== 1) {
    return base;
  }
  const [baseField] = baseFields;
  const [sourceField] = sourceFields;

  if (baseField.tag !== sourceField.tag) {
    return base;
  }
  const baseSubs = baseField.subfields;
  const sourceSubs = sourceField.subfields;

  const baseSubsNormalized = baseSubs
    .map(({code, value}) => ({code, value: normalizeSubfieldValue(value)}));

  const sourceSubsNormalized = sourceSubs
    .map(({code, value}) => ({code, value: normalizeSubfieldValue(value)}));

  // Base subfields that have at least one equal counterpart in source
  const equalSubfieldsBase = baseSubsNormalized
    .filter(baseSubfield => sourceSubsNormalized
      .some(sourceSubfield => equalityFunction(baseSubfield, sourceSubfield)));

  // Source subfields that have at least one equal counterpart in base
  const equalSubfieldsSource = sourceSubsNormalized
    .filter(sourceSubfield => baseSubsNormalized
      .some(baseSubfield => equalityFunction(sourceSubfield, baseSubfield)));

  if (baseSubs.length === sourceSubs.length && equalSubfieldsBase.length < baseSubs.length) {
    return base;
  }

  if (baseSubs.length === sourceSubs.length && equalSubfieldsBase.length === equalSubfieldsSource.length) {
    const totalSubfieldLengthBase = totalValueLength(baseSubsNormalized);
    const totalSubfieldLengthSource = totalValueLength(sourceSubsNormalized);

    if (totalSubfieldLengthSource > totalSubfieldLengthBase) {
      return replaceBasefieldWithSourcefield(base);
    }
  }

  if (sourceSubs.length > baseSubs.length && equalSubfieldsBase.length === baseSubs.length) {
    return replaceBasefieldWithSourcefield(base);
  }

  return base;

  // Sums the lengths of the subfield values. The explicit initial value keeps
  // reduce from throwing when a field has no subfields at all.
  function totalValueLength(subfields) {
    return subfields.reduce((total, {value}) => total + value.length, 0);
  }

  // Swaps the base field for the source field in place.
  // NOTE(review): relies on base.get() returning the same object references
  // that are stored in base.fields - confirm against the record implementation.
  function replaceBasefieldWithSourcefield(base) {
    const index = base.fields.findIndex(field => field === baseField);
    base.fields.splice(index, 1, sourceField); // eslint-disable-line functional/immutable-data
    return base;
  }

  // Throws when any of the given fields carries a `value` property; returns
  // the fields otherwise.
  function checkFieldType(fields) {
    const checkedFields = fields.map(field => {
      if ('value' in field) { // eslint-disable-line functional/no-conditional-statement
        throw new Error('Invalid control field, expected data field');
      }
      return field;
    });
    return checkedFields;
  }

  // Normalizes a subfield value for comparison: diacritics are normalized,
  // text is lowercased, the listed punctuation is removed and whitespace runs
  // are collapsed to a single space.
  function normalizeSubfieldValue(value) {
    // Regexp options: g: global search, u: unicode
    const punctuation = /[.,\-/#!$%^&*;:{}=_`~()[\]]/gu;
    // String.prototype.replace takes two arguments; flags belong on the regexp.
    return normalizeSync(value).toLowerCase().replace(punctuation, '').replace(/\s+/gu, ' ').trim();
  }
};
64 changes: 64 additions & 0 deletions src/reducers/select.spec.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/**
*
* @licstart The following is the entire license notice for the JavaScript code in this file.
*
* Merge MARC records
*
* Copyright (C) 2015-2019 University Of Helsinki (The National Library Of Finland)
*
* This file is part of marc-record-merge-js
* marc-record-merge-js program is free software: you can redistribute it and/or modify
* it under the terms of the GNU Lesser General Public License as
* published by the Free Software Foundation, either version 3 of the
* License, or (at your option) any later version.
*
* marc-record-merge-js is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
* @licend The above is the entire license notice
* for the JavaScript code in this file.
*
*/
import chai from 'chai';
import fs from 'fs';
import path from 'path';
import {MarcRecord} from '@natlibfi/marc-record';
import createReducer, {subsetEquality} from './select';
import fixturesFactory, {READERS} from '@natlibfi/fixura';

// Fixture records are loaded with the `subfieldValues` validation option turned off.
MarcRecord.setValidationOptions({subfieldValues: false});

// Fixture-driven tests: every sub-directory of test-fixtures/reducers/select
// is one test case made of base.json, source.json, pattern.txt, merged.json
// and the optional expected-error.txt / equalityFunction.txt files.
describe('reducers/select', () => {
  const {expect} = chai;
  const fixturesPath = path.join(__dirname, '..', '..', 'test-fixtures', 'reducers', 'select');

  fs.readdirSync(fixturesPath).forEach(subDir => {
    const {getFixture} = fixturesFactory({root: [fixturesPath, subDir], reader: READERS.JSON, failWhenNotFound: false});
    it(subDir, () => {
      const baseTest = new MarcRecord(getFixture('base.json'));
      const sourceTest = new MarcRecord(getFixture('source.json'));
      const patternTest = new RegExp(getFixture({components: ['pattern.txt'], reader: READERS.TEXT}), 'u');
      const expectedRecord = getFixture('merged.json');
      const expectedError = getFixture({components: ['expected-error.txt'], reader: READERS.TEXT});
      const equalityFunction = getEqualityFunction();
      const reducer = createReducer({pattern: patternTest, equalityFunction});

      if (expectedError) {
        // The reducer must actually run inside the function handed to expect();
        // the previous form wrapped the whole assertion in an arrow function
        // that was never called, so the error case could not fail.
        expect(() => reducer(baseTest, sourceTest)).to.throw(Error, 'control field');
        return;
      }
      const mergedRecord = reducer(baseTest, sourceTest);
      expect(mergedRecord.toObject()).to.eql(expectedRecord);

      // Picks the equality function named in equalityFunction.txt; undefined
      // lets the reducer fall back to its default.
      function getEqualityFunction() {
        const functionName = getFixture({components: ['equalityFunction.txt'], reader: READERS.TEXT});
        return functionName === 'subsetEquality' ? subsetEquality : undefined;
      }
    });
  });
});
21 changes: 21 additions & 0 deletions test-fixtures/reducers/select/01/base.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{
"tag": "001",
"value": "007346734"
},
{
"tag": "010",
"value": "this is wrong",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "a",
"value": "hello world"
}
]
}
]
}
1 change: 1 addition & 0 deletions test-fixtures/reducers/select/01/expected-error.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Invalid control field, expected data field
20 changes: 20 additions & 0 deletions test-fixtures/reducers/select/01/merged.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{
"tag": "001",
"value": "007346734"
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "a",
"value": "hello world"
}
]
}
]
}
1 change: 1 addition & 0 deletions test-fixtures/reducers/select/01/pattern.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
^010$
20 changes: 20 additions & 0 deletions test-fixtures/reducers/select/01/source.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{
"tag": "001",
"value": "007346734"
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "a",
"value": "hello world"
}
]
}
]
}
31 changes: 31 additions & 0 deletions test-fixtures/reducers/select/02/base.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{
"tag": "001",
"value": "007346734"
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "a",
"value": "Hello world"
}
]
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "b",
"value": "goodbye world"
}
]
}
]
}
31 changes: 31 additions & 0 deletions test-fixtures/reducers/select/02/merged.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{
"tag": "001",
"value": "007346734"
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "a",
"value": "Hello world"
}
]
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "b",
"value": "goodbye world"
}
]
}
]
}
1 change: 1 addition & 0 deletions test-fixtures/reducers/select/02/pattern.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
^010$
42 changes: 42 additions & 0 deletions test-fixtures/reducers/select/02/source.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{
"tag": "001",
"value": "007346734"
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "a",
"value": "hello world"
}
]
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "b",
"value": "goodbye world"
}
]
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "c",
"value": "have a nice day"
}
]
}
]
}
20 changes: 20 additions & 0 deletions test-fixtures/reducers/select/03/base.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
{
"leader": "01331cam a22003494i 4500",
"fields": [
{
"tag": "001",
"value": "007346734"
},
{
"tag": "010",
"ind1": " ",
"ind2": " ",
"subfields": [
{
"code": "a",
"value": "Hello world"
}
]
}
]
}
Loading

0 comments on commit 11798cd

Please sign in to comment.