From 6b321b6e2b29879c1eb97c720728d1fb30fcaef5 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 16 May 2022 11:47:33 +0200 Subject: [PATCH 01/74] Make committer date always equal to author date GitHub displays the committer date so if we only use `--date` option to git commit, the date displayed on GitHub UI when browsing history is confusing --- src/storage-adapters/git/git.js | 22 ++++++++++++++-------- src/storage-adapters/git/index.js | 4 ++-- 2 files changed, 16 insertions(+), 10 deletions(-) diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index 5b2b784e8..fd5e5c0fa 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -32,19 +32,25 @@ export default class Git { return this.git.add(this.relativePath(filepath)); } - async commit(filepath, message, authorDate) { - const options = {}; + async commit(filepath, message, date) { + if (date) { + const commitDate = new Date(date).toISOString(); - if (authorDate) { - options['--date'] = new Date(authorDate).toISOString(); + process.env.GIT_AUTHOR_DATE = commitDate; + process.env.GIT_COMMITTER_DATE = commitDate; } let summary; - if (filepath) { - summary = await this.git.commit(message, this.relativePath(filepath), options); - } else { - summary = await this.git.commit(message, options); + try { + if (filepath) { + summary = await this.git.commit(message, filepath); + } else { + summary = await this.git.commit(message); + } + } finally { + process.env.GIT_AUTHOR_DATE = ''; + process.env.GIT_COMMITTER_DATE = ''; } if (!summary.commit) { // Nothing committed, no hash to return diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index d7b11e588..9491393b0 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -133,11 +133,11 @@ export default class GitAdapter { return filePath; } - async _commit(filePath, message, authorDate) { + async _commit(filePath, message, date) { try { await 
this.git.add(filePath); - return await this.git.commit(filePath, message, authorDate); + return await this.git.commit(filePath, message, date); } catch (error) { throw new Error(`Could not commit ${filePath} with message "${message}" due to error: "${error}"`); } From bafc6120d5bf5c8480333782f3ad0f3f116e642b Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 4 May 2022 10:54:54 +0200 Subject: [PATCH 02/74] Define commit message prefixes as constants --- src/storage-adapters/git/index.js | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 9491393b0..416ffce2d 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -14,6 +14,12 @@ const fs = fsApi.promises; mime.define({ 'text/markdown': ['md'] }, true); // ensure extension for markdown files is `.md` and not `.markdown` +const COMMIT_MESSAGE_PREFIX = { + startTracking: 'Start tracking', + refilter: 'Refilter', + update: 'Update', +}; + export default class GitAdapter { constructor({ path, author, publish, prefixMessageToSnapshotId }) { this.path = path; @@ -158,9 +164,9 @@ export default class GitAdapter { } _generateCommitMessage({ serviceId, documentType, isRefilter, snapshotId, isFirstRecord }) { - let prefix = isRefilter ? 'Refilter' : 'Update'; + let prefix = isRefilter ? COMMIT_MESSAGE_PREFIX.refilter : COMMIT_MESSAGE_PREFIX.update; - prefix = isFirstRecord ? 'Start tracking' : prefix; + prefix = isFirstRecord ? COMMIT_MESSAGE_PREFIX.startTracking : prefix; let message = `${prefix} ${serviceId} ${documentType}`; From 85d26ced4d4ea732cb9f0334f63e9fba82afc482 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 4 May 2022 10:53:26 +0200 Subject: [PATCH 03/74] Improve GitAdapter records iteration No more git checkout drastically improves performance and ensures that the repository is not checked out to a commit in the middle of the history in case of an error. 
Also allow ordering commits before iteration. Also unify and improve the returned commit data. Lazy-load record content. --- src/storage-adapters/git/git.js | 4 ++ src/storage-adapters/git/index.js | 108 +++++++++++++++++------------- 2 files changed, 64 insertions(+), 48 deletions(-) diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index fd5e5c0fa..896b209d5 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -120,6 +120,10 @@ export default class Git { return this.git.raw(options); } + async show(options) { + return this.git.show(options); + } + relativePath(absolutePath) { // Git needs a path relative to the .git directory, not an absolute one return path.relative(this.path, absolutePath); diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 416ffce2d..bf1659075 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -20,6 +20,8 @@ const COMMIT_MESSAGE_PREFIX = { update: 'Update', }; +const PDF_MIME_TYPE = 'application/pdf'; + export default class GitAdapter { constructor({ path, author, publish, prefixMessageToSnapshotId }) { this.path = path; @@ -70,61 +72,23 @@ export default class GitAdapter { return {}; } - const mimeType = mime.getType(filePath); - const readFileOptions = {}; - - if (mimeType.startsWith('text/')) { - readFileOptions.encoding = 'utf8'; - } - - return { - id: commit.hash, - content: await fs.readFile(recordFilePath, readFileOptions), - mimeType, - fetchDate: new Date(commit.date), - isRefilter: commit.message.startsWith('Refilter'), - }; + return this._getRecordFromCommitMetadata(commit); } async* iterate() { - const initialCommitHash = (await this.git.raw([ 'rev-list', '--max-parents=0', 'HEAD' ])).trim(); - const currentBranchName = (await this.git.raw([ 'rev-parse', '--abbrev-ref', 'HEAD' ])).trim(); - - try { - let previousCommitHash; - - /* eslint-disable no-await-in-loop */ - while (previousCommitHash != 
initialCommitHash) { - const [{ hash, date, message, diff: { files: [{ file: relativeFilePath }] } }] = await this.git.log([ '-1', '--stat=4096', '--no-merges' ]); // get current commit information - - if (message.match(/^(Start tracking|Update|Refilter)/)) { // Skip commits which are not a document versions (initial README or LICENSE commits for example) - const absoluteFilePath = `${this.path}/${relativeFilePath}`; - - const serviceId = path.dirname(relativeFilePath); - const extension = path.extname(relativeFilePath); - const documentType = path.basename(relativeFilePath, extension); + const commits = await this._getMeaningfulCommitsAscending(); - yield { - id: hash, - serviceId, - documentType, - content: await fs.readFile(absoluteFilePath, { encoding: 'utf8' }), - fetchDate: new Date(date), - }; - } - - previousCommitHash = hash; - - if (initialCommitHash != hash) { - await this.git.checkout(['HEAD^']); // checkout the parent commit - } - } - /* eslint-enable no-await-in-loop */ - } finally { - await this.git.checkout([currentBranchName]); + for (const commit of commits) { + yield this._getRecordFromCommitMetadata(commit); } } + async _getMeaningfulCommitsAscending() { + return (await this.git.log([ '--reverse', '--no-merges' ])) + .filter(({ message }) => message.match(/^(Start tracking|Update|Refilter)/)) // Skip commits which are not a document record (README, LICENSE, …) + .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending order + } + async _save({ serviceId, documentType, content, fileExtension }) { const directory = `${this.path}/${serviceId}`; @@ -177,6 +141,54 @@ export default class GitAdapter { return message; } + async _getRecordFromCommitMetadata(commit) { + const { hash, date, message, body, diff } = commit; + + let relativeFilePath; + + if (diff) { + ({ files: [{ file: relativeFilePath }] } = diff); + } + + if (!relativeFilePath) { + relativeFilePath = (await 
this.git.show([ '--name-only', '--pretty=', hash ])).trim(); + } + + const snapshotIdMatch = body.match(/\b[0-9a-f]{5,40}\b/g); + const adapter = this; + + return { + id: hash, + serviceId: path.dirname(relativeFilePath), + documentType: path.basename(relativeFilePath, path.extname(relativeFilePath)), + mimeType: mime.getType(relativeFilePath), + fetchDate: new Date(date), + isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIX.startTracking), + isRefilter: message.startsWith(COMMIT_MESSAGE_PREFIX.refilter), + snapshotId: snapshotIdMatch && snapshotIdMatch[0], + get content() { // In this scope, `this` is the `result` object, not the adapter + return (async () => { + if (this.mimeType != PDF_MIME_TYPE) { + return adapter.git.show(`${hash}:${relativeFilePath}`); + } + + // In case of PDF, `git show` cannot be used as it converts PDF binary into string which not retain the original binary representation + // It is impossible to restore the original binary data from the resulting string + let pdfBuffer; + + try { + await adapter.git.raw([ 'restore', '-s', hash, '--', relativeFilePath ]); // So, temporarily restore the PDF file to a specific commit + pdfBuffer = await fs.readFile(`${adapter.path}/${relativeFilePath}`); // …read the content + } finally { + await adapter.git.raw([ 'restore', '-s', 'HEAD', '--', relativeFilePath ]); // …and finally restore the file to its last state. 
+ } + + return pdfBuffer; + })(); + }, + }; + } + async _removeAllRecords() { const files = await fs.readdir(this.path, { withFileTypes: true }); const promises = files.map(file => { From 4dd31d7ef54a67d14d9f75cd60498988d056e846 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 4 May 2022 10:56:47 +0200 Subject: [PATCH 04/74] Allow passing a promise for `content` to `record` function --- src/storage-adapters/git/index.js | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index bf1659075..045b5c54f 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -39,6 +39,9 @@ export default class GitAdapter { } async record({ serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId }) { + if (content instanceof Promise) { + content = await content; + } const isFirstRecord = await this._isFirstRecord(serviceId, documentType); const message = this._generateCommitMessage({ serviceId, documentType, isRefilter, snapshotId, isFirstRecord }); const fileExtension = mime.getExtension(mimeType); From 2a3e422d9f06975c7b6599ec295b97eeb339e0d9 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 4 May 2022 10:59:00 +0200 Subject: [PATCH 05/74] Handle asynchronous record content loading in recorder --- src/archivist/recorder/index.js | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/archivist/recorder/index.js b/src/archivist/recorder/index.js index c20f444b2..ef9039d3a 100644 --- a/src/archivist/recorder/index.js +++ b/src/archivist/recorder/index.js @@ -17,7 +17,12 @@ export default class Recorder { } async getLatestSnapshot(serviceId, documentType) { - return this.snapshotsStorageAdapter.getLatestRecord(serviceId, documentType); + const record = await this.snapshotsStorageAdapter.getLatestRecord(serviceId, documentType); + + return { + ...record, + content: await record.content, + }; } async recordSnapshot({ serviceId, 
documentType, fetchDate, mimeType, content }) { From a2f1f85bbe717d870db0cd74184a08c7aaa83464 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 18 May 2022 14:59:25 +0200 Subject: [PATCH 06/74] Update git adapter tests --- src/storage-adapters/git/index.test.js | 49 +++++++++++++++++--------- 1 file changed, 33 insertions(+), 16 deletions(-) diff --git a/src/storage-adapters/git/index.test.js b/src/storage-adapters/git/index.test.js index 3cd5c7604..dbcba43e2 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/storage-adapters/git/index.test.js @@ -21,7 +21,9 @@ const CONTENT = 'ToS fixture data with UTF-8 çhãràčtęrs'; const EXPECTED_FILE_PATH = `${RECORDER_PATH}/${SERVICE_PROVIDER_ID}/${DOCUMENT_TYPE}.html`; const EXPECTED_PDF_FILE_PATH = EXPECTED_FILE_PATH.replace('html', 'pdf'); const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z'); -const SNAPSHOT_ID = 'snapshot_id'; +const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z'); +const FETCH_DATE_EARLIER = new Date('2000-01-01T06:00:00.000Z'); +const SNAPSHOT_ID = '513fadb2ae415c87747047e33287805d59e2dd55'; const MIME_TYPE = 'text/html'; const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/terms.pdf'), { encoding: 'utf8' }); const PDF_MIME_TYPE = 'application/pdf'; @@ -261,6 +263,7 @@ describe('GitAdapter', () => { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, + mimeType: MIME_TYPE, }); numberOfRecordsBefore = (await git.log()).length; @@ -269,6 +272,7 @@ describe('GitAdapter', () => { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, + mimeType: MIME_TYPE, })); numberOfRecordsAfter = (await git.log()).length; @@ -397,8 +401,8 @@ describe('GitAdapter', () => { expect(latestRecord.id).to.include(lastSnapshotId); }); - it('returns the latest record content', () => { - expect(latestRecord.content.toString('utf8')).to.equal(UPDATED_FILE_CONTENT); + it('returns the latest record content', async () => { + 
expect((await latestRecord.content).toString('utf8')).to.equal(UPDATED_FILE_CONTENT); }); it('returns the latest record mime type', () => { @@ -424,8 +428,8 @@ describe('GitAdapter', () => { expect(latestRecord.id).to.include(lastSnapshotId); }); - it('returns the latest record content', () => { - expect(latestRecord.content.toString('utf8')).to.equal(PDF_CONTENT); + it('returns the latest record content', async () => { + expect((await latestRecord.content).toString('utf8')).to.equal(PDF_CONTENT); }); it('returns the latest record mime type', () => { @@ -456,8 +460,12 @@ describe('GitAdapter', () => { }); describe('#iterate', () => { + const expectedIds = []; + const ids = []; + const fetchDates = []; + before(async () => { - await subject.record({ + const { id: id1 } = await subject.record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, @@ -466,36 +474,45 @@ describe('GitAdapter', () => { mimeType: MIME_TYPE, }); - await subject.record({ + expectedIds.push(id1); + + const { id: id2 } = await subject.record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated`, - fetchDate: FETCH_DATE, + fetchDate: FETCH_DATE_LATER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, }); - await subject.record({ + expectedIds.push(id2); + + const { id: id3 } = await subject.record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated 2`, isRefilter: true, - fetchDate: FETCH_DATE, + fetchDate: FETCH_DATE_EARLIER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, }); + + expectedIds.push(id3); + + for await (const record of subject.iterate()) { + ids.push(record.id); + fetchDates.push(record.fetchDate); + } }); after(async () => subject._removeAllRecords()); it('iterates through all records', async () => { - const result = []; - - for await (const record of subject.iterate()) { - result.push(record.content); - } + expect(ids).to.have.members(expectedIds); + }); - 
expect(result).to.deep.equal([ `${CONTENT} - updated 2`, `${CONTENT} - updated`, CONTENT ]); + it('iterates in ascending order', async () => { + expect(fetchDates).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); }); }); }); From 342ae17f8c05dac7a0473f22d426f56047c4d8c9 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 11 May 2022 16:54:01 +0200 Subject: [PATCH 07/74] Remove magic strings --- src/storage-adapters/git/index.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 045b5c54f..a85d74a52 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -19,6 +19,7 @@ const COMMIT_MESSAGE_PREFIX = { refilter: 'Refilter', update: 'Update', }; +const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${COMMIT_MESSAGE_PREFIX.startTracking}|${COMMIT_MESSAGE_PREFIX.refilter}|${COMMIT_MESSAGE_PREFIX.update})`); const PDF_MIME_TYPE = 'application/pdf'; @@ -88,7 +89,7 @@ export default class GitAdapter { async _getMeaningfulCommitsAscending() { return (await this.git.log([ '--reverse', '--no-merges' ])) - .filter(({ message }) => message.match(/^(Start tracking|Update|Refilter)/)) // Skip commits which are not a document record (README, LICENSE, …) + .filter(({ message }) => message.match(COMMIT_MESSAGE_PREFIXES_REGEXP)) // Skip commits which are not a document record (README, LICENSE, …) .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending order } From 5e3fcf4f577a98012029cb470589f9e3eb24eeb0 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 15:10:56 +0200 Subject: [PATCH 08/74] Add iterate function to MongoAdapter --- src/storage-adapters/mongo/index.js | 39 ++++++++++++++-- src/storage-adapters/mongo/index.test.js | 58 ++++++++++++++++++++++++ 2 files changed, 92 insertions(+), 5 deletions(-) diff --git 
a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index 46cf37e9f..2409f3480 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -3,7 +3,7 @@ * Object IDs are used as opaque unique IDs. */ -import { MongoClient, ObjectId } from 'mongodb'; +import { Binary, MongoClient, ObjectId } from 'mongodb'; const PDF_MIME_TYPE = 'application/pdf'; @@ -88,13 +88,42 @@ export default class MongoAdapter { }; } - /* eslint-disable */ async* iterate() { - throw new Error('#iterate is not yet implemented in the MongoDB storage adapter'); + const cursor = this.collection.find().sort({ fetchDate: 1 }); + + /* eslint-disable no-await-in-loop */ + while (await cursor.hasNext()) { + const record = await cursor.next(); + + yield this.getRecordFromMongoMetadata(record); + } + /* eslint-enable no-await-in-loop */ } - /* eslint-enable */ async _removeAllRecords() { - return this.collection.deleteMany({}); + return this.collection.deleteMany(); + } + + getRecordFromMongoMetadata({ _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId }) { + const { collection } = this; + const result = { + id: _id.toString(), + serviceId, + documentType, + mimeType, + fetchDate: new Date(fetchDate), + isFirstRecord: Boolean(isFirstRecord), + isRefilter: Boolean(isRefilter), + snapshotId: snapshotId && snapshotId.toString(), + get content() { + return (async () => { + const { content } = await collection.findOne({ _id }, { projection: { content: 1 } }); + + return content instanceof Binary ? 
content.buffer : content; + })(); + }, + }; + + return result; } } diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index c3ac45d72..d681f3c5a 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -20,6 +20,7 @@ const CONTENT = 'ToS fixture data with UTF-8 çhãràčtęrs'; const MIME_TYPE = 'text/html'; const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z'); const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z'); +const FETCH_DATE_EARLIER = new Date('2000-01-01T06:00:00.000Z'); const SNAPSHOT_ID = '61af86dc5ff5caa74ae926ad'; const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/terms.pdf'), { encoding: 'utf8' }); const UPDATED_PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/termsModified.pdf'), { encoding: 'utf8' }); @@ -412,4 +413,61 @@ describe('MongoAdapter', () => { }); }); }); + + describe('#iterate', () => { + const expectedIds = []; + const ids = []; + const fetchDates = []; + + before(async () => { + const { id: id1 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + expectedIds.push(id1); + + const { id: id2 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + expectedIds.push(id2); + + const { id: id3 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated 2`, + isRefilter: true, + fetchDate: FETCH_DATE_EARLIER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + expectedIds.push(id3); + + for await (const record of subject.iterate()) { + ids.push(record.id); + fetchDates.push(record.fetchDate); + } + }); + + 
after(async () => subject._removeAllRecords()); + + it('iterates through all records', async () => { + expect(ids).to.have.members(expectedIds); + }); + + it('iterates in ascending order', async () => { + expect(fetchDates).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + }); + }); }); From 7bb2fac529948ff03b2e9b2c991e294921848f97 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 15:11:39 +0200 Subject: [PATCH 09/74] Improve record function to MongoAdapter --- src/storage-adapters/mongo/index.js | 24 +++++++----------------- src/storage-adapters/mongo/index.test.js | 8 ++++---- 2 files changed, 11 insertions(+), 21 deletions(-) diff --git a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index 2409f3480..4056d1fe2 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -5,8 +5,6 @@ import { Binary, MongoClient, ObjectId } from 'mongodb'; -const PDF_MIME_TYPE = 'application/pdf'; - export default class MongoAdapter { constructor({ database: databaseName, collection: collectionName, connectionURI }) { const client = new MongoClient(connectionURI); @@ -29,23 +27,21 @@ export default class MongoAdapter { return this.client.close(); } - async record({ serviceId, documentType, content: passedContent, mimeType, fetchDate, isRefilter, snapshotId }) { - let content = passedContent; - - if (mimeType == PDF_MIME_TYPE) { - content = passedContent.toString('utf-8'); // Serialize PDF + async record({ serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId }) { + if (content instanceof Promise) { + content = await content; } const previousRecord = await this.getLatestRecord(serviceId, documentType); - if (previousRecord && previousRecord.content == content) { + if (previousRecord && await previousRecord.content == content) { return {}; } const recordProperties = Object.fromEntries(Object.entries({ serviceId, documentType, - content: passedContent, + 
content, mimeType, fetchDate, isRefilter, @@ -77,15 +73,9 @@ export default class MongoAdapter { return {}; } - const { _id, content, mimeType, fetchDate, isRefilter } = record; + return this.getRecordFromMongoMetadata(record); + } - return { - id: _id.toString(), - content, - mimeType, - fetchDate: new Date(fetchDate), - isRefilter: Boolean(isRefilter), - }; } async* iterate() { diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index d681f3c5a..6989664e7 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -347,8 +347,8 @@ describe('MongoAdapter', () => { expect(latestRecord.id).to.include(lastSnapshotId); }); - it('returns the latest record content', () => { - expect(latestRecord.content.toString('utf8')).to.equal(UPDATED_CONTENT); + it('returns the latest record content', async () => { + expect((await latestRecord.content).toString('utf8')).to.equal(UPDATED_CONTENT); }); it('returns the latest record mime type', () => { @@ -383,8 +383,8 @@ describe('MongoAdapter', () => { expect(latestRecord.id).to.include(lastSnapshotId); }); - it('returns the latest record content', () => { - expect(latestRecord.content).to.equal(UPDATED_PDF_CONTENT); + it('returns the latest record content', async () => { + expect(await latestRecord.content).to.equal(UPDATED_PDF_CONTENT); }); it('returns the latest record mime type', () => { From 20ef5e0d29ed06904d8ec2c2b080ff0ae02f7bd4 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 11 May 2022 17:54:58 +0200 Subject: [PATCH 10/74] Fix deprecation warning --- src/storage-adapters/git/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index a85d74a52..a2d83edf3 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -199,7 +199,7 @@ export default class GitAdapter { const filePath = path.join(this.path, 
file.name); if (file.isDirectory()) { - return fs.rmdir(filePath, { recursive: true }); + return fs.rm(filePath, { recursive: true }); } return fs.unlink(filePath); From cb8e026cbbaf03cde5b893b99fb45060b0ad4f8c Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 15:43:23 +0200 Subject: [PATCH 11/74] Improve MongoAdapter tests --- src/storage-adapters/mongo/index.test.js | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index 6989664e7..3990a6046 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -22,8 +22,8 @@ const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z'); const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z'); const FETCH_DATE_EARLIER = new Date('2000-01-01T06:00:00.000Z'); const SNAPSHOT_ID = '61af86dc5ff5caa74ae926ad'; -const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/terms.pdf'), { encoding: 'utf8' }); -const UPDATED_PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/termsModified.pdf'), { encoding: 'utf8' }); +const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/terms.pdf')); +const UPDATED_PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/termsModified.pdf')); const PDF_MIME_TYPE = 'application/pdf'; let collection; @@ -301,8 +301,10 @@ describe('MongoAdapter', () => { expect(mongoDocument._id.toString()).to.equal(record.id); }); - it('stores the proper content', () => { - expect(mongoDocument.content).to.equal(PDF_CONTENT); + it('stores the proper content', async () => { + const isSameContent = Buffer.compare(mongoDocument.content.buffer, PDF_CONTENT) == 0; + + expect(isSameContent).to.be.true; }); it('stores the mime type', () => { @@ -384,7 +386,9 @@ describe('MongoAdapter', () => { }); it('returns the latest record content', async () => { 
- expect(await latestRecord.content).to.equal(UPDATED_PDF_CONTENT); + const isSameContent = Buffer.compare(await latestRecord.content, UPDATED_PDF_CONTENT) == 0; + + expect(isSameContent).to.be.true; }); it('returns the latest record mime type', () => { From 24267535de1d5c7d0730a927174f11d1af3a0db0 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 17:19:46 +0200 Subject: [PATCH 12/74] Fix Recorder tests as content is now lazy loaded --- src/archivist/recorder/index.test.js | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/src/archivist/recorder/index.test.js b/src/archivist/recorder/index.test.js index 5f150357a..089934971 100644 --- a/src/archivist/recorder/index.test.js +++ b/src/archivist/recorder/index.test.js @@ -117,7 +117,7 @@ describe('Recorder', () => { after(async () => snapshotsAdapter._removeAllRecords()); it('records the document with the proper content', async () => { - expect(record.content).to.equal(CONTENT); + expect(await record.content).to.equal(CONTENT); }); it('returns the record id', async () => { @@ -155,7 +155,7 @@ describe('Recorder', () => { after(async () => snapshotsAdapter._removeAllRecords()); it('records the document with the proper content', async () => { - expect(record.content).to.equal(UPDATED_CONTENT); + expect(await record.content).to.equal(UPDATED_CONTENT); }); it('returns the record id', async () => { @@ -273,7 +273,7 @@ describe('Recorder', () => { after(async () => versionsAdapter._removeAllRecords()); it('records the document with the proper content', async () => { - expect(record.content).to.equal(CONTENT); + expect(await record.content).to.equal(CONTENT); }); it('returns the record id', async () => { @@ -313,7 +313,7 @@ describe('Recorder', () => { after(async () => versionsAdapter._removeAllRecords()); it('records the document with the proper content', async () => { - expect(record.content).to.equal(UPDATED_CONTENT); + expect(await record.content).to.equal(UPDATED_CONTENT); 
}); it('records in the document that it is not a refilter', async () => { @@ -438,7 +438,7 @@ describe('Recorder', () => { after(async () => versionsAdapter._removeAllRecords()); after(async () => versionsAdapter._removeAllRecords()); it('records the document with the proper content', async () => { - expect(record.content).to.equal(CONTENT); + expect(await record.content).to.equal(CONTENT); }); it('returns the record id', async () => { @@ -478,7 +478,7 @@ describe('Recorder', () => { after(async () => versionsAdapter._removeAllRecords()); it('records the document with the proper content', async () => { - expect(record.content).to.equal(UPDATED_CONTENT); + expect(await record.content).to.equal(UPDATED_CONTENT); }); it('records in the document that it is a refilter', async () => { From d10df955e6352f2b756a60ba5d38ad7e7d0f2ce0 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 17:25:21 +0200 Subject: [PATCH 13/74] Fix Export tests as content is now lazy loaded --- scripts/dataset/export/index.js | 2 +- scripts/dataset/export/index.test.js | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/scripts/dataset/export/index.js b/scripts/dataset/export/index.js index 2eac4d892..f6cfbcb68 100644 --- a/scripts/dataset/export/index.js +++ b/scripts/dataset/export/index.js @@ -47,7 +47,7 @@ export default async function generate({ archivePath, releaseDate }) { logger.info({ message: versionPath, counter: index, hash: version.id }); archive.stream.append( - content, + await content, { name: `${archive.basename}/${versionPath}` }, ); index++; diff --git a/scripts/dataset/export/index.test.js b/scripts/dataset/export/index.test.js index fa46eb83c..ad445ec1c 100644 --- a/scripts/dataset/export/index.test.js +++ b/scripts/dataset/export/index.test.js @@ -30,6 +30,8 @@ const FOURTH_FETCH_DATE = '2022-01-01T12:12:24.000Z'; const FIRST_CONTENT = 'First Content'; const SECOND_CONTENT = 'Second Content'; +const MIME_TYPE = 'text/markdown'; + const 
SNAPSHOT_ID = '721ce4a63ad399ecbdb548a66d6d327e7bc97876'; const RELEASE_DATE = '2022-01-01T18:21:00.000Z'; @@ -57,6 +59,7 @@ describe('Export', () => { serviceId: FIRST_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: FIRST_CONTENT, + mimeType: MIME_TYPE, fetchDate: FIRST_FETCH_DATE, snapshotId: SNAPSHOT_ID, }); @@ -65,6 +68,7 @@ describe('Export', () => { serviceId: FIRST_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: SECOND_CONTENT, + mimeType: MIME_TYPE, fetchDate: SECOND_FETCH_DATE, snapshotId: SNAPSHOT_ID, }); @@ -73,6 +77,7 @@ describe('Export', () => { serviceId: SECOND_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: FIRST_CONTENT, + mimeType: MIME_TYPE, fetchDate: THIRD_FETCH_DATE, snapshotId: SNAPSHOT_ID, }); @@ -81,6 +86,7 @@ describe('Export', () => { serviceId: SECOND_SERVICE_PROVIDER_ID, documentType: SECOND_DOCUMENT_TYPE, content: FIRST_CONTENT, + mimeType: MIME_TYPE, fetchDate: FOURTH_FETCH_DATE, snapshotId: SNAPSHOT_ID, }); From 1cdf524a61101c0bfac1e41c005ddaa21085aaea Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 18 May 2022 14:59:42 +0200 Subject: [PATCH 14/74] Add getRecord function to GitAdapter --- src/storage-adapters/git/index.js | 6 +++ src/storage-adapters/git/index.test.js | 60 ++++++++++++++++++++++++-- 2 files changed, 62 insertions(+), 4 deletions(-) diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index a2d83edf3..7cf9bdd50 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -79,6 +79,12 @@ export default class GitAdapter { return this._getRecordFromCommitMetadata(commit); } + async getRecord(recordId) { + const [commit] = await this.git.log([ '-1', recordId ]); + + return this._getRecordFromCommitMetadata(commit); + } + async* iterate() { const commits = await this._getMeaningfulCommitsAscending(); diff --git a/src/storage-adapters/git/index.test.js b/src/storage-adapters/git/index.test.js index 
dbcba43e2..0239a92d5 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/storage-adapters/git/index.test.js @@ -13,7 +13,7 @@ import GitAdapter from './index.js'; const { expect } = chai; const __dirname = path.dirname(fileURLToPath(import.meta.url)); -const RECORDER_PATH = path.resolve(__dirname, '../../../', config.get('recorder.snapshots.storage.git.path')); +const RECORDER_PATH = path.resolve(__dirname, '../../../', config.get('recorder.versions.storage.git.path')); const SERVICE_PROVIDER_ID = 'test_service'; const DOCUMENT_TYPE = 'Terms of Service'; @@ -37,15 +37,15 @@ describe('GitAdapter', () => { git = new Git({ path: RECORDER_PATH, author: { - name: config.get('recorder.snapshots.storage.git.author.name'), - email: config.get('recorder.snapshots.storage.git.author.email'), + name: config.get('recorder.versions.storage.git.author.name'), + email: config.get('recorder.versions.storage.git.author.email'), }, }); await git.initialize(); subject = new GitAdapter({ - ...config.get('recorder.snapshots.storage.git'), + ...config.get('recorder.versions.storage.git'), path: RECORDER_PATH, }); @@ -369,6 +369,58 @@ describe('GitAdapter', () => { }); }); + describe('#getRecord', () => { + let record; + let id; + + before(async () => { + ({ id } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + })); + + (record = await subject.getRecord(id)); + }); + + after(async () => subject._removeAllRecords()); + + it('returns the record id', () => { + expect(record.id).to.include(id); + }); + + it('returns a boolean to know if it is the first record', () => { + expect(record.isFirstRecord).to.be.true; + }); + + it('returns the service id', () => { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + }); + + it('returns the document type', () => { + expect(record.documentType).to.equal(DOCUMENT_TYPE); + }); + + it('returns a 
asynchronous content getter', async () => { + expect(await record.content).to.equal(CONTENT); + }); + + it('stores the fetch date', () => { + expect(new Date(record.fetchDate).getTime()).to.equal(FETCH_DATE.getTime()); + }); + + it('stores the mime type', () => { + expect(record.mimeType).to.equal(MIME_TYPE); + }); + + it('stores the snapshot ID', () => { + expect(record.snapshotId).to.equal(SNAPSHOT_ID); + }); + }); + describe('#getLatestRecord', () => { context('when there are records for the given service', () => { let lastSnapshotId; From ff9373eeb7cff1e58e67db068bba8fffb6071699 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 17:14:31 +0200 Subject: [PATCH 15/74] Add getRecords function to GitAdapter --- src/storage-adapters/git/index.js | 5 +++ src/storage-adapters/git/index.test.js | 59 ++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 7cf9bdd50..9eb3c7d11 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -85,6 +85,11 @@ export default class GitAdapter { return this._getRecordFromCommitMetadata(commit); } + async getRecords() { + return Promise.all((await this._getMeaningfulCommitsAscending()) + .map(this._getRecordFromCommitMetadata.bind(this))); + } + async* iterate() { const commits = await this._getMeaningfulCommitsAscending(); diff --git a/src/storage-adapters/git/index.test.js b/src/storage-adapters/git/index.test.js index 0239a92d5..6a44595e1 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/storage-adapters/git/index.test.js @@ -421,6 +421,65 @@ describe('GitAdapter', () => { }); }); + describe('#getRecords', () => { + let records; + const expectedIds = []; + + before(async () => { + const { id: id1 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotId: SNAPSHOT_ID, + mimeType: 
MIME_TYPE, + }); + + expectedIds.push(id1); + + const { id: id2 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + expectedIds.push(id2); + + const { id: id3 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated 2`, + isRefilter: true, + fetchDate: FETCH_DATE_EARLIER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + expectedIds.push(id3); + + (records = await subject.getRecords()); + }); + + after(async () => subject._removeAllRecords()); + + it('returns all records', () => { + expect(records.length).to.equal(3); + }); + + it('returns records with proper keys', () => { + for (const record of records) { + expect(record).to.have.keys([ 'id', 'serviceId', 'documentType', 'mimeType', 'fetchDate', 'content', 'isFirstRecord', 'isRefilter', 'snapshotId' ]); + } + }); + + it('returns records in ascending order', async () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + }); + }); + describe('#getLatestRecord', () => { context('when there are records for the given service', () => { let lastSnapshotId; From f41f0d5ecf7fc749959ad0eef1c6c0a840f151ab Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 15:12:08 +0200 Subject: [PATCH 16/74] Add getRecord function to MongoAdapter --- src/storage-adapters/mongo/index.js | 10 +++++ src/storage-adapters/mongo/index.test.js | 52 ++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index 4056d1fe2..349c8fdab 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -76,6 +76,16 @@ export default class MongoAdapter { return this.getRecordFromMongoMetadata(record); } + async 
getRecord(recordId) { + const record = await this.collection.findOne({ _id: new ObjectId(recordId) }); + + if (!record) { + return {}; + } + + return this.getRecordFromMongoMetadata(record); + } + } async* iterate() { diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index 3990a6046..6696bf60a 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -313,6 +313,58 @@ describe('MongoAdapter', () => { }); }); + describe('#getRecord', () => { + let record; + let id; + + before(async () => { + ({ id } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + })); + + (record = await subject.getRecord(id)); + }); + + after(async () => subject._removeAllRecords()); + + it('returns the record id', () => { + expect(record.id).to.include(id); + }); + + it('returns a boolean to know if it is the first record', () => { + expect(record.isFirstRecord).to.be.true; + }); + + it('returns the service id', () => { + expect(record.serviceId).to.equal(SERVICE_PROVIDER_ID); + }); + + it('returns the document type', () => { + expect(record.documentType).to.equal(DOCUMENT_TYPE); + }); + + it('returns a asynchronous content getter', async () => { + expect(await record.content).to.equal(CONTENT); + }); + + it('stores the fetch date', () => { + expect(new Date(record.fetchDate).getTime()).to.equal(FETCH_DATE.getTime()); + }); + + it('stores the mime type', () => { + expect(record.mimeType).to.equal(MIME_TYPE); + }); + + it('stores the snapshot ID', () => { + expect(record.snapshotId).to.equal(SNAPSHOT_ID); + }); + }); + describe('#getLatestRecord', () => { context('when there are records for the given service', () => { let lastSnapshotId; From d08771ad2911903e58af2b39194250e5f2edaf4c Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 15:12:17 +0200 
Subject: [PATCH 17/74] Add getRecords function to MongoAdapter --- src/storage-adapters/mongo/index.js | 3 ++ src/storage-adapters/mongo/index.test.js | 59 ++++++++++++++++++++++++ 2 files changed, 62 insertions(+) diff --git a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index 349c8fdab..2eee756ab 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -86,6 +86,9 @@ export default class MongoAdapter { return this.getRecordFromMongoMetadata(record); } + async getRecords() { + return (await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) + .map(record => this.getRecordFromMongoMetadata(record)); } async* iterate() { diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index 6696bf60a..c022f9171 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -365,6 +365,65 @@ describe('MongoAdapter', () => { }); }); + describe('#getRecords', () => { + let records; + const expectedIds = []; + + before(async () => { + const { id: id1 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + // snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + expectedIds.push(id1); + + const { id: id2 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + expectedIds.push(id2); + + const { id: id3 } = await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated 2`, + isRefilter: true, + fetchDate: FETCH_DATE_EARLIER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + expectedIds.push(id3); + + (records = await subject.getRecords()); + }); + + after(async () => subject._removeAllRecords()); 
+ + it('returns all records', () => { + expect(records.length).to.equal(3); + }); + + it('returns records with proper keys', () => { + for (const record of records) { + expect(record).to.have.keys([ 'id', 'serviceId', 'documentType', 'mimeType', 'fetchDate', 'content', 'isFirstRecord', 'isRefilter', 'snapshotId' ]); + } + }); + + it('returns records in ascending order', async () => { + expect(records.map(record => record.fetchDate)).to.deep.equal([ FETCH_DATE_EARLIER, FETCH_DATE, FETCH_DATE_LATER ]); + }); + }); + describe('#getLatestRecord', () => { context('when there are records for the given service', () => { let lastSnapshotId; From 78f99297cb1910d3b1ee7c6e4504d7f0c35e531d Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 17:02:38 +0200 Subject: [PATCH 18/74] Add count function to MongoAdapter --- src/storage-adapters/mongo/index.js | 4 +++ src/storage-adapters/mongo/index.test.js | 40 ++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index 2eee756ab..bfafbefa1 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -91,6 +91,10 @@ export default class MongoAdapter { .map(record => this.getRecordFromMongoMetadata(record)); } + async count() { + return this.collection.find().count(); + } + async* iterate() { const cursor = this.collection.find().sort({ fetchDate: 1 }); diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index c022f9171..63b5b2795 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -424,6 +424,46 @@ describe('MongoAdapter', () => { }); }); + describe('#count', () => { + let count; + + before(async () => { + await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + await 
subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated 2`, + isRefilter: true, + fetchDate: FETCH_DATE_EARLIER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + (count = await subject.count()); + }); + + after(async () => subject._removeAllRecords()); + + it('returns the proper count', async () => { + expect(count).to.equal(3); + }); + }); + describe('#getLatestRecord', () => { context('when there are records for the given service', () => { let lastSnapshotId; From 9b784aa22076bdcbb6ce1b2554671025bc2b0cd3 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 10 May 2022 17:02:49 +0200 Subject: [PATCH 19/74] Add count function to GitAdapter --- src/storage-adapters/git/index.js | 4 +++ src/storage-adapters/git/index.test.js | 40 ++++++++++++++++++++++++++ 2 files changed, 44 insertions(+) diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 9eb3c7d11..3eedac55a 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -90,6 +90,10 @@ export default class GitAdapter { .map(this._getRecordFromCommitMetadata.bind(this))); } + async count() { + return Number((await this.git.raw([ 'rev-list', '--count', 'HEAD' ])).trim()); + } + async* iterate() { const commits = await this._getMeaningfulCommitsAscending(); diff --git a/src/storage-adapters/git/index.test.js b/src/storage-adapters/git/index.test.js index 6a44595e1..a1fb80509 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/storage-adapters/git/index.test.js @@ -480,6 +480,46 @@ describe('GitAdapter', () => { }); }); + describe('#count', () => { + let count; + + before(async () => { + await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: 
DOCUMENT_TYPE, + content: CONTENT, + fetchDate: FETCH_DATE, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated`, + fetchDate: FETCH_DATE_LATER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + await subject.record({ + serviceId: SERVICE_PROVIDER_ID, + documentType: DOCUMENT_TYPE, + content: `${CONTENT} - updated 2`, + isRefilter: true, + fetchDate: FETCH_DATE_EARLIER, + snapshotId: SNAPSHOT_ID, + mimeType: MIME_TYPE, + }); + + (count = await subject.count()); + }); + + after(async () => subject._removeAllRecords()); + + it('returns the proper count', async () => { + expect(count).to.equal(3); + }); + }); + describe('#getLatestRecord', () => { context('when there are records for the given service', () => { let lastSnapshotId; From a1ce9a0050295d8cd8dada2c3bffae7dab44aef5 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 11 May 2022 10:56:50 +0200 Subject: [PATCH 20/74] Add history scripts logger module --- scripts/history/logger/index.js | 39 +++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) create mode 100644 scripts/history/logger/index.js diff --git a/scripts/history/logger/index.js b/scripts/history/logger/index.js new file mode 100644 index 000000000..afe0491e1 --- /dev/null +++ b/scripts/history/logger/index.js @@ -0,0 +1,39 @@ +import winston from 'winston'; + +import logger from '../../../src/logger/index.js'; + +const { combine, timestamp, printf, colorize } = winston.format; + +export const format = combine( + colorize(), + timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), + printf(({ level, message, timestamp, serviceId, type, id, current, total }) => { + let prefix = ''.padEnd(8); + + if (current && total) { + prefix = `${Number(((current) / total) * 100).toFixed(2)}%`.padEnd(8); + } + + if (serviceId) { + prefix += `${serviceId}`.padEnd(30); + } + + if (type) { + if (type.length > 50) { + type = 
`${type.substring(0, 48)}…`; + } + + prefix += `${type}`.padEnd(50); + } + + if (id) { + prefix += `${id}`.padEnd(42); + } + + return `${timestamp} ${level.padEnd(15)} ${prefix}${message}`; + }), +); + +logger.format = format; + +export default logger; From 15b0252dd80e84498ae3a7bab991f28dcedaf998 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 11 May 2022 17:08:45 +0200 Subject: [PATCH 21/74] Improve naming --- scripts/dataset/export/index.test.js | 2 +- src/archivist/index.test.js | 6 ++-- src/archivist/recorder/index.js | 2 +- src/archivist/recorder/index.test.js | 46 ++++++++++++------------ src/storage-adapters/git/index.js | 21 ++++++----- src/storage-adapters/git/index.test.js | 46 ++++++++++++------------ src/storage-adapters/mongo/index.js | 30 ++++++++-------- src/storage-adapters/mongo/index.test.js | 38 ++++++++++---------- 8 files changed, 95 insertions(+), 96 deletions(-) diff --git a/scripts/dataset/export/index.test.js b/scripts/dataset/export/index.test.js index ad445ec1c..8dbefc5c6 100644 --- a/scripts/dataset/export/index.test.js +++ b/scripts/dataset/export/index.test.js @@ -103,7 +103,7 @@ describe('Export', () => { after(async () => { await fs.rm(TMP_PATH, { recursive: true }); - await storageAdapter._removeAllRecords(); + await storageAdapter._removeAll(); }); it('is an archive', () => { diff --git a/src/archivist/index.test.js b/src/archivist/index.test.js index ca6387f64..d6bbafd0a 100644 --- a/src/archivist/index.test.js +++ b/src/archivist/index.test.js @@ -31,7 +31,7 @@ let snapshotsStorageAdapter; let versionsStorageAdapter; async function resetGitRepositories() { - return Promise.all([ snapshotsStorageAdapter._removeAllRecords(), versionsStorageAdapter._removeAllRecords() ]); + return Promise.all([ snapshotsStorageAdapter._removeAll(), versionsStorageAdapter._removeAll() ]); } let gitVersion; @@ -168,8 +168,8 @@ describe('Archivist', function () { await app.initialize(); await app.trackChanges(serviceIds); - ({ id: 
originalSnapshotId } = await snapshotsStorageAdapter.getLatestRecord(SERVICE_A_ID, SERVICE_A_TYPE)); - ({ id: firstVersionId } = await versionsStorageAdapter.getLatestRecord(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: originalSnapshotId } = await snapshotsStorageAdapter.getLatest(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: firstVersionId } = await versionsStorageAdapter.getLatest(SERVICE_A_ID, SERVICE_A_TYPE)); serviceBCommits = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH }); diff --git a/src/archivist/recorder/index.js b/src/archivist/recorder/index.js index ef9039d3a..6af2179d8 100644 --- a/src/archivist/recorder/index.js +++ b/src/archivist/recorder/index.js @@ -17,7 +17,7 @@ export default class Recorder { } async getLatestSnapshot(serviceId, documentType) { - const record = await this.snapshotsStorageAdapter.getLatestRecord(serviceId, documentType); + const record = await this.snapshotsStorageAdapter.getLatest(serviceId, documentType); return { ...record, diff --git a/src/archivist/recorder/index.test.js b/src/archivist/recorder/index.test.js index 089934971..784feef81 100644 --- a/src/archivist/recorder/index.test.js +++ b/src/archivist/recorder/index.test.js @@ -59,12 +59,12 @@ describe('Recorder', () => { }); after(async () => { - await snapshotsAdapter._removeAllRecords(); + await snapshotsAdapter._removeAll(); await recorder.finalize(); }); context('when a required param is missing', () => { - after(async () => snapshotsAdapter._removeAllRecords()); + after(async () => snapshotsAdapter._removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -111,10 +111,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await snapshotsAdapter.getLatestRecord(SERVICE_ID, TYPE); + record = await snapshotsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter._removeAllRecords()); + after(async () => snapshotsAdapter._removeAll()); it('records the document with the proper content', async () => { 
expect(await record.content).to.equal(CONTENT); @@ -149,10 +149,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsAdapter.getLatestRecord(SERVICE_ID, TYPE); + record = await snapshotsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter._removeAllRecords()); + after(async () => snapshotsAdapter._removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -185,10 +185,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsAdapter.getLatestRecord(SERVICE_ID, TYPE); + record = await snapshotsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter._removeAllRecords()); + after(async () => snapshotsAdapter._removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; @@ -217,7 +217,7 @@ describe('Recorder', () => { }); context('when a required param is missing', () => { - after(async () => versionsAdapter._removeAllRecords()); + after(async () => versionsAdapter._removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -267,10 +267,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsAdapter.getLatestRecord(SERVICE_ID, TYPE); + record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAllRecords()); + after(async () => versionsAdapter._removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -307,10 +307,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.getLatestRecord(SERVICE_ID, TYPE); + record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAllRecords()); + after(async () => versionsAdapter._removeAll()); it('records the document with the proper content', async () => { 
expect(await record.content).to.equal(UPDATED_CONTENT); @@ -349,10 +349,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.getLatestRecord(SERVICE_ID, TYPE); + record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAllRecords()); + after(async () => versionsAdapter._removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; @@ -377,12 +377,12 @@ describe('Recorder', () => { }); after(async () => { - await versionsAdapter._removeAllRecords(); + await versionsAdapter._removeAll(); await recorder.finalize(); }); context('when a required param is missing', () => { - after(async () => versionsAdapter._removeAllRecords()); + after(async () => versionsAdapter._removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -432,10 +432,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsAdapter.getLatestRecord(SERVICE_ID, TYPE); + record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAllRecords()); after(async () => versionsAdapter._removeAllRecords()); + after(async () => versionsAdapter._removeAll()); after(async () => versionsAdapter._removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -472,10 +472,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.getLatestRecord(SERVICE_ID, TYPE); + record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAllRecords()); + after(async () => versionsAdapter._removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -514,10 +514,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.getLatestRecord(SERVICE_ID, TYPE); + 
record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAllRecords()); + after(async () => versionsAdapter._removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 3eedac55a..8f474fb04 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -67,7 +67,7 @@ export default class GitAdapter { return this.git.pushChanges(); } - async getLatestRecord(serviceId, documentType) { + async getLatest(serviceId, documentType) { const filePathGlob = this._getPathFor(serviceId, documentType, '*'); const { commit, filePath } = await this.git.findUnique(filePathGlob); const recordFilePath = `${this.path}/${filePath}`; @@ -76,18 +76,17 @@ export default class GitAdapter { return {}; } - return this._getRecordFromCommitMetadata(commit); + return this._convertCommitToRecord(commit); } - async getRecord(recordId) { + async get(recordId) { const [commit] = await this.git.log([ '-1', recordId ]); return this._getRecordFromCommitMetadata(commit); } - async getRecords() { - return Promise.all((await this._getMeaningfulCommitsAscending()) - .map(this._getRecordFromCommitMetadata.bind(this))); + async getAll() { + return Promise.all((await this._getSortedRecordsRelatedCommits()).map(this._convertCommitToRecord.bind(this))); } async count() { @@ -95,14 +94,14 @@ export default class GitAdapter { } async* iterate() { - const commits = await this._getMeaningfulCommitsAscending(); + const commits = await this._getSortedRecordsRelatedCommits(); for (const commit of commits) { - yield this._getRecordFromCommitMetadata(commit); + yield this._convertCommitToRecord(commit); } } - async _getMeaningfulCommitsAscending() { + async _getSortedRecordsRelatedCommits() { return (await this.git.log([ '--reverse', '--no-merges' ])) .filter(({ message }) => message.match(COMMIT_MESSAGE_PREFIXES_REGEXP)) // 
Skip commits which are not a document record (README, LICENSE, …) .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending order @@ -160,7 +159,7 @@ export default class GitAdapter { return message; } - async _getRecordFromCommitMetadata(commit) { + async _convertCommitToRecord(commit) { const { hash, date, message, body, diff } = commit; let relativeFilePath; @@ -208,7 +207,7 @@ export default class GitAdapter { }; } - async _removeAllRecords() { + async _removeAll() { const files = await fs.readdir(this.path, { withFileTypes: true }); const promises = files.map(file => { const filePath = path.join(this.path, file.name); diff --git a/src/storage-adapters/git/index.test.js b/src/storage-adapters/git/index.test.js index a1fb80509..b3282e644 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/storage-adapters/git/index.test.js @@ -61,7 +61,7 @@ describe('GitAdapter', () => { fileExtension: 'html', })); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('creates a file for the given service', () => { expect(fs.readFileSync(EXPECTED_FILE_PATH, { encoding: 'utf8' })).to.equal(CONTENT); @@ -72,7 +72,7 @@ describe('GitAdapter', () => { const NEW_SERVICE_ID = 'test_not_existing_service'; const NEW_SERVICE_EXPECTED_FILE_PATH = `${RECORDER_PATH}/${NEW_SERVICE_ID}/${DOCUMENT_TYPE}.html`; - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('creates a directory and file for the given service', async () => { await subject._save({ @@ -105,7 +105,7 @@ describe('GitAdapter', () => { ([commit] = await git.log()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the id of the commit', () => { expect(commit.hash).to.include(id); @@ -123,7 +123,7 @@ describe('GitAdapter', () => { }); describe('#_isTracked', () => { - after(async () => 
subject._removeAllRecords()); + after(async () => subject._removeAll()); context('when the file does not exists', () => { it('returns false', async () => { @@ -171,7 +171,7 @@ describe('GitAdapter', () => { ([commit] = await git.log()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -242,7 +242,7 @@ describe('GitAdapter', () => { ([commit] = await git.log()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -278,7 +278,7 @@ describe('GitAdapter', () => { numberOfRecordsAfter = (await git.log()).length; }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('does not save the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore); @@ -316,7 +316,7 @@ describe('GitAdapter', () => { ([commit] = await git.log()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -349,7 +349,7 @@ describe('GitAdapter', () => { ([commit] = await git.log()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -369,7 +369,7 @@ describe('GitAdapter', () => { }); }); - describe('#getRecord', () => { + describe('#get', () => { let record; let id; @@ -383,10 +383,10 @@ describe('GitAdapter', () => { mimeType: MIME_TYPE, })); - (record = await subject.getRecord(id)); + (record = await subject.get(id)); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the record id', () => { 
expect(record.id).to.include(id); @@ -421,7 +421,7 @@ describe('GitAdapter', () => { }); }); - describe('#getRecords', () => { + describe('#getAll', () => { let records; const expectedIds = []; @@ -460,10 +460,10 @@ describe('GitAdapter', () => { expectedIds.push(id3); - (records = await subject.getRecords()); + (records = await subject.getAll()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns all records', () => { expect(records.length).to.equal(3); @@ -513,14 +513,14 @@ describe('GitAdapter', () => { (count = await subject.count()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the proper count', async () => { expect(count).to.equal(3); }); }); - describe('#getLatestRecord', () => { + describe('#getLatest', () => { context('when there are records for the given service', () => { let lastSnapshotId; let latestRecord; @@ -543,10 +543,10 @@ describe('GitAdapter', () => { mimeType: MIME_TYPE, })); - latestRecord = await subject.getLatestRecord(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the latest record id', () => { expect(latestRecord.id).to.include(lastSnapshotId); @@ -570,10 +570,10 @@ describe('GitAdapter', () => { mimeType: PDF_MIME_TYPE, })); - latestRecord = await subject.getLatestRecord(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the latest record id', () => { expect(latestRecord.id).to.include(lastSnapshotId); @@ -593,7 +593,7 @@ describe('GitAdapter', () => { let latestRecord; before(async () => { - latestRecord = await subject.getLatestRecord(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + 
latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); it('returns no id', () => { @@ -656,7 +656,7 @@ describe('GitAdapter', () => { } }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('iterates through all records', async () => { expect(ids).to.have.members(expectedIds); diff --git a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index bfafbefa1..02a3beafe 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -32,7 +32,7 @@ export default class MongoAdapter { content = await content; } - const previousRecord = await this.getLatestRecord(serviceId, documentType); + const previousRecord = await this.getLatest(serviceId, documentType); if (previousRecord && await previousRecord.content == content) { return {}; @@ -66,29 +66,29 @@ export default class MongoAdapter { }; } - async getLatestRecord(serviceId, documentType) { - const [record] = await this.collection.find({ serviceId, documentType }).limit(1).sort({ fetchDate: -1 }).toArray(); + async getLatest(serviceId, documentType) { + const [mongoDocument] = await this.collection.find({ serviceId, documentType }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one document use `find` - if (!record) { + if (!mongoDocument) { return {}; } - return this.getRecordFromMongoMetadata(record); + return this._convertDocumentToRecord(mongoDocument); } - async getRecord(recordId) { - const record = await this.collection.findOne({ _id: new ObjectId(recordId) }); + async get(recordId) { + const mongoDocument = await this.collection.findOne({ _id: new ObjectId(recordId) }); - if (!record) { + if (!mongoDocument) { return {}; } - return this.getRecordFromMongoMetadata(record); + return this._convertDocumentToRecord(mongoDocument); } - async getRecords() { + async getAll() { return (await this.collection.find().project({ content: 0 
}).sort({ fetchDate: 1 }).toArray()) - .map(record => this.getRecordFromMongoMetadata(record)); + .map(mongoDocument => this._convertDocumentToRecord(mongoDocument)); } async count() { @@ -100,18 +100,18 @@ export default class MongoAdapter { /* eslint-disable no-await-in-loop */ while (await cursor.hasNext()) { - const record = await cursor.next(); + const mongoDocument = await cursor.next(); - yield this.getRecordFromMongoMetadata(record); + yield this._convertDocumentToRecord(mongoDocument); } /* eslint-enable no-await-in-loop */ } - async _removeAllRecords() { + async _removeAll() { return this.collection.deleteMany(); } - getRecordFromMongoMetadata({ _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId }) { + _convertDocumentToRecord({ _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId }) { const { collection } = this; const result = { id: _id.toString(), diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index 63b5b2795..6049e8a11 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -73,7 +73,7 @@ describe('MongoAdapter', () => { })); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -156,7 +156,7 @@ describe('MongoAdapter', () => { }).limit(1).sort({ created_at: -1 }).toArray()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -200,7 +200,7 @@ describe('MongoAdapter', () => { }).count(); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('does not save the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore); @@ -247,7 +247,7 
@@ describe('MongoAdapter', () => { }).limit(1).sort({ created_at: -1 }).toArray()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -291,7 +291,7 @@ describe('MongoAdapter', () => { })); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -313,7 +313,7 @@ describe('MongoAdapter', () => { }); }); - describe('#getRecord', () => { + describe('#get', () => { let record; let id; @@ -327,10 +327,10 @@ describe('MongoAdapter', () => { mimeType: MIME_TYPE, })); - (record = await subject.getRecord(id)); + (record = await subject.get(id)); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the record id', () => { expect(record.id).to.include(id); @@ -365,7 +365,7 @@ describe('MongoAdapter', () => { }); }); - describe('#getRecords', () => { + describe('#getAll', () => { let records; const expectedIds = []; @@ -404,10 +404,10 @@ describe('MongoAdapter', () => { expectedIds.push(id3); - (records = await subject.getRecords()); + (records = await subject.getAll()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns all records', () => { expect(records.length).to.equal(3); @@ -457,14 +457,14 @@ describe('MongoAdapter', () => { (count = await subject.count()); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the proper count', async () => { expect(count).to.equal(3); }); }); - describe('#getLatestRecord', () => { + describe('#getLatest', () => { context('when there are records for the given service', () => { let lastSnapshotId; let latestRecord; @@ -488,13 +488,13 @@ describe('MongoAdapter', () => { fetchDate: 
FETCH_DATE_LATER, })); - latestRecord = await subject.getLatestRecord( + latestRecord = await subject.getLatest( SERVICE_PROVIDER_ID, DOCUMENT_TYPE, ); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the latest record id', () => { expect(latestRecord.id).to.include(lastSnapshotId); @@ -527,10 +527,10 @@ describe('MongoAdapter', () => { fetchDate: FETCH_DATE_LATER, })); - latestRecord = await subject.getLatestRecord(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('returns the latest record id', () => { expect(latestRecord.id).to.include(lastSnapshotId); @@ -552,7 +552,7 @@ describe('MongoAdapter', () => { let latestRecord; before(async () => { - latestRecord = await subject.getLatestRecord(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); it('returns no id', () => { @@ -615,7 +615,7 @@ describe('MongoAdapter', () => { } }); - after(async () => subject._removeAllRecords()); + after(async () => subject._removeAll()); it('iterates through all records', async () => { expect(ids).to.have.members(expectedIds); From 809840011d59f629179e7d28fce29992f3d99757 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 11 May 2022 17:53:37 +0200 Subject: [PATCH 22/74] Better manage access to non-existent records --- src/storage-adapters/git/index.js | 7 +++++-- src/storage-adapters/git/index.test.js | 18 ++++++++---------- src/storage-adapters/mongo/index.js | 16 +++++++--------- src/storage-adapters/mongo/index.test.js | 18 ++++++++---------- 4 files changed, 28 insertions(+), 31 deletions(-) diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 8f474fb04..0b157a7c8 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js 
@@ -82,7 +82,7 @@ export default class GitAdapter { async get(recordId) { const [commit] = await this.git.log([ '-1', recordId ]); - return this._getRecordFromCommitMetadata(commit); + return this._convertCommitToRecord(commit); } async getAll() { @@ -160,9 +160,12 @@ export default class GitAdapter { } async _convertCommitToRecord(commit) { - const { hash, date, message, body, diff } = commit; + if (!commit || !commit.hash) { + return {}; + } let relativeFilePath; + const { hash, date, message, body } = commit; if (diff) { ({ files: [{ file: relativeFilePath }] } = diff); diff --git a/src/storage-adapters/git/index.test.js b/src/storage-adapters/git/index.test.js index b3282e644..48fca949a 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/storage-adapters/git/index.test.js @@ -419,6 +419,12 @@ describe('GitAdapter', () => { it('stores the snapshot ID', () => { expect(record.snapshotId).to.equal(SNAPSHOT_ID); }); + + context('when requested record does not exists', () => { + it('returns an empty object', async () => { + expect(await subject.get('inexistantID')).to.deep.equal({}); + }); + }); }); describe('#getAll', () => { @@ -596,16 +602,8 @@ describe('GitAdapter', () => { latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); - it('returns no id', () => { - expect(latestRecord.id).to.not.be.ok; - }); - - it('returns no content', () => { - expect(latestRecord.content).to.not.be.ok; - }); - - it('returns no mime type', () => { - expect(latestRecord.mimeType).to.not.be.ok; + it('returns an empty object', async () => { + expect(latestRecord).to.deep.equal({}); }); }); }); diff --git a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index 02a3beafe..d856dcd45 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -69,20 +69,12 @@ export default class MongoAdapter { async getLatest(serviceId, documentType) { const [mongoDocument] = await this.collection.find({ 
serviceId, documentType }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one document use `find` - if (!mongoDocument) { - return {}; - } - return this._convertDocumentToRecord(mongoDocument); } async get(recordId) { const mongoDocument = await this.collection.findOne({ _id: new ObjectId(recordId) }); - if (!mongoDocument) { - return {}; - } - return this._convertDocumentToRecord(mongoDocument); } @@ -111,7 +103,13 @@ export default class MongoAdapter { return this.collection.deleteMany(); } - _convertDocumentToRecord({ _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId }) { + _convertDocumentToRecord(document) { + if (!document || !document._id) { + return {}; + } + + const { _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId } = document; + const { collection } = this; const result = { id: _id.toString(), diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index 6049e8a11..c0bf75019 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -363,6 +363,12 @@ describe('MongoAdapter', () => { it('stores the snapshot ID', () => { expect(record.snapshotId).to.equal(SNAPSHOT_ID); }); + + context('when requested record does not exists', () => { + it('returns an empty object', async () => { + expect(await subject.get('inexistantID')).to.deep.equal({}); + }); + }); }); describe('#getAll', () => { @@ -555,16 +561,8 @@ describe('MongoAdapter', () => { latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); - it('returns no id', () => { - expect(latestRecord.id).to.not.be.ok; - }); - - it('returns no content', () => { - expect(latestRecord.content).to.not.be.ok; - }); - - it('returns no mime type', () => { - expect(latestRecord.mimeType).to.not.be.ok; + it('returns an empty object', async () => { + 
expect(latestRecord).to.deep.equal({}); }); }); }); From fcea33eed19b50e9b6219f7dfebb45c9dd439dbf Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 11 May 2022 17:54:01 +0200 Subject: [PATCH 23/74] Simplify code --- src/storage-adapters/git/git.js | 19 ------------------- src/storage-adapters/git/index.js | 19 ++++++------------- 2 files changed, 6 insertions(+), 32 deletions(-) diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index 896b209d5..93fa32e48 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -93,25 +93,6 @@ export default class Git { return Boolean(result); } - async findUnique(glob) { - const [latestCommit] = await this.log([ '-n', '1', '--stat=4096', glob ]); - - if (!latestCommit) { - return {}; - } - - const filePaths = latestCommit.diff.files.map(file => file.file); - - if (filePaths.length > 1) { - throw new Error(`Only one document should have been recorded in ${latestCommit.hash}, but all these documents were recorded: ${filePaths}`); - } - - return { - commit: latestCommit, - filePath: filePaths[0], - }; - } - async checkout(options) { return this.git.checkout(options); } diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 0b157a7c8..b28029102 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -68,13 +68,7 @@ export default class GitAdapter { } async getLatest(serviceId, documentType) { - const filePathGlob = this._getPathFor(serviceId, documentType, '*'); - const { commit, filePath } = await this.git.findUnique(filePathGlob); - const recordFilePath = `${this.path}/${filePath}`; - - if (!commit || !filePath || !fsApi.existsSync(recordFilePath)) { - return {}; - } + const [commit] = await this.git.log([ '-1', `${serviceId}/${documentType}.*` ]); return this._convertCommitToRecord(commit); } @@ -164,17 +158,16 @@ export default class GitAdapter { return {}; } - let relativeFilePath; const { hash, 
date, message, body } = commit; - if (diff) { - ({ files: [{ file: relativeFilePath }] } = diff); - } + const modifiedFilesInCommit = (await this.git.show([ '--name-only', '--pretty=', hash ])).trim().split('\n'); - if (!relativeFilePath) { - relativeFilePath = (await this.git.show([ '--name-only', '--pretty=', hash ])).trim(); + if (modifiedFilesInCommit.length > 1) { + throw new Error(`Only one document should have been recorded in ${hash}, but all these documents were recorded: ${modifiedFilesInCommit.join(', ')}`); } + const [relativeFilePath] = modifiedFilesInCommit; + const snapshotIdMatch = body.match(/\b[0-9a-f]{5,40}\b/g); const adapter = this; From 8ee7ff590f7d963b9ab4847472937542cc30d741 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 25 May 2022 16:39:31 +0200 Subject: [PATCH 24/74] Add history utils to import README --- scripts/history/utils/index.js | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) create mode 100644 scripts/history/utils/index.js diff --git a/scripts/history/utils/index.js b/scripts/history/utils/index.js new file mode 100644 index 000000000..383c8cce1 --- /dev/null +++ b/scripts/history/utils/index.js @@ -0,0 +1,19 @@ +import fsApi from 'fs'; + +const fs = fsApi.promises; + +export async function importReadme({ from: sourceAdapter, to: targetAdapter }) { + const sourceAdapterReadmePath = `${sourceAdapter.path}/README.md`; + const targetAdapterReadmePath = `${targetAdapter.path}/README.md`; + + const [readmeCommit] = await sourceAdapter.git.log(['README.md']); + + if (!readmeCommit) { + console.warn(`No commits found for README in ${sourceAdapter.path}`); + + return; + } + + await fs.copyFile(sourceAdapterReadmePath, targetAdapterReadmePath); + await targetAdapter._commit(targetAdapterReadmePath, readmeCommit.message, readmeCommit.date); +} From 28339199099f9bc1bad46aa03a60c1f59c947609 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 25 May 2022 16:41:45 +0200 Subject: [PATCH 25/74] Add script to update 
snapshots Ids references to full git hash --- scripts/history/update-to-full-hash.js | 59 ++++++++++++++++++++++++++ src/storage-adapters/git/git.js | 7 ++- 2 files changed, 64 insertions(+), 2 deletions(-) create mode 100644 scripts/history/update-to-full-hash.js diff --git a/scripts/history/update-to-full-hash.js b/scripts/history/update-to-full-hash.js new file mode 100644 index 000000000..37d6d4beb --- /dev/null +++ b/scripts/history/update-to-full-hash.js @@ -0,0 +1,59 @@ +import path from 'path'; +import { fileURLToPath } from 'url'; + +import config from 'config'; + +import GitAdapter from '../../src/storage-adapters/git/index.js'; + +import logger from './logger/index.js'; +import { importReadme } from './utils/index.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT_PATH = path.resolve(__dirname, '../../'); + +(async function main() { + console.time('Total time'); + + const versionsAdapter = new GitAdapter({ + ...config.get('recorder.versions.storage.git'), + path: path.resolve(ROOT_PATH, './data/france-elections-versions'), + }); + + const versionsTargetAdapter = new GitAdapter({ + ...config.get('recorder.versions.storage.git'), + prefixMessageToSnapshotId: 'This version was recorded after filtering snapshot https://github.com/OpenTermsArchive/france-elections-snapshots/commit/', + path: path.resolve(ROOT_PATH, './data/france-elections-versions-hash-updated-test'), + }); + + const snapshotsAdapter = new GitAdapter({ + ...config.get('recorder.snapshots.storage.git'), + path: path.resolve(ROOT_PATH, './data/france-elections-snapshots'), + }); + + await versionsAdapter.initialize(); + await versionsTargetAdapter.initialize(); + await snapshotsAdapter.initialize(); + + await importReadme({ from: versionsAdapter, to: versionsTargetAdapter }); + + const total = await versionsAdapter.count(); + let current = 1; + + for await (const record of versionsAdapter.iterate()) { + const fullSnapshotId = await 
snapshotsAdapter.git.getFullHash(record.snapshotId); + + const { id: recordId } = await versionsTargetAdapter.record({ ...record, snapshotId: fullSnapshotId }); + + if (!recordId) { + logger.warn({ message: 'Record skipped', serviceId: record.serviceId, type: record.documentType, id: record.id, current, total }); + } else { + logger.info({ message: `Update short sha ${record.snapshotId} to ${fullSnapshotId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current, total }); + } + + current++; + } + + await versionsAdapter.finalize(); + await versionsTargetAdapter.finalize(); + await snapshotsAdapter.finalize(); +}()); diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index 93fa32e48..4d95226b5 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -58,9 +58,8 @@ export default class Git { } const shortHash = summary.commit.replace('HEAD ', '').replace('(root-commit) ', ''); - const longHash = (await this.git.show([ shortHash, '--pretty=%H', '-s' ])).trim(); - return longHash; // Return a long commit hash to always handle ids in the same format and facilitate comparison + return this.getFullHash(shortHash); // Return a long commit hash to always handle ids in the same format and facilitate comparison } async pushChanges() { @@ -105,6 +104,10 @@ export default class Git { return this.git.show(options); } + async getFullHash(shortHash) { + return (await this.git.show([ shortHash, '--pretty=%H', '-s' ])).trim(); + } + relativePath(absolutePath) { // Git needs a path relative to the .git directory, not an absolute one return path.relative(this.path, absolutePath); From 01f3b762a7e3dca57ca6392635918d73df78dbaf Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 25 May 2022 16:42:32 +0200 Subject: [PATCH 26/74] Add script to migrate services --- scripts/history/migrate-services.js | 212 ++++++++++++++++++++++++++++ 1 file changed, 212 insertions(+) create mode 100644 
scripts/history/migrate-services.js diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js new file mode 100644 index 000000000..2dd66b45f --- /dev/null +++ b/scripts/history/migrate-services.js @@ -0,0 +1,212 @@ +import fsApi from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; + +import config from 'config'; +import winston from 'winston'; + +import GitAdapter from '../../src/storage-adapters/git/index.js'; + +import { format } from './logger/index.js'; +import { importReadme } from './utils/index.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const ROOT_PATH = path.resolve(__dirname, '../../'); +const fs = fsApi.promises; + +const CONFIG = { + servicesToMigrate: [ 'ASICS', 'Amazon', 'Orange Money France' ], + from: { + snapshots: 'france-snapshots', + versions: 'france-versions-hash-updated', + prefixMessageToSnapshotId: 'This version was recorded after filtering snapshot https://github.com/OpenTermsArchive/france-snapshots/commit/', + }, + to: { + snapshots: 'france-elections-snapshots', + versions: 'france-elections-versions-hash-updated', + prefixMessageToSnapshotId: 'This version was recorded after filtering snapshot https://github.com/OpenTermsArchive/france-elections-snapshots/commit/', + }, +}; + +const COUNTERS = { + imported: 0, + skipped: 0, +}; + +(async function main() { + console.time('Total time'); + + const migration = { + services: CONFIG.servicesToMigrate, + from: { + snapshots: { + source: new GitAdapter({ + ...config.get('recorder.snapshots.storage.git'), + path: path.resolve(ROOT_PATH, `./data/${CONFIG.from.snapshots}`), + }), + destination: new GitAdapter({ + ...config.get('recorder.snapshots.storage.git'), + path: path.resolve(ROOT_PATH, `./data/${CONFIG.from.snapshots}-migrated`), + }), + logger: winston.createLogger({ transports: [ new (winston.transports.File)({ filename: `${__dirname}/logs/${CONFIG.from.snapshots}.log` }), new winston.transports.Console() ], 
format }), + }, + versions: { + source: new GitAdapter({ + ...config.get('recorder.versions.storage.git'), + path: path.resolve(ROOT_PATH, `./data/${CONFIG.from.versions}`), + }), + destination: new GitAdapter({ + ...config.get('recorder.versions.storage.git'), + path: path.resolve(ROOT_PATH, `./data/${CONFIG.from.versions}-migrated`), + prefixMessageToSnapshotId: CONFIG.from.prefixMessageToSnapshotId, + }), + logger: winston.createLogger({ transports: [ new (winston.transports.File)({ filename: `${__dirname}/logs/${CONFIG.from.versions}.log` }), new winston.transports.Console() ], format }), + }, + }, + to: { + snapshots: { + source: new GitAdapter({ + ...config.get('recorder.snapshots.storage.git'), + path: path.resolve(ROOT_PATH, `./data/${CONFIG.to.snapshots}`), + }), + destination: new GitAdapter({ + ...config.get('recorder.snapshots.storage.git'), + path: path.resolve(ROOT_PATH, `./data/${CONFIG.to.snapshots}-migrated`), + }), + logger: winston.createLogger({ transports: [ new (winston.transports.File)({ filename: `${__dirname}/logs/${CONFIG.to.snapshots}.log` }), new winston.transports.Console() ], format }), + }, + versions: { + source: new GitAdapter({ + ...config.get('recorder.versions.storage.git'), + path: path.resolve(ROOT_PATH, `./data/${CONFIG.to.versions}`), + }), + destination: new GitAdapter({ + ...config.get('recorder.versions.storage.git'), + path: path.resolve(ROOT_PATH, `./data/${CONFIG.to.versions}-migrated`), + prefixMessageToSnapshotId: CONFIG.to.prefixMessageToSnapshotId, + }), + logger: winston.createLogger({ transports: [ new (winston.transports.File)({ filename: `${__dirname}/logs/${CONFIG.to.versions}.log` }), new winston.transports.Console() ], format }), + }, + }, + }; + + await initialize(migration); + + const fromSnapshotsRecords = await (await migration.from.snapshots.source.getAll()); + const toSnapshotsRecords = await (await migration.to.snapshots.source.getAll()); + const snapshotsToMigrate = fromSnapshotsRecords.filter(({ 
serviceId }) => migration.services.includes(serviceId)); + const fromSnapshotsRecordsToRewrite = fromSnapshotsRecords.filter(({ serviceId }) => !migration.services.includes(serviceId)); + const toSnapshotsRecordsMigrated = [ ...toSnapshotsRecords, ...snapshotsToMigrate ].sort((recordA, recordB) => new Date(recordA.fetchDate) - new Date(recordB.fetchDate)); + + const fromVersionsRecords = await (await migration.from.versions.source.getAll()); + const toVersionsRecords = await (await migration.to.versions.source.getAll()); + const versionsToMigrate = fromVersionsRecords.filter(({ serviceId }) => migration.services.includes(serviceId)); + const fromVersionsRecordsToRewrite = fromVersionsRecords.filter(({ serviceId }) => !migration.services.includes(serviceId)); + const toVersionsRecordsMigrated = [ ...toVersionsRecords, ...versionsToMigrate ].sort((recordA, recordB) => new Date(recordA.fetchDate) - new Date(recordB.fetchDate)); + + console.log('Number of snapshots in the source', fromSnapshotsRecords.length); + console.log('Number of snapshots in the target', toSnapshotsRecords.length); + console.log('Number of snapshots to migrate', snapshotsToMigrate.length); + + console.log('Number of versions in the source', fromVersionsRecords.length); + console.log('Number of versions in the target', toVersionsRecords.length); + console.log('Number of versions to migrate', versionsToMigrate.length); + + const idsMapping = {}; + + await Promise.all([ + rewriteSnapshots(migration.from.snapshots.destination, fromSnapshotsRecordsToRewrite, idsMapping, migration.from.snapshots.logger), + rewriteSnapshots(migration.to.snapshots.destination, toSnapshotsRecordsMigrated, idsMapping, migration.to.snapshots.logger), + ]); + + await fs.writeFile(path.join(__dirname, 'ids-mapping.json'), JSON.stringify(idsMapping, null, 4)); + + console.log('Snapshots-migrated'); + + await Promise.all([ + rewriteVersions(migration.from.versions.destination, fromVersionsRecordsToRewrite, idsMapping, 
migration.from.versions.logger), + rewriteVersions(migration.to.versions.destination, toVersionsRecordsMigrated, idsMapping, migration.to.versions.logger), + ]); + + console.log(`Records treated: ${Object.values(COUNTERS).reduce((acc, value) => acc + value, 0)}`); + console.log(`⌙ Migrated records: ${COUNTERS.imported}`); + console.log(`⌙ Skipped records: ${COUNTERS.skipped}`); + console.timeEnd('Total time'); + + await finalize(migration); +}()); + +async function rewriteSnapshots(adapter, records, idsMapping, logger) { + let i = 1; + + for (const record of records) { + const { id: recordId } = await adapter.record(record); // eslint-disable-line no-await-in-loop + + idsMapping[record.id] = recordId; // Saves the mapping between the old ID and the new one. + + if (recordId) { + logger.info({ message: `Migrated snapshot with new ID: ${recordId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); + COUNTERS.imported++; + } else { + logger.info({ message: 'Skipped snapshot', serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); + COUNTERS.skipped++; + } + } +} + +async function rewriteVersions(adapter, records, idsMapping, logger) { + let i = 1; + + for (const record of records) { + const newSnapshotId = idsMapping[record.snapshotId]; + + if (!newSnapshotId) { + throw new Error(`Snapshot ID ${record.snapshotId} not found for record ${record.id}`); + } + + record.snapshotId = newSnapshotId; + + const { id: recordId } = await adapter.record(record); // eslint-disable-line no-await-in-loop + + if (recordId) { + logger.info({ message: `Migrated version with new ID: ${recordId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); + COUNTERS.imported++; + } else { + logger.info({ message: 'Skipped version', serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: 
records.length }); + COUNTERS.skipped++; + } + } +} + +async function initialize(migration) { + await Promise.all([ + migration.from.snapshots.source.initialize(), + migration.from.snapshots.destination.initialize(), + migration.from.versions.source.initialize(), + migration.from.versions.destination.initialize(), + migration.to.snapshots.source.initialize(), + migration.to.snapshots.destination.initialize(), + migration.to.versions.source.initialize(), + migration.to.versions.destination.initialize(), + ]); + + return Promise.all([ + importReadme({ from: migration.from.snapshots.source, to: migration.from.snapshots.destination }), + importReadme({ from: migration.from.versions.source, to: migration.from.versions.destination }), + importReadme({ from: migration.to.snapshots.source, to: migration.to.snapshots.destination }), + importReadme({ from: migration.to.versions.source, to: migration.to.versions.destination }), + ]); +} + +async function finalize(migration) { + return Promise.all([ + migration.from.snapshots.source.finalize(), + migration.from.snapshots.destination.finalize(), + migration.from.versions.source.finalize(), + migration.from.versions.destination.finalize(), + migration.to.snapshots.source.finalize(), + migration.to.snapshots.destination.finalize(), + migration.to.versions.source.finalize(), + migration.to.versions.destination.finalize(), + ]); +} From 7f0b9be7b1dd602cd2f82789cd1eb4b92589288d Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 25 May 2022 17:26:45 +0200 Subject: [PATCH 27/74] Significant improvement of perf when getting commits --- package-lock.json | 98 ++++++++++++++++--------------- package.json | 2 +- src/storage-adapters/git/git.js | 2 +- src/storage-adapters/git/index.js | 10 ++-- 4 files changed, 58 insertions(+), 54 deletions(-) diff --git a/package-lock.json b/package-lock.json index 411f5dbde..a2005ea37 100644 --- a/package-lock.json +++ b/package-lock.json @@ -11,6 +11,7 @@ "dependencies": {
"@accordproject/markdown-cicero": "^0.15.2", "@accordproject/markdown-pdf": "^0.15.2", + "@opentermsarchive/simple-git": "^3.7.2", "abort-controller": "^3.0.0", "ajv": "^6.12.6", "archiver": "^5.3.0", @@ -46,7 +47,6 @@ "puppeteer-extra": "^3.2.3", "puppeteer-extra-plugin-stealth": "^2.9.0", "sib-api-v3-sdk": "^8.2.1", - "simple-git": "^2.47.0", "turndown": "^7.0.0", "winston": "^3.3.3", "winston-mail": "^2.0.0" @@ -790,6 +790,36 @@ "resolved": "https://registry.npmjs.org/@octokit/webhooks-types/-/webhooks-types-4.12.0.tgz", "integrity": "sha512-G0k7CoS9bK+OI7kPHgqi1KqK4WhrjDQSjy0wJI+0OTx/xvbHUIZDeqatY60ceeRINP/1ExEk6kTARboP0xavEw==" }, + "node_modules/@opentermsarchive/simple-git": { + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@opentermsarchive/simple-git/-/simple-git-3.7.2.tgz", + "integrity": "sha512-H47Io1nMzkehYMD2ZKyoHcG/cH1zoFzCtw77aAi9qWELa4RnziSdQsO7JYmvFNJKyUXwyyenGaEtaJe0D2LVqA==", + "dependencies": { + "@kwsites/file-exists": "^1.1.1", + "@kwsites/promise-deferred": "^1.1.1", + "debug": "^4.3.3" + }, + "funding": { + "type": "github", + "url": "https://github.com/sponsors/steveukx/" + } + }, + "node_modules/@opentermsarchive/simple-git/node_modules/debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "dependencies": { + "ms": "2.1.2" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } + } + }, "node_modules/@sinonjs/commons": { "version": "1.8.3", "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-1.8.3.tgz", @@ -6659,32 +6689,6 @@ "url": "https://github.com/sponsors/ljharb" } }, - "node_modules/simple-git": { - "version": "2.47.0", - "resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.47.0.tgz", - "integrity": 
"sha512-+HfCpqPBEZTPWiW9fPdbiPJDslM22MLqrktfzNKyI2pWaJa6DhfNVx4Mds04KZzVv5vjC9/ksw3y5gVf8ECWDg==", - "dependencies": { - "@kwsites/file-exists": "^1.1.1", - "@kwsites/promise-deferred": "^1.1.1", - "debug": "^4.3.2" - } - }, - "node_modules/simple-git/node_modules/debug": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz", - "integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==", - "dependencies": { - "ms": "2.1.2" - }, - "engines": { - "node": ">=6.0" - }, - "peerDependenciesMeta": { - "supports-color": { - "optional": true - } - } - }, "node_modules/simple-swizzle": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", @@ -8237,6 +8241,26 @@ "resolved": "https://registry.npmjs.org/@octokit/webhooks-types/-/webhooks-types-4.12.0.tgz", "integrity": "sha512-G0k7CoS9bK+OI7kPHgqi1KqK4WhrjDQSjy0wJI+0OTx/xvbHUIZDeqatY60ceeRINP/1ExEk6kTARboP0xavEw==" }, + "@opentermsarchive/simple-git": { + "version": "3.7.2", + "resolved": "https://registry.npmjs.org/@opentermsarchive/simple-git/-/simple-git-3.7.2.tgz", + "integrity": "sha512-H47Io1nMzkehYMD2ZKyoHcG/cH1zoFzCtw77aAi9qWELa4RnziSdQsO7JYmvFNJKyUXwyyenGaEtaJe0D2LVqA==", + "requires": { + "@kwsites/file-exists": "^1.1.1", + "@kwsites/promise-deferred": "^1.1.1", + "debug": "^4.3.3" + }, + "dependencies": { + "debug": { + "version": "4.3.4", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", + "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "requires": { + "ms": "2.1.2" + } + } + } + }, "@sinonjs/commons": { "version": "1.8.3", "resolved": "https://registry.npmjs.org/@sinonjs/commons/-/commons-1.8.3.tgz", @@ -12624,26 +12648,6 @@ } } }, - "simple-git": { - "version": "2.47.0", - "resolved": "https://registry.npmjs.org/simple-git/-/simple-git-2.47.0.tgz", - "integrity": 
"sha512-+HfCpqPBEZTPWiW9fPdbiPJDslM22MLqrktfzNKyI2pWaJa6DhfNVx4Mds04KZzVv5vjC9/ksw3y5gVf8ECWDg==", - "requires": { - "@kwsites/file-exists": "^1.1.1", - "@kwsites/promise-deferred": "^1.1.1", - "debug": "^4.3.2" - }, - "dependencies": { - "debug": { - "version": "4.3.2", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.2.tgz", - "integrity": "sha512-mOp8wKcvj7XxC78zLgw/ZA+6TSgkoE2C/ienthhRD298T7UNwAg9diBpLRxC0mOezLl4B0xV7M0cCO6P/O0Xhw==", - "requires": { - "ms": "2.1.2" - } - } - } - }, "simple-swizzle": { "version": "0.2.2", "resolved": "https://registry.npmjs.org/simple-swizzle/-/simple-swizzle-0.2.2.tgz", diff --git a/package.json b/package.json index b512517d8..d31d16ff5 100644 --- a/package.json +++ b/package.json @@ -37,6 +37,7 @@ "dependencies": { "@accordproject/markdown-cicero": "^0.15.2", "@accordproject/markdown-pdf": "^0.15.2", + "@opentermsarchive/simple-git": "^3.7.2", "abort-controller": "^3.0.0", "ajv": "^6.12.6", "archiver": "^5.3.0", @@ -72,7 +73,6 @@ "puppeteer-extra": "^3.2.3", "puppeteer-extra-plugin-stealth": "^2.9.0", "sib-api-v3-sdk": "^8.2.1", - "simple-git": "^2.47.0", "turndown": "^7.0.0", "winston": "^3.3.3", "winston-mail": "^2.0.0" diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index 4d95226b5..c837d57d3 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -1,7 +1,7 @@ import fsApi from 'fs'; import path from 'path'; -import simpleGit from 'simple-git'; +import simpleGit from '@opentermsarchive/simple-git'; process.env.LC_ALL = 'en_GB'; // Ensure git messages will be in English as some errors are handled by analysing the message content diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index b28029102..1e2a92201 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -68,13 +68,13 @@ export default class GitAdapter { } async getLatest(serviceId, documentType) { - const [commit] = await 
this.git.log([ '-1', `${serviceId}/${documentType}.*` ]); + const [commit] = await this.git.log([ '-1', '--name-only', `${serviceId}/${documentType}.*` ]); return this._convertCommitToRecord(commit); } async get(recordId) { - const [commit] = await this.git.log([ '-1', recordId ]); + const [commit] = await this.git.log([ '-1', '--name-only', recordId ]); return this._convertCommitToRecord(commit); } @@ -96,7 +96,7 @@ export default class GitAdapter { } async _getSortedRecordsRelatedCommits() { - return (await this.git.log([ '--reverse', '--no-merges' ])) + return (await this.git.log([ '--reverse', '--no-merges', '--name-only' ])) .filter(({ message }) => message.match(COMMIT_MESSAGE_PREFIXES_REGEXP)) // Skip commits which are not a document record (README, LICENSE, …) .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending order } @@ -158,9 +158,9 @@ export default class GitAdapter { return {}; } - const { hash, date, message, body } = commit; + const { hash, date, message, body, diff } = commit; - const modifiedFilesInCommit = (await this.git.show([ '--name-only', '--pretty=', hash ])).trim().split('\n'); + const modifiedFilesInCommit = diff.files.map(({ file }) => file); if (modifiedFilesInCommit.length > 1) { throw new Error(`Only one document should have been recorded in ${hash}, but all these documents were recorded: ${modifiedFilesInCommit.join(', ')}`); From 55a0025ad7c2652e3967bd1b352cc8fc6cc6e07b Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 31 May 2022 11:57:15 +0200 Subject: [PATCH 28/74] Name parameters for commits functions --- scripts/history/utils/index.js | 6 +++++- scripts/rewrite/initializer/index.js | 5 ++++- src/storage-adapters/git/git.js | 2 +- src/storage-adapters/git/index.js | 6 +++--- src/storage-adapters/git/index.test.js | 5 ++++- 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/scripts/history/utils/index.js 
b/scripts/history/utils/index.js index 383c8cce1..9b4f742f0 100644 --- a/scripts/history/utils/index.js +++ b/scripts/history/utils/index.js @@ -15,5 +15,9 @@ export async function importReadme({ from: sourceAdapter, to: targetAdapter }) { } await fs.copyFile(sourceAdapterReadmePath, targetAdapterReadmePath); - await targetAdapter._commit(targetAdapterReadmePath, readmeCommit.message, readmeCommit.date); + await targetAdapter._commit({ + filePath: targetAdapterReadmePath, + message: readmeCommit.message, + date: readmeCommit.date, + }); } diff --git a/scripts/rewrite/initializer/index.js b/scripts/rewrite/initializer/index.js index 73beb07cb..c1d803718 100644 --- a/scripts/rewrite/initializer/index.js +++ b/scripts/rewrite/initializer/index.js @@ -21,7 +21,10 @@ export async function initReadmeAndLicense(targetRepo, targetPath, authorDate) { await fs.copyFile(LICENSE_PATH, targetLicenseFilePath); await targetRepo.add(targetReadmeFilePath); await targetRepo.add(targetLicenseFilePath); - await targetRepo.commit(null, 'Add Readme and License', authorDate); + await targetRepo.commit({ + message: 'Add Readme and License', + date: authorDate, + }); } export async function initTargetRepo(targetRepoPath) { diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index c837d57d3..64e2696d4 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -32,7 +32,7 @@ export default class Git { return this.git.add(this.relativePath(filepath)); } - async commit(filepath, message, date) { + async commit({ filepath, message, date }) { if (date) { const commitDate = new Date(date).toISOString(); diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index 1e2a92201..abf6a15c9 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -47,7 +47,7 @@ export default class GitAdapter { const message = this._generateCommitMessage({ serviceId, documentType, isRefilter, snapshotId, 
isFirstRecord }); const fileExtension = mime.getExtension(mimeType); const filePath = await this._save({ serviceId, documentType, content, fileExtension }); - const sha = await this._commit(filePath, message, fetchDate); + const sha = await this._commit({ filePath, message, date: fetchDate }); if (!sha) { return {}; @@ -115,11 +115,11 @@ export default class GitAdapter { return filePath; } - async _commit(filePath, message, date) { + async _commit({ filePath, message, date }) { try { await this.git.add(filePath); - return await this.git.commit(filePath, message, date); + return await this.git.commit({ filePath, message, date }); } catch (error) { throw new Error(`Could not commit ${filePath} with message "${message}" due to error: "${error}"`); } diff --git a/src/storage-adapters/git/index.test.js b/src/storage-adapters/git/index.test.js index 48fca949a..fe388b543 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/storage-adapters/git/index.test.js @@ -100,7 +100,10 @@ describe('GitAdapter', () => { fileExtension: 'html', }); - id = await subject._commit(EXPECTED_FILE_PATH, COMMIT_MESSAGE); + id = await subject._commit({ + filePath: EXPECTED_FILE_PATH, + message: COMMIT_MESSAGE, + }); ([commit] = await git.log()); }); From 30ace5abd9a76a0ed467452e760e9610762ea885 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 31 May 2022 11:57:26 +0200 Subject: [PATCH 29/74] Remove obsolete code --- src/storage-adapters/git/git.js | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index 64e2696d4..eb9ee62bb 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -43,11 +43,7 @@ export default class Git { let summary; try { - if (filepath) { - summary = await this.git.commit(message, filepath); - } else { - summary = await this.git.commit(message); - } + summary = await this.git.commit(message, filepath); } finally { process.env.GIT_AUTHOR_DATE = 
''; process.env.GIT_COMMITTER_DATE = ''; From 8d6d91e0403a32f7caaf18b417b0c3d78fe04d58 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 31 May 2022 12:04:35 +0200 Subject: [PATCH 30/74] Simplify code --- src/storage-adapters/git/git.js | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index eb9ee62bb..889de513b 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -32,17 +32,15 @@ export default class Git { return this.git.add(this.relativePath(filepath)); } - async commit({ filepath, message, date }) { - if (date) { + async commit({ filepath, message, date = new Date() }) { + let summary; + + try { const commitDate = new Date(date).toISOString(); process.env.GIT_AUTHOR_DATE = commitDate; process.env.GIT_COMMITTER_DATE = commitDate; - } - let summary; - - try { summary = await this.git.commit(message, filepath); } finally { process.env.GIT_AUTHOR_DATE = ''; From b163bdcb86c2e05cdcb1868a945f23a753e7dec5 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 31 May 2022 14:08:03 +0200 Subject: [PATCH 31/74] Minor code style improvement --- src/storage-adapters/mongo/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index d856dcd45..377cbc744 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -80,7 +80,7 @@ export default class MongoAdapter { async getAll() { return (await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) - .map(mongoDocument => this._convertDocumentToRecord(mongoDocument)); + .map(this._convertDocumentToRecord.bind(this)); } async count() { From 785ed0f9cc9542cf9b050a04428f626c36a3427d Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 6 Jun 2022 17:15:38 +0200 Subject: [PATCH 32/74] Refactor storage adapters --- .eslintrc.yaml | 2 +- 
scripts/dataset/export/index.test.js | 19 +- src/archivist/index.js | 6 +- src/archivist/index.test.js | 6 +- src/archivist/recorder/index.js | 13 +- src/archivist/recorder/index.test.js | 46 ++--- src/storage-adapters/git/dataMapper.js | 81 ++++++++ src/storage-adapters/git/git.js | 12 ++ src/storage-adapters/git/index.js | 210 ++++++++------------ src/storage-adapters/git/index.test.js | 237 +++++++---------------- src/storage-adapters/mongo/dataMapper.js | 64 ++++++ src/storage-adapters/mongo/index.js | 93 +++------ src/storage-adapters/mongo/index.test.js | 144 +++++++------- src/storage-adapters/record.js | 15 ++ 14 files changed, 471 insertions(+), 477 deletions(-) create mode 100644 src/storage-adapters/git/dataMapper.js create mode 100644 src/storage-adapters/mongo/dataMapper.js create mode 100644 src/storage-adapters/record.js diff --git a/.eslintrc.yaml b/.eslintrc.yaml index ac6062d82..0e6c3821f 100644 --- a/.eslintrc.yaml +++ b/.eslintrc.yaml @@ -1,7 +1,7 @@ extends: - airbnb-base parserOptions: - ecmaVersion: 2020 + ecmaVersion: 2022 env: node: true mocha: true diff --git a/scripts/dataset/export/index.test.js b/scripts/dataset/export/index.test.js index 8dbefc5c6..51fd4b45f 100644 --- a/scripts/dataset/export/index.test.js +++ b/scripts/dataset/export/index.test.js @@ -9,6 +9,7 @@ import mime from 'mime'; import StreamZip from 'node-stream-zip'; import GitAdapter from '../../../src/storage-adapters/git/index.js'; +import Record from '../../../src/storage-adapters/record.js'; import generateArchive from './index.js'; @@ -55,41 +56,41 @@ describe('Export', () => { await storageAdapter.initialize(); - await storageAdapter.record({ + await storageAdapter.save(new Record({ serviceId: FIRST_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: FIRST_CONTENT, mimeType: MIME_TYPE, fetchDate: FIRST_FETCH_DATE, snapshotId: SNAPSHOT_ID, - }); + })); - await storageAdapter.record({ + await storageAdapter.save(new Record({ serviceId: 
FIRST_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: SECOND_CONTENT, mimeType: MIME_TYPE, fetchDate: SECOND_FETCH_DATE, snapshotId: SNAPSHOT_ID, - }); + })); - await storageAdapter.record({ + await storageAdapter.save(new Record({ serviceId: SECOND_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: FIRST_CONTENT, mimeType: MIME_TYPE, fetchDate: THIRD_FETCH_DATE, snapshotId: SNAPSHOT_ID, - }); + })); - await storageAdapter.record({ + await storageAdapter.save(new Record({ serviceId: SECOND_SERVICE_PROVIDER_ID, documentType: SECOND_DOCUMENT_TYPE, content: FIRST_CONTENT, mimeType: MIME_TYPE, fetchDate: FOURTH_FETCH_DATE, snapshotId: SNAPSHOT_ID, - }); + })); await generateArchive({ archivePath: ARCHIVE_PATH, @@ -103,7 +104,7 @@ describe('Export', () => { after(async () => { await fs.rm(TMP_PATH, { recursive: true }); - await storageAdapter._removeAll(); + await storageAdapter.removeAll(); }); it('is an archive', () => { diff --git a/src/archivist/index.js b/src/archivist/index.js index 498768221..aca1bf9db 100644 --- a/src/archivist/index.js +++ b/src/archivist/index.js @@ -105,7 +105,7 @@ export default class Archivist extends events.EventEmitter { await Promise.all([ launchHeadlessBrowser(), this.recorder.initialize() ]); - this._forEachDocumentOf(servicesIds, documentDeclaration => this.trackDocumentChangesQueue.push(documentDeclaration)); + this.#forEachDocumentOf(servicesIds, documentDeclaration => this.trackDocumentChangesQueue.push(documentDeclaration)); await this.trackDocumentChangesQueue.drain(); @@ -164,7 +164,7 @@ export default class Archivist extends events.EventEmitter { await this.recorder.initialize(); - this._forEachDocumentOf(servicesIds, documentDeclaration => this.refilterDocumentsQueue.push(documentDeclaration)); + this.#forEachDocumentOf(servicesIds, documentDeclaration => this.refilterDocumentsQueue.push(documentDeclaration)); await this.refilterDocumentsQueue.drain(); await this.recorder.finalize(); @@ -194,7 
+194,7 @@ export default class Archivist extends events.EventEmitter { }); } - async _forEachDocumentOf(servicesIds = [], callback) { // eslint-disable-line default-param-last + async #forEachDocumentOf(servicesIds = [], callback) { // eslint-disable-line default-param-last servicesIds.forEach(serviceId => { this.services[serviceId].getDocumentTypes().forEach(documentType => { callback(this.services[serviceId].getDocumentDeclaration(documentType)); diff --git a/src/archivist/index.test.js b/src/archivist/index.test.js index d6bbafd0a..312c407a5 100644 --- a/src/archivist/index.test.js +++ b/src/archivist/index.test.js @@ -31,7 +31,7 @@ let snapshotsStorageAdapter; let versionsStorageAdapter; async function resetGitRepositories() { - return Promise.all([ snapshotsStorageAdapter._removeAll(), versionsStorageAdapter._removeAll() ]); + return Promise.all([ snapshotsStorageAdapter.removeAll(), versionsStorageAdapter.removeAll() ]); } let gitVersion; @@ -168,8 +168,8 @@ describe('Archivist', function () { await app.initialize(); await app.trackChanges(serviceIds); - ({ id: originalSnapshotId } = await snapshotsStorageAdapter.getLatest(SERVICE_A_ID, SERVICE_A_TYPE)); - ({ id: firstVersionId } = await versionsStorageAdapter.getLatest(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: originalSnapshotId } = await snapshotsStorageAdapter.findLatestByServiceIdAndDocumentType(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: firstVersionId } = await versionsStorageAdapter.findLatestByServiceIdAndDocumentType(SERVICE_A_ID, SERVICE_A_TYPE)); serviceBCommits = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH }); diff --git a/src/archivist/recorder/index.js b/src/archivist/recorder/index.js index 6af2179d8..349b301a9 100644 --- a/src/archivist/recorder/index.js +++ b/src/archivist/recorder/index.js @@ -1,3 +1,5 @@ +import Record from '../../storage-adapters/record.js'; + export default class Recorder { constructor({ versionsStorageAdapter, snapshotsStorageAdapter }) { if 
(!versionsStorageAdapter || !snapshotsStorageAdapter) { @@ -17,12 +19,7 @@ export default class Recorder { } async getLatestSnapshot(serviceId, documentType) { - const record = await this.snapshotsStorageAdapter.getLatest(serviceId, documentType); - - return { - ...record, - content: await record.content, - }; + return this.snapshotsStorageAdapter.findLatestByServiceIdAndDocumentType(serviceId, documentType); } async recordSnapshot({ serviceId, documentType, fetchDate, mimeType, content }) { @@ -46,7 +43,7 @@ export default class Recorder { throw new Error('A document mime type is required to ensure data consistency'); } - return this.snapshotsStorageAdapter.record({ serviceId, documentType, fetchDate, mimeType, content }); + return this.snapshotsStorageAdapter.save(new Record({ serviceId, documentType, fetchDate, mimeType, content })); } async recordVersion({ serviceId, documentType, snapshotId, fetchDate, mimeType, content, isRefilter }) { @@ -74,7 +71,7 @@ export default class Recorder { throw new Error('A document mime type is required to ensure data consistency'); } - return this.versionsStorageAdapter.record({ serviceId, documentType, snapshotId, fetchDate, mimeType, content, isRefilter }); + return this.versionsStorageAdapter.save(new Record({ serviceId, documentType, snapshotId, fetchDate, mimeType, content, isRefilter })); } async recordRefilter(params) { diff --git a/src/archivist/recorder/index.test.js b/src/archivist/recorder/index.test.js index 784feef81..a4af2db4b 100644 --- a/src/archivist/recorder/index.test.js +++ b/src/archivist/recorder/index.test.js @@ -59,12 +59,12 @@ describe('Recorder', () => { }); after(async () => { - await snapshotsAdapter._removeAll(); + await snapshotsAdapter.removeAll(); await recorder.finalize(); }); context('when a required param is missing', () => { - after(async () => snapshotsAdapter._removeAll()); + after(async () => snapshotsAdapter.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -111,10 +111,10 @@ 
describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await snapshotsAdapter.getLatest(SERVICE_ID, TYPE); + record = await snapshotsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter._removeAll()); + after(async () => snapshotsAdapter.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -149,10 +149,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsAdapter.getLatest(SERVICE_ID, TYPE); + record = await snapshotsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter._removeAll()); + after(async () => snapshotsAdapter.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -185,10 +185,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsAdapter.getLatest(SERVICE_ID, TYPE); + record = await snapshotsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter._removeAll()); + after(async () => snapshotsAdapter.removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; @@ -217,7 +217,7 @@ describe('Recorder', () => { }); context('when a required param is missing', () => { - after(async () => versionsAdapter._removeAll()); + after(async () => versionsAdapter.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -267,10 +267,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); + record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAll()); + after(async () => versionsAdapter.removeAll()); it('records the document with the proper content', async () => { expect(await 
record.content).to.equal(CONTENT); @@ -307,10 +307,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); + record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAll()); + after(async () => versionsAdapter.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -349,10 +349,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); + record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAll()); + after(async () => versionsAdapter.removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; @@ -377,12 +377,12 @@ describe('Recorder', () => { }); after(async () => { - await versionsAdapter._removeAll(); + await versionsAdapter.removeAll(); await recorder.finalize(); }); context('when a required param is missing', () => { - after(async () => versionsAdapter._removeAll()); + after(async () => versionsAdapter.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -432,10 +432,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); + record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAll()); after(async () => versionsAdapter._removeAll()); + after(async () => versionsAdapter.removeAll()); after(async () => versionsAdapter.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -472,10 +472,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); + record = await 
versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAll()); + after(async () => versionsAdapter.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -514,10 +514,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.getLatest(SERVICE_ID, TYPE); + record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter._removeAll()); + after(async () => versionsAdapter.removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; diff --git a/src/storage-adapters/git/dataMapper.js b/src/storage-adapters/git/dataMapper.js new file mode 100644 index 000000000..c18457aac --- /dev/null +++ b/src/storage-adapters/git/dataMapper.js @@ -0,0 +1,81 @@ +import path from 'path'; + +import mime from 'mime'; + +import Record from '../record.js'; + +export const COMMIT_MESSAGE_PREFIX = { + startTracking: 'Start tracking', + refilter: 'Refilter', + update: 'Update', +}; + +export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${COMMIT_MESSAGE_PREFIX.startTracking}|${COMMIT_MESSAGE_PREFIX.refilter}|${COMMIT_MESSAGE_PREFIX.update})`); + +export default class GitDataMapper { + constructor({ adapter, prefixMessageToSnapshotId }) { + this.adapter = adapter; + this.prefixMessageToSnapshotId = prefixMessageToSnapshotId; + } + + async toPersistence(record) { + if (!record.content) { + await this.adapter.loadRecordContent(record); + } + + const { serviceId, documentType, isRefilter, snapshotId, mimeType, isFirstRecord } = record; + + let prefix = isRefilter ? COMMIT_MESSAGE_PREFIX.refilter : COMMIT_MESSAGE_PREFIX.update; + + prefix = isFirstRecord ? 
COMMIT_MESSAGE_PREFIX.startTracking : prefix; + + let message = `${prefix} ${serviceId} ${documentType}`; + + if (snapshotId) { + message = `${message}\n\n${this.prefixMessageToSnapshotId}${snapshotId}`; + } + + return { + message, + content: record.content, + fileExtension: mime.getExtension(mimeType), + }; + } + + async toDomain(commit, { lazyLoadContent } = {}) { + if (!commit) { + return {}; + } + + const { hash, date, message, body, diff } = commit; + + const modifiedFilesInCommit = diff.files.map(({ file }) => file); + + if (modifiedFilesInCommit.length > 1) { + throw new Error(`Only one document should have been recorded in ${hash}, but all these documents were recorded: ${modifiedFilesInCommit.join(', ')}`); + } + + const [relativeFilePath] = modifiedFilesInCommit; + + const snapshotIdMatch = body.match(/\b[0-9a-f]{5,40}\b/g); + const record = new Record({ + id: hash, + serviceId: path.dirname(relativeFilePath), + documentType: path.basename(relativeFilePath, path.extname(relativeFilePath)), + mimeType: mime.getType(relativeFilePath), + fetchDate: new Date(date), + isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIX.startTracking), + isRefilter: message.startsWith(COMMIT_MESSAGE_PREFIX.refilter), + snapshotId: snapshotIdMatch && snapshotIdMatch[0], + adapter: this.adapter, + }); + + if (lazyLoadContent) { + return record; + } + + await this.adapter.loadRecordContent(record); + + return record; + } +} diff --git a/src/storage-adapters/git/git.js b/src/storage-adapters/git/git.js index 889de513b..452569e4e 100644 --- a/src/storage-adapters/git/git.js +++ b/src/storage-adapters/git/git.js @@ -60,6 +60,14 @@ export default class Git { return this.git.push(); } + async listCommits(options = []) { + return this.log([ '--reverse', '--no-merges', '--name-only', ...options ]); + } + + async getCommit(options) { + return this.listCommits([ '-1', ...options ]); + } + async log(options = {}) { try { options.file = options.file && this.relativePath(options.file); @@ 
-102,6 +110,10 @@ export default class Git { return (await this.git.show([ shortHash, '--pretty=%H', '-s' ])).trim(); } + async restore(path, commit) { + return this.git.raw([ 'restore', '-s', commit, '--', path ]); + } + relativePath(absolutePath) { // Git needs a path relative to the .git directory, not an absolute one return path.relative(this.path, absolutePath); diff --git a/src/storage-adapters/git/index.js b/src/storage-adapters/git/index.js index abf6a15c9..27aeead37 100644 --- a/src/storage-adapters/git/index.js +++ b/src/storage-adapters/git/index.js @@ -8,55 +8,46 @@ import path from 'path'; import mime from 'mime'; +import DataMapper, { COMMIT_MESSAGE_PREFIXES_REGEXP, COMMIT_MESSAGE_PREFIX } from './dataMapper.js'; import Git from './git.js'; const fs = fsApi.promises; +const PDF_MIME_TYPE = 'application/pdf'; mime.define({ 'text/markdown': ['md'] }, true); // ensure extension for markdown files is `.md` and not `.markdown` -const COMMIT_MESSAGE_PREFIX = { - startTracking: 'Start tracking', - refilter: 'Refilter', - update: 'Update', -}; -const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${COMMIT_MESSAGE_PREFIX.startTracking}|${COMMIT_MESSAGE_PREFIX.refilter}|${COMMIT_MESSAGE_PREFIX.update})`); - -const PDF_MIME_TYPE = 'application/pdf'; - export default class GitAdapter { constructor({ path, author, publish, prefixMessageToSnapshotId }) { this.path = path; - this.author = author; this.needsPublication = publish; - this.prefixMessageToSnapshotId = prefixMessageToSnapshotId; + this.git = new Git({ path: this.path, author }); + this.dataMapper = new DataMapper({ adapter: this, prefixMessageToSnapshotId }); } async initialize() { - this.git = new Git({ path: this.path, author: this.author }); - await this.git.initialize(); return this; } - async record({ serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId }) { - if (content instanceof Promise) { - content = await content; + async save(record) { + const { serviceId, 
documentType, fetchDate } = record; + + if (record.isFirstRecord === undefined || record.isFirstRecord === null) { + record.isFirstRecord = await this.#isFirstRecord(serviceId, documentType); } - const isFirstRecord = await this._isFirstRecord(serviceId, documentType); - const message = this._generateCommitMessage({ serviceId, documentType, isRefilter, snapshotId, isFirstRecord }); - const fileExtension = mime.getExtension(mimeType); - const filePath = await this._save({ serviceId, documentType, content, fileExtension }); - const sha = await this._commit({ filePath, message, date: fetchDate }); + const { message, content, fileExtension } = await this.dataMapper.toPersistence(record); + + const filePath = await this.#writeFile({ serviceId, documentType, content, fileExtension }); + const sha = await this.#commit({ filePath, message, date: fetchDate }); if (!sha) { return {}; } - return { - id: sha, - isFirstRecord, - }; + record.id = sha; + + return record; } finalize() { @@ -67,55 +58,99 @@ export default class GitAdapter { return this.git.pushChanges(); } - async getLatest(serviceId, documentType) { - const [commit] = await this.git.log([ '-1', '--name-only', `${serviceId}/${documentType}.*` ]); + async findLatestByServiceIdAndDocumentType(serviceId, documentType, { lazyLoadContent } = {}) { + const [commit] = await this.git.getCommit([`${serviceId}/${documentType}.*`]); - return this._convertCommitToRecord(commit); + return this.dataMapper.toDomain(commit, { lazyLoadContent }); } - async get(recordId) { - const [commit] = await this.git.log([ '-1', '--name-only', recordId ]); + async findById(recordId, { lazyLoadContent } = {}) { + const [commit] = await this.git.getCommit([recordId]); - return this._convertCommitToRecord(commit); + return this.dataMapper.toDomain(commit, { lazyLoadContent }); } - async getAll() { - return Promise.all((await this._getSortedRecordsRelatedCommits()).map(this._convertCommitToRecord.bind(this))); + async findAll({ lazyLoadContent } = 
{}) { + return Promise.all((await this.#getSortedRecordsRelatedCommits()).map(commit => this.dataMapper.toDomain(commit, { lazyLoadContent }))); } async count() { - return Number((await this.git.raw([ 'rev-list', '--count', 'HEAD' ])).trim()); + return (await this.git.log([ + `--grep=${COMMIT_MESSAGE_PREFIX.startTracking}`, + `--grep=${COMMIT_MESSAGE_PREFIX.refilter}`, + `--grep=${COMMIT_MESSAGE_PREFIX.update}`, + ])).length; } - async* iterate() { - const commits = await this._getSortedRecordsRelatedCommits(); + async* iterate({ lazyLoadContent } = {}) { + const commits = await this.#getSortedRecordsRelatedCommits(); for (const commit of commits) { - yield this._convertCommitToRecord(commit); + yield this.dataMapper.toDomain(commit, { lazyLoadContent }); } } - async _getSortedRecordsRelatedCommits() { - return (await this.git.log([ '--reverse', '--no-merges', '--name-only' ])) + async removeAll() { + const files = await fs.readdir(this.path, { withFileTypes: true }); + const promises = files.map(file => { + const filePath = path.join(this.path, file.name); + + if (file.isDirectory()) { + return fs.rm(filePath, { recursive: true }); + } + + return fs.unlink(filePath); + }); + + await Promise.all(promises); + + return this.initialize(); + } + + async loadRecordContent(record) { + const relativeFilePath = `${record.serviceId}/${record.documentType}.${mime.getExtension(record.mimeType)}`; + + if (record.mimeType != PDF_MIME_TYPE) { + record.content = await this.git.show(`${record.id}:${relativeFilePath}`); + + return; + } + + // In case of PDF, `git show` cannot be used as it converts PDF binary into string which not retain the original binary representation + // It is impossible to restore the original binary data from the resulting string + let pdfBuffer; + + try { + await this.git.restore(relativeFilePath, record.id); // So, temporarily restore the PDF file to a specific commit + pdfBuffer = await fs.readFile(`${this.path}/${relativeFilePath}`); // …read the 
content + } finally { + await this.git.restore(relativeFilePath, 'HEAD'); // …and finally restore the file to its last state. + } + + record.content = pdfBuffer; + } + + async #getSortedRecordsRelatedCommits() { + return (await this.git.listCommits()) .filter(({ message }) => message.match(COMMIT_MESSAGE_PREFIXES_REGEXP)) // Skip commits which are not a document record (README, LICENSE, …) .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending order } - async _save({ serviceId, documentType, content, fileExtension }) { + async #writeFile({ serviceId, documentType, content, fileExtension }) { const directory = `${this.path}/${serviceId}`; if (!fsApi.existsSync(directory)) { await fs.mkdir(directory, { recursive: true }); } - const filePath = this._getPathFor(serviceId, documentType, fileExtension); + const filePath = this.#getPathFor(serviceId, documentType, fileExtension); await fs.writeFile(filePath, content); return filePath; } - async _commit({ filePath, message, date }) { + async #commit({ filePath, message, date }) { try { await this.git.add(filePath); @@ -125,98 +160,17 @@ export default class GitAdapter { } } - _getPathFor(serviceId, documentType, fileExtension) { + #getPathFor(serviceId, documentType, fileExtension) { return `${this.path}/${serviceId}/${documentType}.${fileExtension}`; } - _isTracked(serviceId, documentType) { - const filePath = this._getPathFor(serviceId, documentType, '*'); + #isTracked(serviceId, documentType) { + const filePath = this.#getPathFor(serviceId, documentType, '*'); return this.git.isTracked(filePath); } - async _isFirstRecord(serviceId, documentType) { - return !await this._isTracked(serviceId, documentType); - } - - _generateCommitMessage({ serviceId, documentType, isRefilter, snapshotId, isFirstRecord }) { - let prefix = isRefilter ? COMMIT_MESSAGE_PREFIX.refilter : COMMIT_MESSAGE_PREFIX.update; - - prefix = isFirstRecord ? 
COMMIT_MESSAGE_PREFIX.startTracking : prefix; - - let message = `${prefix} ${serviceId} ${documentType}`; - - if (snapshotId) { - message = `${message}\n\n${this.prefixMessageToSnapshotId}${snapshotId}`; - } - - return message; - } - - async _convertCommitToRecord(commit) { - if (!commit || !commit.hash) { - return {}; - } - - const { hash, date, message, body, diff } = commit; - - const modifiedFilesInCommit = diff.files.map(({ file }) => file); - - if (modifiedFilesInCommit.length > 1) { - throw new Error(`Only one document should have been recorded in ${hash}, but all these documents were recorded: ${modifiedFilesInCommit.join(', ')}`); - } - - const [relativeFilePath] = modifiedFilesInCommit; - - const snapshotIdMatch = body.match(/\b[0-9a-f]{5,40}\b/g); - const adapter = this; - - return { - id: hash, - serviceId: path.dirname(relativeFilePath), - documentType: path.basename(relativeFilePath, path.extname(relativeFilePath)), - mimeType: mime.getType(relativeFilePath), - fetchDate: new Date(date), - isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIX.startTracking), - isRefilter: message.startsWith(COMMIT_MESSAGE_PREFIX.refilter), - snapshotId: snapshotIdMatch && snapshotIdMatch[0], - get content() { // In this scope, `this` is the `result` object, not the adapter - return (async () => { - if (this.mimeType != PDF_MIME_TYPE) { - return adapter.git.show(`${hash}:${relativeFilePath}`); - } - - // In case of PDF, `git show` cannot be used as it converts PDF binary into string which not retain the original binary representation - // It is impossible to restore the original binary data from the resulting string - let pdfBuffer; - - try { - await adapter.git.raw([ 'restore', '-s', hash, '--', relativeFilePath ]); // So, temporarily restore the PDF file to a specific commit - pdfBuffer = await fs.readFile(`${adapter.path}/${relativeFilePath}`); // …read the content - } finally { - await adapter.git.raw([ 'restore', '-s', 'HEAD', '--', relativeFilePath ]); // …and 
finally restore the file to its last state. - } - - return pdfBuffer; - })(); - }, - }; - } - - async _removeAll() { - const files = await fs.readdir(this.path, { withFileTypes: true }); - const promises = files.map(file => { - const filePath = path.join(this.path, file.name); - - if (file.isDirectory()) { - return fs.rm(filePath, { recursive: true }); - } - - return fs.unlink(filePath); - }); - - await Promise.all(promises); - - return this.initialize(); + async #isFirstRecord(serviceId, documentType) { + return !await this.#isTracked(serviceId, documentType); } } diff --git a/src/storage-adapters/git/index.test.js b/src/storage-adapters/git/index.test.js index fe388b543..a5979a171 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/storage-adapters/git/index.test.js @@ -6,6 +6,8 @@ import chai from 'chai'; import config from 'config'; import mime from 'mime'; +import Record from '../record.js'; + import Git from './git.js'; import GitAdapter from './index.js'; @@ -52,104 +54,7 @@ describe('GitAdapter', () => { return subject.initialize(); }); - describe('#_save', () => { - context('when service directory already exists', () => { - before(async () => subject._save({ - serviceId: SERVICE_PROVIDER_ID, - documentType: DOCUMENT_TYPE, - content: CONTENT, - fileExtension: 'html', - })); - - after(async () => subject._removeAll()); - - it('creates a file for the given service', () => { - expect(fs.readFileSync(EXPECTED_FILE_PATH, { encoding: 'utf8' })).to.equal(CONTENT); - }); - }); - - context('when service directory does not already exist', () => { - const NEW_SERVICE_ID = 'test_not_existing_service'; - const NEW_SERVICE_EXPECTED_FILE_PATH = `${RECORDER_PATH}/${NEW_SERVICE_ID}/${DOCUMENT_TYPE}.html`; - - after(async () => subject._removeAll()); - - it('creates a directory and file for the given service', async () => { - await subject._save({ - serviceId: NEW_SERVICE_ID, - documentType: DOCUMENT_TYPE, - content: CONTENT, - fileExtension: 'html', - }); - - 
expect(fs.readFileSync(NEW_SERVICE_EXPECTED_FILE_PATH, { encoding: 'utf8' })).to.equal(CONTENT); - }); - }); - }); - - describe('#_commit', () => { - const COMMIT_MESSAGE = 'Message to check if the commit message is properly saved'; - let id; - let commit; - - before(async () => { - await subject._save({ - serviceId: SERVICE_PROVIDER_ID, - documentType: DOCUMENT_TYPE, - content: CONTENT, - fileExtension: 'html', - }); - - id = await subject._commit({ - filePath: EXPECTED_FILE_PATH, - message: COMMIT_MESSAGE, - }); - - ([commit] = await git.log()); - }); - - after(async () => subject._removeAll()); - - it('returns the id of the commit', () => { - expect(commit.hash).to.include(id); - }); - - it('properly saves the commit message', () => { - expect(commit.message).to.equal(COMMIT_MESSAGE); - }); - }); - - describe('#_getPathFor', () => { - it('returns the file path with given extension for the given service provider’s document type', () => { - expect(subject._getPathFor(SERVICE_PROVIDER_ID, DOCUMENT_TYPE, 'pdf')).to.equal(EXPECTED_PDF_FILE_PATH); - }); - }); - - describe('#_isTracked', () => { - after(async () => subject._removeAll()); - - context('when the file does not exists', () => { - it('returns false', async () => { - expect(await subject._isTracked(SERVICE_PROVIDER_ID, DOCUMENT_TYPE)).to.be.false; - }); - }); - - context('when the file already exists', () => { - before(async () => { - await subject.record({ - serviceId: SERVICE_PROVIDER_ID, - documentType: DOCUMENT_TYPE, - content: CONTENT, - }); - }); - - it('returns true', async () => { - expect(await subject._isTracked(SERVICE_PROVIDER_ID, DOCUMENT_TYPE)).to.be.true; - }); - }); - }); - - describe('#record', () => { + describe('#save', () => { let id; let commit; let isFirstRecord; @@ -160,21 +65,21 @@ describe('GitAdapter', () => { before(async () => { numberOfRecordsBefore = (await git.log()).length; - ({ id, isFirstRecord } = await subject.record({ + ({ id, isFirstRecord } = await subject.save(new 
Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - })); + }))); numberOfRecordsAfter = (await git.log()).length; ([commit] = await git.log()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -223,29 +128,29 @@ describe('GitAdapter', () => { const UPDATED_CONTENT = `${CONTENT} updated`; before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, - }); + })); numberOfRecordsBefore = (await git.log()).length; - ({ id, isFirstRecord } = await subject.record({ + ({ id, isFirstRecord } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: UPDATED_CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - })); + }))); numberOfRecordsAfter = (await git.log()).length; ([commit] = await git.log()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -262,26 +167,26 @@ describe('GitAdapter', () => { context('when the content has not changed', () => { before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, - }); + })); numberOfRecordsBefore = (await git.log()).length; - ({ id, isFirstRecord } = await subject.record({ + ({ id, isFirstRecord } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, - })); + }))); numberOfRecordsAfter = (await git.log()).length; }); - after(async () => subject._removeAll()); + after(async () => 
subject.removeAll()); it('does not save the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore); @@ -296,15 +201,15 @@ describe('GitAdapter', () => { const REFILTERED_CONTENT = `${CONTENT} refiltered`; before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, - }); // A refilter cannot be the first record + })); // A refilter cannot be the first record numberOfRecordsBefore = (await git.log()).length; - ({ id, isFirstRecord } = await subject.record({ + ({ id, isFirstRecord } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: REFILTERED_CONTENT, @@ -312,14 +217,14 @@ describe('GitAdapter', () => { isRefilter: true, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - })); + }))); numberOfRecordsAfter = (await git.log()).length; ([commit] = await git.log()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -338,21 +243,21 @@ describe('GitAdapter', () => { before(async () => { numberOfRecordsBefore = (await git.log()).length; - ({ id, isFirstRecord } = await subject.record({ + ({ id, isFirstRecord } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: PDF_CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: PDF_MIME_TYPE, - })); + }))); numberOfRecordsAfter = (await git.log()).length; ([commit] = await git.log()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -372,24 +277,24 @@ describe('GitAdapter', () => { }); }); - describe('#get', () => { + describe('#findById', () => { let record; let id; before(async () => { - ({ id } = await subject.record({ + ({ 
id } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - })); + }))); - (record = await subject.get(id)); + (record = await subject.findById(id)); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns the record id', () => { expect(record.id).to.include(id); @@ -407,8 +312,8 @@ describe('GitAdapter', () => { expect(record.documentType).to.equal(DOCUMENT_TYPE); }); - it('returns a asynchronous content getter', async () => { - expect(await record.content).to.equal(CONTENT); + it('returns the content', async () => { + expect(record.content).to.equal(CONTENT); }); it('stores the fetch date', () => { @@ -425,39 +330,39 @@ describe('GitAdapter', () => { context('when requested record does not exists', () => { it('returns an empty object', async () => { - expect(await subject.get('inexistantID')).to.deep.equal({}); + expect(await subject.findById('inexistantID')).to.deep.equal({}); }); }); }); - describe('#getAll', () => { + describe('#findAll', () => { let records; const expectedIds = []; before(async () => { - const { id: id1 } = await subject.record({ + const { id: id1 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id1); - const { id: id2 } = await subject.record({ + const { id: id2 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id2); - const { id: id3 } = await subject.record({ + const { id: id3 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated 2`, @@ -465,22 +370,22 
@@ describe('GitAdapter', () => { fetchDate: FETCH_DATE_EARLIER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id3); - (records = await subject.getAll()); + (records = await subject.findAll()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns all records', () => { expect(records.length).to.equal(3); }); - it('returns records with proper keys', () => { + it('returns Record objects', () => { for (const record of records) { - expect(record).to.have.keys([ 'id', 'serviceId', 'documentType', 'mimeType', 'fetchDate', 'content', 'isFirstRecord', 'isRefilter', 'snapshotId' ]); + expect(record).to.be.an.instanceof(Record); } }); @@ -493,23 +398,23 @@ describe('GitAdapter', () => { let count; before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); - await subject.record({ + })); + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); - await subject.record({ + })); + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated 2`, @@ -517,19 +422,19 @@ describe('GitAdapter', () => { fetchDate: FETCH_DATE_EARLIER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); (count = await subject.count()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns the proper count', async () => { expect(count).to.equal(3); }); }); - describe('#getLatest', () => { + describe('#findLatestByServiceIdAndDocumentType', () => { context('when there are records for the given service', () => { let lastSnapshotId; let latestRecord; @@ -538,31 +443,31 @@ describe('GitAdapter', () 
=> { const UPDATED_FILE_CONTENT = `${CONTENT} (with additional content to trigger a record)`; before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, - }); + })); - ({ id: lastSnapshotId } = await subject.record({ + ({ id: lastSnapshotId } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: UPDATED_FILE_CONTENT, mimeType: MIME_TYPE, - })); + }))); - latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns the latest record id', () => { expect(latestRecord.id).to.include(lastSnapshotId); }); it('returns the latest record content', async () => { - expect((await latestRecord.content).toString('utf8')).to.equal(UPDATED_FILE_CONTENT); + expect((latestRecord.content).toString('utf8')).to.equal(UPDATED_FILE_CONTENT); }); it('returns the latest record mime type', () => { @@ -572,24 +477,24 @@ describe('GitAdapter', () => { context('with PDF document', () => { before(async () => { - ({ id: lastSnapshotId } = await subject.record({ + ({ id: lastSnapshotId } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: PDF_CONTENT, mimeType: PDF_MIME_TYPE, - })); + }))); - latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns the latest record id', () => { expect(latestRecord.id).to.include(lastSnapshotId); }); it('returns the latest record content', async () => { - expect((await 
latestRecord.content).toString('utf8')).to.equal(PDF_CONTENT); + expect((latestRecord.content).toString('utf8')).to.equal(PDF_CONTENT); }); it('returns the latest record mime type', () => { @@ -602,7 +507,7 @@ describe('GitAdapter', () => { let latestRecord; before(async () => { - latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); it('returns an empty object', async () => { @@ -617,29 +522,29 @@ describe('GitAdapter', () => { const fetchDates = []; before(async () => { - const { id: id1 } = await subject.record({ + const { id: id1 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id1); - const { id: id2 } = await subject.record({ + const { id: id2 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id2); - const { id: id3 } = await subject.record({ + const { id: id3 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated 2`, @@ -647,7 +552,7 @@ describe('GitAdapter', () => { fetchDate: FETCH_DATE_EARLIER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id3); @@ -657,7 +562,7 @@ describe('GitAdapter', () => { } }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('iterates through all records', async () => { expect(ids).to.have.members(expectedIds); diff --git a/src/storage-adapters/mongo/dataMapper.js b/src/storage-adapters/mongo/dataMapper.js new file mode 100644 index 000000000..08495a922 --- /dev/null +++ b/src/storage-adapters/mongo/dataMapper.js @@ 
-0,0 +1,64 @@ +import { ObjectId } from 'mongodb'; + +import Record from '../record.js'; + +export default class DataMapper { + constructor({ adapter }) { + this.adapter = adapter; + } + + async toPersistence(record) { + if (record.content === undefined || record.content === null) { + await this.adapter.loadRecordContent(record); + } + + const { serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId, isFirstRecord } = record; + + const recordProperties = Object.fromEntries(Object.entries({ + serviceId, + documentType, + content, + mimeType, + fetchDate, + isRefilter, + snapshotId, + isFirstRecord, + }).filter(([ , value ]) => value)); // Remove empty values + + if (recordProperties.snapshotId) { + recordProperties.snapshotId = new ObjectId(snapshotId); + } + + recordProperties.content = record.content; + recordProperties.created_at = new Date(); + + return recordProperties; + } + + async toDomain(document, { lazyLoadContent = false } = {}) { + if (!document || !document._id) { + return {}; + } + + const { _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId } = document; + + const record = new Record({ + id: _id.toString(), + serviceId, + documentType, + mimeType, + fetchDate: new Date(fetchDate), + isFirstRecord: Boolean(isFirstRecord), + isRefilter: Boolean(isRefilter), + snapshotId: snapshotId && snapshotId.toString(), + }); + + if (lazyLoadContent) { + return record; + } + + await this.adapter.loadRecordContent(record); + + return record; + } +} diff --git a/src/storage-adapters/mongo/index.js b/src/storage-adapters/mongo/index.js index 377cbc744..da4150952 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/storage-adapters/mongo/index.js @@ -3,7 +3,9 @@ * Object IDs are used as opaque unique IDs. 
*/ -import { Binary, MongoClient, ObjectId } from 'mongodb'; +import { MongoClient, ObjectId, Binary } from 'mongodb'; + +import DataMapper from './dataMapper.js'; export default class MongoAdapter { constructor({ database: databaseName, collection: collectionName, connectionURI }) { @@ -12,6 +14,7 @@ export default class MongoAdapter { this.databaseName = databaseName; this.collectionName = collectionName; this.client = client; + this.dataMapper = new DataMapper({ adapter: this }); } async initialize() { @@ -27,108 +30,68 @@ export default class MongoAdapter { return this.client.close(); } - async record({ serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId }) { - if (content instanceof Promise) { - content = await content; - } + async save(record) { + const { serviceId, documentType } = record; - const previousRecord = await this.getLatest(serviceId, documentType); + const previousRecord = await this.findLatestByServiceIdAndDocumentType(serviceId, documentType); - if (previousRecord && await previousRecord.content == content) { - return {}; + if (record.isFirstRecord === undefined || record.isFirstRecord === null) { + record.isFirstRecord = !await this.collection.findOne({ serviceId, documentType }); } - const recordProperties = Object.fromEntries(Object.entries({ - serviceId, - documentType, - content, - mimeType, - fetchDate, - isRefilter, - snapshotId, - }).filter(([ , value ]) => value)); // Remove empty values - - const isFirstRecord = !await this.collection.findOne({ serviceId, documentType }); + const documentProperties = await this.dataMapper.toPersistence(record); - if (snapshotId) { - recordProperties.snapshotId = new ObjectId(snapshotId); + if (previousRecord && previousRecord.content == documentProperties.content) { + return {}; } - if (isFirstRecord) { - recordProperties.isFirstRecord = isFirstRecord; - } + const insertResult = await this.collection.insertOne(documentProperties); - const insertResult = await 
this.collection.insertOne({ ...recordProperties, created_at: new Date() }); + record.id = insertResult.insertedId.toString(); - return { - id: insertResult.insertedId.toString(), - isFirstRecord, - }; + return record; } - async getLatest(serviceId, documentType) { + async findLatestByServiceIdAndDocumentType(serviceId, documentType, { lazyLoadContent } = {}) { const [mongoDocument] = await this.collection.find({ serviceId, documentType }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one document use `find` - return this._convertDocumentToRecord(mongoDocument); + return this.dataMapper.toDomain(mongoDocument, { lazyLoadContent }); } - async get(recordId) { + async findById(recordId, { lazyLoadContent } = {}) { const mongoDocument = await this.collection.findOne({ _id: new ObjectId(recordId) }); - return this._convertDocumentToRecord(mongoDocument); + return this.dataMapper.toDomain(mongoDocument, { lazyLoadContent }); } - async getAll() { - return (await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) - .map(this._convertDocumentToRecord.bind(this)); + async findAll({ lazyLoadContent } = {}) { + return Promise.all((await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) + .map(mongoDocument => this.dataMapper.toDomain(mongoDocument, { lazyLoadContent }))); } async count() { return this.collection.find().count(); } - async* iterate() { + async* iterate({ lazyLoadContent } = {}) { const cursor = this.collection.find().sort({ fetchDate: 1 }); /* eslint-disable no-await-in-loop */ while (await cursor.hasNext()) { const mongoDocument = await cursor.next(); - yield this._convertDocumentToRecord(mongoDocument); + yield this.dataMapper.toDomain(mongoDocument, { lazyLoadContent }); } /* eslint-enable no-await-in-loop */ } - async _removeAll() { + async removeAll() { return this.collection.deleteMany(); } - _convertDocumentToRecord(document) { - if 
(!document || !document._id) { - return {}; - } + async loadRecordContent(record) { + const { content } = await this.collection.findOne({ _id: new ObjectId(record.id) }, { projection: { content: 1 } }); - const { _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId } = document; - - const { collection } = this; - const result = { - id: _id.toString(), - serviceId, - documentType, - mimeType, - fetchDate: new Date(fetchDate), - isFirstRecord: Boolean(isFirstRecord), - isRefilter: Boolean(isRefilter), - snapshotId: snapshotId && snapshotId.toString(), - get content() { - return (async () => { - const { content } = await collection.findOne({ _id }, { projection: { content: 1 } }); - - return content instanceof Binary ? content.buffer : content; - })(); - }, - }; - - return result; + record.content = content instanceof Binary ? content.buffer : content; } } diff --git a/src/storage-adapters/mongo/index.test.js b/src/storage-adapters/mongo/index.test.js index c0bf75019..27aaeb1ce 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/storage-adapters/mongo/index.test.js @@ -6,6 +6,8 @@ import chai from 'chai'; import config from 'config'; import { MongoClient } from 'mongodb'; +import Record from '../record.js'; + import MongoAdapter from './index.js'; const { expect } = chai; @@ -40,7 +42,7 @@ describe('MongoAdapter', () => { collection = db.collection(config.get('recorder.snapshots.storage.mongo.collection')); }); - describe('#record', () => { + describe('#save', () => { let record; let mongoDocument; let numberOfRecordsBefore; @@ -53,14 +55,14 @@ describe('MongoAdapter', () => { documentType: DOCUMENT_TYPE, }).count(); - (record = await subject.record({ + (record = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, - })); + }))); numberOfRecordsAfter = await collection.find({ serviceId: 
SERVICE_PROVIDER_ID, @@ -73,7 +75,7 @@ describe('MongoAdapter', () => { })); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -122,28 +124,28 @@ describe('MongoAdapter', () => { const UPDATED_CONTENT = `${CONTENT} updated`; before(async () => { - (record = await subject.record({ + (record = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, - })); + }))); numberOfRecordsBefore = await collection.find({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, }).count(); - (record = await subject.record({ + (record = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: UPDATED_CONTENT, mimeType: MIME_TYPE, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, - })); + }))); numberOfRecordsAfter = await collection.find({ serviceId: SERVICE_PROVIDER_ID, @@ -156,7 +158,7 @@ describe('MongoAdapter', () => { }).limit(1).sort({ created_at: -1 }).toArray()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -173,26 +175,26 @@ describe('MongoAdapter', () => { context('when the content has not changed', () => { before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, fetchDate: FETCH_DATE, - }); + })); numberOfRecordsBefore = await collection.find({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, }).count(); - (record = await subject.record({ + (record = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, fetchDate: 
FETCH_DATE_LATER, - })); + }))); numberOfRecordsAfter = await collection.find({ serviceId: SERVICE_PROVIDER_ID, @@ -200,7 +202,7 @@ describe('MongoAdapter', () => { }).count(); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('does not save the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore); @@ -215,18 +217,18 @@ describe('MongoAdapter', () => { const REFILTERED_CONTENT = `${CONTENT} refiltered`; before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, - }); // A refilter cannot be the first record + })); // A refilter cannot be the first record numberOfRecordsBefore = await collection.find({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, }).count(); - (record = await subject.record({ + (record = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: REFILTERED_CONTENT, @@ -234,7 +236,7 @@ describe('MongoAdapter', () => { fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, isRefilter: true, - })); + }))); numberOfRecordsAfter = await collection.find({ serviceId: SERVICE_PROVIDER_ID, @@ -247,7 +249,7 @@ describe('MongoAdapter', () => { }).limit(1).sort({ created_at: -1 }).toArray()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -271,14 +273,14 @@ describe('MongoAdapter', () => { mimeType: PDF_MIME_TYPE, }).count(); - (record = await subject.record({ + (record = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: PDF_CONTENT, mimeType: PDF_MIME_TYPE, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, - })); + }))); numberOfRecordsAfter = await collection.find({ serviceId: SERVICE_PROVIDER_ID, @@ -291,7 +293,7 @@ describe('MongoAdapter', () => { })); 
}); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('saves the record', () => { expect(numberOfRecordsAfter).to.equal(numberOfRecordsBefore + 1); @@ -313,24 +315,24 @@ describe('MongoAdapter', () => { }); }); - describe('#get', () => { + describe('#findById', () => { let record; let id; before(async () => { - ({ id } = await subject.record({ + ({ id } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - })); + }))); - (record = await subject.get(id)); + (record = await subject.findById(id)); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns the record id', () => { expect(record.id).to.include(id); @@ -348,8 +350,8 @@ describe('MongoAdapter', () => { expect(record.documentType).to.equal(DOCUMENT_TYPE); }); - it('returns a asynchronous content getter', async () => { - expect(await record.content).to.equal(CONTENT); + it('returns the content', async () => { + expect(record.content).to.equal(CONTENT); }); it('stores the fetch date', () => { @@ -366,39 +368,39 @@ describe('MongoAdapter', () => { context('when requested record does not exists', () => { it('returns an empty object', async () => { - expect(await subject.get('inexistantID')).to.deep.equal({}); + expect(await subject.findById('inexistantID')).to.deep.equal({}); }); }); }); - describe('#getAll', () => { + describe('#findAll', () => { let records; const expectedIds = []; before(async () => { - const { id: id1 } = await subject.record({ + const { id: id1 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, - // snapshotId: SNAPSHOT_ID, + snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id1); - const { id: id2 } = await subject.record({ + const { id: id2 } = await subject.save(new 
Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id2); - const { id: id3 } = await subject.record({ + const { id: id3 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated 2`, @@ -406,22 +408,22 @@ describe('MongoAdapter', () => { fetchDate: FETCH_DATE_EARLIER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id3); - (records = await subject.getAll()); + (records = await subject.findAll()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns all records', () => { expect(records.length).to.equal(3); }); - it('returns records with proper keys', () => { + it('returns Record objects', () => { for (const record of records) { - expect(record).to.have.keys([ 'id', 'serviceId', 'documentType', 'mimeType', 'fetchDate', 'content', 'isFirstRecord', 'isRefilter', 'snapshotId' ]); + expect(record).to.be.an.instanceof(Record); } }); @@ -434,23 +436,23 @@ describe('MongoAdapter', () => { let count; before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); - await subject.record({ + })); + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); - await subject.record({ + })); + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated 2`, @@ -458,19 +460,19 @@ describe('MongoAdapter', () => { fetchDate: FETCH_DATE_EARLIER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); (count = 
await subject.count()); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns the proper count', async () => { expect(count).to.equal(3); }); }); - describe('#getLatest', () => { + describe('#findLatestByServiceIdAndDocumentType', () => { context('when there are records for the given service', () => { let lastSnapshotId; let latestRecord; @@ -479,28 +481,28 @@ describe('MongoAdapter', () => { const UPDATED_CONTENT = `${CONTENT} (with additional content to trigger a record)`; before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, - }); + })); - ({ id: lastSnapshotId } = await subject.record({ + ({ id: lastSnapshotId } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: UPDATED_CONTENT, mimeType: MIME_TYPE, fetchDate: FETCH_DATE_LATER, - })); + }))); - latestRecord = await subject.getLatest( + latestRecord = await subject.findLatestByServiceIdAndDocumentType( SERVICE_PROVIDER_ID, DOCUMENT_TYPE, ); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns the latest record id', () => { expect(latestRecord.id).to.include(lastSnapshotId); @@ -517,33 +519,33 @@ describe('MongoAdapter', () => { context('with PDF document', () => { before(async () => { - await subject.record({ + await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: PDF_CONTENT, mimeType: PDF_MIME_TYPE, fetchDate: FETCH_DATE, - }); + })); - ({ id: lastSnapshotId } = await subject.record({ + ({ id: lastSnapshotId } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: UPDATED_PDF_CONTENT, mimeType: PDF_MIME_TYPE, fetchDate: FETCH_DATE_LATER, - })); + }))); - latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord 
= await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); - after(async () => subject._removeAll()); + after(async () => subject.removeAll()); it('returns the latest record id', () => { expect(latestRecord.id).to.include(lastSnapshotId); }); it('returns the latest record content', async () => { - const isSameContent = Buffer.compare(await latestRecord.content, UPDATED_PDF_CONTENT) == 0; + const isSameContent = Buffer.compare(latestRecord.content, UPDATED_PDF_CONTENT) == 0; expect(isSameContent).to.be.true; }); @@ -558,7 +560,7 @@ describe('MongoAdapter', () => { let latestRecord; before(async () => { - latestRecord = await subject.getLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); it('returns an empty object', async () => { @@ -573,29 +575,29 @@ describe('MongoAdapter', () => { const fetchDates = []; before(async () => { - const { id: id1 } = await subject.record({ + const { id: id1 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id1); - const { id: id2 } = await subject.record({ + const { id: id2 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated`, fetchDate: FETCH_DATE_LATER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id2); - const { id: id3 } = await subject.record({ + const { id: id3 } = await subject.save(new Record({ serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: `${CONTENT} - updated 2`, @@ -603,7 +605,7 @@ describe('MongoAdapter', () => { fetchDate: FETCH_DATE_EARLIER, snapshotId: SNAPSHOT_ID, mimeType: MIME_TYPE, - }); + })); expectedIds.push(id3); @@ -613,7 +615,7 @@ describe('MongoAdapter', () => { } }); - after(async () 
=> subject._removeAll()); + after(async () => subject.removeAll()); it('iterates through all records', async () => { expect(ids).to.have.members(expectedIds); diff --git a/src/storage-adapters/record.js b/src/storage-adapters/record.js new file mode 100644 index 000000000..d5f3c8391 --- /dev/null +++ b/src/storage-adapters/record.js @@ -0,0 +1,15 @@ +export default class Record { + constructor({ id, serviceId, documentType, mimeType, fetchDate, isFirstRecord, isRefilter, snapshotId, content }) { + this.id = id; + this.serviceId = serviceId; + this.documentType = documentType; + this.mimeType = mimeType; + this.fetchDate = fetchDate; + this.isFirstRecord = isFirstRecord; + this.isRefilter = isRefilter; + this.snapshotId = snapshotId; + if (content) { + this.content = content; + } + } +} From 6d225e89e447aaa65381583d09257999956b5207 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 7 Jun 2022 11:45:28 +0200 Subject: [PATCH 33/74] Rename `adapter` to `repository` Avoid confusion as the pattern followed is the Repository pattern and not the Adapter pattern --- README.md | 14 ++-- ops/README.md | 2 +- scripts/dataset/export/index.js | 8 +- scripts/dataset/export/index.test.js | 20 ++--- scripts/history/migrate-services.js | 26 +++---- scripts/history/update-to-full-hash.js | 30 ++++---- scripts/history/utils/index.js | 16 ++-- scripts/import/index.js | 2 +- scripts/import/loadCommits.js | 2 +- scripts/rewrite/initializer/index.js | 2 +- scripts/rewrite/rewrite-snapshots.js | 8 +- scripts/rewrite/rewrite-versions.js | 8 +- src/archivist/index.js | 2 +- src/archivist/index.test.js | 26 +++---- src/archivist/recorder/index.js | 22 +++--- src/archivist/recorder/index.test.js | 76 +++++++++---------- src/index.js | 20 ++--- src/main.js | 6 +- .../git/dataMapper.js | 10 +-- .../git/git.js | 0 .../git/index.js | 4 +- .../git/index.test.js | 6 +- .../mongo/dataMapper.js | 8 +- .../mongo/index.js | 4 +- .../mongo/index.test.js | 6 +- .../record.js | 0 26 files changed, 
164 insertions(+), 164 deletions(-) rename src/{storage-adapters => repositories}/git/dataMapper.js (90%) rename src/{storage-adapters => repositories}/git/git.js (100%) rename src/{storage-adapters => repositories}/git/index.js (97%) rename src/{storage-adapters => repositories}/git/index.test.js (99%) rename src/{storage-adapters => repositories}/mongo/dataMapper.js (89%) rename src/{storage-adapters => repositories}/mongo/index.js (96%) rename src/{storage-adapters => repositories}/mongo/index.test.js (99%) rename src/{storage-adapters => repositories}/record.js (100%) diff --git a/README.md b/README.md index 43dae8068..487292c43 100644 --- a/README.md +++ b/README.md @@ -19,11 +19,11 @@ - [Core](#core) - [Configuring](#configuring) - [Configuration file](#configuration-file) - - [Storage adapters](#storage-adapters) + - [Storage repositories](#storage-repositories) - [Environment variables](#environment-variables) - [Running](#running) - [Deploying](#deploying) -- [Publishing](#publishing) +- [Publishing](#publishing) - [Contributing](#contributing) - [Adding or updating a service](#adding-a-new-service-or-updating-an-existing-service) - [Core engine](#core-engine) @@ -144,7 +144,7 @@ When refering to the base folder, it means the folder where you will be `git pul 5. If you are using a special repo instance (e.g., `dating-declarations`), create a new [config file](#configuring), `config/development.json`, and add: ```json { - + "services": { "declarationsPath": "..//declarations" } @@ -185,12 +185,12 @@ The default configuration can be found in `config/default.json`. 
The full refere "recorder": { "versions": { "storage": { - "": "Storage adapter configuration object; see below" + "": "Storage repository configuration object; see below" } }, "snapshots": { "storage": { - "": "Storage adapter configuration object; see below" + "": "Storage repository configuration object; see below" } } }, @@ -235,9 +235,9 @@ The default configuration is merged with (and overridden by) environment-specifi If you want to change your local configuration, we suggest you create a `config/development.json` file with overridden values. An example of a production configuration file can be found in `config/production.json`. -##### Storage adapters +##### Storage repositories -Two storage adapters are currently supported: Git and MongoDB. Each one can be used independently for versions and snapshots. +Two storage repositories are currently supported: Git and MongoDB. Each one can be used independently for versions and snapshots. ###### Git diff --git a/ops/README.md b/ops/README.md index 078e9eeb4..a75ee1770 100644 --- a/ops/README.md +++ b/ops/README.md @@ -168,7 +168,7 @@ In order to automatically set up a virtual machine: 2. Install [VirtualBox](https://www.virtualbox.org/wiki/Downloads) to manage virtual machines. If you prefer Docker, or have an Apple Silicon machine, install [Docker](https://docs.docker.com/get-docker/) instead. 3. Create a dedicated SSH key with no password: `ssh-keygen -f ~/.ssh/ota-vagrant -q -N ""`. This key will be automatically used by Vagrant. -> VirtualBox is not compatible with Apple Silicon (M1…) processors. If you have such a machine, you will need to use the Docker provider. Since MongoDB cannot be installed on ARM, it is skipped in the infrastructure installation process. This means you cannot test the MongoDB storage adapter with Vagrant with an Apple Silicon processor. +> VirtualBox is not compatible with Apple Silicon (M1…) processors. If you have such a machine, you will need to use the Docker provider. 
Since MongoDB cannot be installed on ARM, it is skipped in the infrastructure installation process. This means you cannot test the MongoDB storage repository with Vagrant with an Apple Silicon processor. ### Launch diff --git a/scripts/dataset/export/index.js b/scripts/dataset/export/index.js index f6cfbcb68..7598aba0f 100644 --- a/scripts/dataset/export/index.js +++ b/scripts/dataset/export/index.js @@ -4,7 +4,7 @@ import { fileURLToPath } from 'url'; import archiver from 'archiver'; -import { instantiateVersionsStorageAdapter } from '../../../src/index.js'; +import { instantiateVersionsRepository } from '../../../src/index.js'; import * as renamer from '../../utils/renamer/index.js'; import readme from '../assets/README.template.js'; import logger from '../logger/index.js'; @@ -16,7 +16,7 @@ const fs = fsApi.promises; const ARCHIVE_FORMAT = 'zip'; // for supported formats, see https://www.archiverjs.com/docs/archive-formats export default async function generate({ archivePath, releaseDate }) { - const versionsStorageAdapter = await (instantiateVersionsStorageAdapter()).initialize(); + const versionsRepository = await (instantiateVersionsRepository()).initialize(); const archive = await initializeArchive(archivePath); @@ -28,7 +28,7 @@ export default async function generate({ archivePath, releaseDate }) { let index = 1; - for await (const version of versionsStorageAdapter.iterate()) { + for await (const version of versionsRepository.iterate()) { const { content, fetchDate } = version; const { serviceId, documentType } = renamer.applyRules(version.serviceId, version.documentType); @@ -70,7 +70,7 @@ export default async function generate({ archivePath, releaseDate }) { archive.stream.finalize(); await archive.done; - await versionsStorageAdapter.finalize(); + await versionsRepository.finalize(); return { servicesCount: services.size, diff --git a/scripts/dataset/export/index.test.js b/scripts/dataset/export/index.test.js index 51fd4b45f..7c4b55d36 100644 --- 
a/scripts/dataset/export/index.test.js +++ b/scripts/dataset/export/index.test.js @@ -8,8 +8,8 @@ import dircompare from 'dir-compare'; import mime from 'mime'; import StreamZip from 'node-stream-zip'; -import GitAdapter from '../../../src/storage-adapters/git/index.js'; -import Record from '../../../src/storage-adapters/record.js'; +import GitRepository from '../../../src/repositories/git/index.js'; +import Record from '../../../src/repositories/record.js'; import generateArchive from './index.js'; @@ -44,19 +44,19 @@ describe('Export', () => { const TMP_PATH = path.resolve(__dirname, './tmp'); const EXPECTED_DATASET_PATH = path.resolve(__dirname, './test/fixtures/dataset'); - let storageAdapter; + let repository; let zip; before(async function () { this.timeout(10000); - storageAdapter = new GitAdapter({ + repository = new GitRepository({ ...config.get('recorder.versions.storage.git'), path: path.resolve(__dirname, '../../../', config.get('recorder.versions.storage.git.path')), }); - await storageAdapter.initialize(); + await repository.initialize(); - await storageAdapter.save(new Record({ + await repository.save(new Record({ serviceId: FIRST_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: FIRST_CONTENT, @@ -65,7 +65,7 @@ describe('Export', () => { snapshotId: SNAPSHOT_ID, })); - await storageAdapter.save(new Record({ + await repository.save(new Record({ serviceId: FIRST_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: SECOND_CONTENT, @@ -74,7 +74,7 @@ describe('Export', () => { snapshotId: SNAPSHOT_ID, })); - await storageAdapter.save(new Record({ + await repository.save(new Record({ serviceId: SECOND_SERVICE_PROVIDER_ID, documentType: FIRST_DOCUMENT_TYPE, content: FIRST_CONTENT, @@ -83,7 +83,7 @@ describe('Export', () => { snapshotId: SNAPSHOT_ID, })); - await storageAdapter.save(new Record({ + await repository.save(new Record({ serviceId: SECOND_SERVICE_PROVIDER_ID, documentType: SECOND_DOCUMENT_TYPE, content: FIRST_CONTENT, 
@@ -104,7 +104,7 @@ describe('Export', () => { after(async () => { await fs.rm(TMP_PATH, { recursive: true }); - await storageAdapter.removeAll(); + await repository.removeAll(); }); it('is an archive', () => { diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js index 2dd66b45f..8efc471bb 100644 --- a/scripts/history/migrate-services.js +++ b/scripts/history/migrate-services.js @@ -5,7 +5,7 @@ import { fileURLToPath } from 'url'; import config from 'config'; import winston from 'winston'; -import GitAdapter from '../../src/storage-adapters/git/index.js'; +import GitRepository from '../../src/repositories/git/index.js'; import { format } from './logger/index.js'; import { importReadme } from './utils/index.js'; @@ -40,22 +40,22 @@ const COUNTERS = { services: CONFIG.servicesToMigrate, from: { snapshots: { - source: new GitAdapter({ + source: new GitRepository({ ...config.get('recorder.snapshots.storage.git'), path: path.resolve(ROOT_PATH, `./data/${CONFIG.from.snapshots}`), }), - destination: new GitAdapter({ + destination: new GitRepository({ ...config.get('recorder.snapshots.storage.git'), path: path.resolve(ROOT_PATH, `./data/${CONFIG.from.snapshots}-migrated`), }), logger: winston.createLogger({ transports: [ new (winston.transports.File)({ filename: `${__dirname}/logs/${CONFIG.from.snapshots}.log` }), new winston.transports.Console() ], format }), }, versions: { - source: new GitAdapter({ + source: new GitRepository({ ...config.get('recorder.versions.storage.git'), path: path.resolve(ROOT_PATH, `./data/${CONFIG.from.versions}`), }), - destination: new GitAdapter({ + destination: new GitRepository({ ...config.get('recorder.versions.storage.git'), path: path.resolve(ROOT_PATH, `./data/${CONFIG.from.versions}-migrated`), prefixMessageToSnapshotId: CONFIG.from.prefixMessageToSnapshotId, @@ -65,22 +65,22 @@ const COUNTERS = { }, to: { snapshots: { - source: new GitAdapter({ + source: new GitRepository({ 
...config.get('recorder.snapshots.storage.git'), path: path.resolve(ROOT_PATH, `./data/${CONFIG.to.snapshots}`), }), - destination: new GitAdapter({ + destination: new GitRepository({ ...config.get('recorder.snapshots.storage.git'), path: path.resolve(ROOT_PATH, `./data/${CONFIG.to.snapshots}-migrated`), }), logger: winston.createLogger({ transports: [ new (winston.transports.File)({ filename: `${__dirname}/logs/${CONFIG.to.snapshots}.log` }), new winston.transports.Console() ], format }), }, versions: { - source: new GitAdapter({ + source: new GitRepository({ ...config.get('recorder.versions.storage.git'), path: path.resolve(ROOT_PATH, `./data/${CONFIG.to.versions}`), }), - destination: new GitAdapter({ + destination: new GitRepository({ ...config.get('recorder.versions.storage.git'), path: path.resolve(ROOT_PATH, `./data/${CONFIG.to.versions}-migrated`), prefixMessageToSnapshotId: CONFIG.to.prefixMessageToSnapshotId, @@ -136,11 +136,11 @@ const COUNTERS = { await finalize(migration); }()); -async function rewriteSnapshots(adapter, records, idsMapping, logger) { +async function rewriteSnapshots(repository, records, idsMapping, logger) { let i = 1; for (const record of records) { - const { id: recordId } = await adapter.record(record); // eslint-disable-line no-await-in-loop + const { id: recordId } = await repository.record(record); // eslint-disable-line no-await-in-loop idsMapping[record.id] = recordId; // Saves the mapping between the old ID and the new one. 
@@ -154,7 +154,7 @@ async function rewriteSnapshots(adapter, records, idsMapping, logger) { } } -async function rewriteVersions(adapter, records, idsMapping, logger) { +async function rewriteVersions(repository, records, idsMapping, logger) { let i = 1; for (const record of records) { @@ -166,7 +166,7 @@ async function rewriteVersions(adapter, records, idsMapping, logger) { record.snapshotId = newSnapshotId; - const { id: recordId } = await adapter.record(record); // eslint-disable-line no-await-in-loop + const { id: recordId } = await repository.record(record); // eslint-disable-line no-await-in-loop if (recordId) { logger.info({ message: `Migrated version with new ID: ${recordId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); diff --git a/scripts/history/update-to-full-hash.js b/scripts/history/update-to-full-hash.js index 37d6d4beb..d24dc5699 100644 --- a/scripts/history/update-to-full-hash.js +++ b/scripts/history/update-to-full-hash.js @@ -3,7 +3,7 @@ import { fileURLToPath } from 'url'; import config from 'config'; -import GitAdapter from '../../src/storage-adapters/git/index.js'; +import GitRepository from '../../src/repositories/git/index.js'; import logger from './logger/index.js'; import { importReadme } from './utils/index.js'; @@ -14,35 +14,35 @@ const ROOT_PATH = path.resolve(__dirname, '../../'); (async function main() { console.time('Total time'); - const versionsAdapter = new GitAdapter({ + const versionsRepository = new GitRepository({ ...config.get('recorder.versions.storage.git'), path: path.resolve(ROOT_PATH, './data/france-elections-versions'), }); - const versionsTargetAdapter = new GitAdapter({ + const versionsTargetRepository = new GitRepository({ ...config.get('recorder.versions.storage.git'), prefixMessageToSnapshotId: 'This version was recorded after filtering snapshot https://github.com/OpenTermsArchive/france-elections-snapshots/commit/', path: path.resolve(ROOT_PATH, 
'./data/france-elections-versions-hash-updated-test'), }); - const snapshotsAdapter = new GitAdapter({ + const snapshotsRepository = new GitRepository({ ...config.get('recorder.snapshots.storage.git'), path: path.resolve(ROOT_PATH, './data/france-elections-snapshots'), }); - await versionsAdapter.initialize(); - await versionsTargetAdapter.initialize(); - await snapshotsAdapter.initialize(); + await versionsRepository.initialize(); + await versionsTargetRepository.initialize(); + await snapshotsRepository.initialize(); - await importReadme({ from: versionsAdapter, to: versionsTargetAdapter }); + await importReadme({ from: versionsRepository, to: versionsTargetRepository }); - const total = await versionsAdapter.count(); + const total = await versionsRepository.count(); let current = 1; - for await (const record of versionsAdapter.iterate()) { - const fullSnapshotId = await snapshotsAdapter.git.getFullHash(record.snapshotId); + for await (const record of versionsRepository.iterate()) { + const fullSnapshotId = await snapshotsRepository.git.getFullHash(record.snapshotId); - const { id: recordId } = await versionsTargetAdapter.record({ ...record, snapshotId: fullSnapshotId }); + const { id: recordId } = await versionsTargetRepository.record({ ...record, snapshotId: fullSnapshotId }); if (!recordId) { logger.warn({ message: 'Record skipped', serviceId: record.serviceId, type: record.documentType, id: record.id, current, total }); @@ -53,7 +53,7 @@ const ROOT_PATH = path.resolve(__dirname, '../../'); current++; } - await versionsAdapter.finalize(); - await versionsTargetAdapter.finalize(); - await snapshotsAdapter.finalize(); + await versionsRepository.finalize(); + await versionsTargetRepository.finalize(); + await snapshotsRepository.finalize(); }()); diff --git a/scripts/history/utils/index.js b/scripts/history/utils/index.js index 9b4f742f0..fcdf23fe1 100644 --- a/scripts/history/utils/index.js +++ b/scripts/history/utils/index.js @@ -2,21 +2,21 @@ import fsApi from 
'fs'; const fs = fsApi.promises; -export async function importReadme({ from: sourceAdapter, to: targetAdapter }) { - const sourceAdapterReadmePath = `${sourceAdapter.path}/README.md`; - const targetAdapterReadmePath = `${targetAdapter.path}/README.md`; +export async function importReadme({ from: sourceRepository, to: targetRepository }) { + const sourceRepositoryReadmePath = `${sourceRepository.path}/README.md`; + const targetRepositoryReadmePath = `${targetRepository.path}/README.md`; - const [readmeCommit] = await sourceAdapter.git.log(['README.md']); + const [readmeCommit] = await sourceRepository.git.log(['README.md']); if (!readmeCommit) { - console.warn(`No commits found for README in ${sourceAdapter.path}`); + console.warn(`No commits found for README in ${sourceRepository.path}`); return; } - await fs.copyFile(sourceAdapterReadmePath, targetAdapterReadmePath); - await targetAdapter._commit({ - filePath: targetAdapterReadmePath, + await fs.copyFile(sourceRepositoryReadmePath, targetRepositoryReadmePath); + await targetRepository._commit({ + filePath: targetRepositoryReadmePath, message: readmeCommit.message, date: readmeCommit.date, }); diff --git a/scripts/import/index.js b/scripts/import/index.js index 1436b10a4..0513f45d9 100644 --- a/scripts/import/index.js +++ b/scripts/import/index.js @@ -8,7 +8,7 @@ import mime from 'mime'; import { MongoClient } from 'mongodb'; import nodeFetch from 'node-fetch'; -import Git from '../../src/storage-adapters/git/git.js'; +import Git from '../../src/repositories/git/git.js'; import * as renamer from '../utils/renamer/index.js'; import logger from './logger/index.js'; diff --git a/scripts/import/loadCommits.js b/scripts/import/loadCommits.js index 7c4b501a5..415e2f197 100644 --- a/scripts/import/loadCommits.js +++ b/scripts/import/loadCommits.js @@ -5,7 +5,7 @@ import { fileURLToPath } from 'url'; import config from 'config'; import { MongoClient } from 'mongodb'; -import Git from 
'../../src/storage-adapters/git/git.js'; +import Git from '../../src/repositories/git/git.js'; import logger from './logger/index.js'; diff --git a/scripts/rewrite/initializer/index.js b/scripts/rewrite/initializer/index.js index c1d803718..fa0824137 100644 --- a/scripts/rewrite/initializer/index.js +++ b/scripts/rewrite/initializer/index.js @@ -4,7 +4,7 @@ import { fileURLToPath } from 'url'; import config from 'config'; -import Git from '../../../src/storage-adapters/git/git.js'; +import Git from '../../../src/repositories/git/git.js'; import { fileExists } from '../utils.js'; const fs = fsApi.promises; diff --git a/scripts/rewrite/rewrite-snapshots.js b/scripts/rewrite/rewrite-snapshots.js index 19e4fec94..b9d7f2704 100644 --- a/scripts/rewrite/rewrite-snapshots.js +++ b/scripts/rewrite/rewrite-snapshots.js @@ -4,8 +4,8 @@ import { fileURLToPath } from 'url'; import config from 'config'; import Recorder from '../../src/archivist/recorder/index.js'; -import Git from '../../src/storage-adapters/git/git.js'; -import GitAdapter from '../../src/storage-adapters/git/index.js'; +import Git from '../../src/repositories/git/git.js'; +import GitRepository from '../../src/repositories/git/index.js'; import * as renamer from '../utils/renamer/index.js'; import * as initializer from './initializer/index.js'; @@ -50,11 +50,11 @@ let recorder; } recorder = new Recorder({ - versionsStorageAdapter: new GitAdapter({ + versionsRepository: new GitRepository({ ...config.get('recorder.versions.storage.git'), path: VERSIONS_TARGET_PATH, }), - snapshotsStorageAdapter: new GitAdapter({ + snapshotsRepository: new GitRepository({ ...config.get('recorder.snapshots.storage.git'), path: SNAPSHOTS_TARGET_PATH, }), diff --git a/scripts/rewrite/rewrite-versions.js b/scripts/rewrite/rewrite-versions.js index 47d734149..8ae5fb546 100644 --- a/scripts/rewrite/rewrite-versions.js +++ b/scripts/rewrite/rewrite-versions.js @@ -7,8 +7,8 @@ import { InaccessibleContentError } from 
'../../src/archivist/errors.js'; import filter from '../../src/archivist/filter/index.js'; import Recorder from '../../src/archivist/recorder/index.js'; import * as services from '../../src/archivist/services/index.js'; -import Git from '../../src/storage-adapters/git/git.js'; -import GitAdapter from '../../src/storage-adapters/git/index.js'; +import Git from '../../src/repositories/git/git.js'; +import GitRepository from '../../src/repositories/git/index.js'; import * as renamer from '../utils/renamer/index.js'; import * as initializer from './initializer/index.js'; @@ -59,11 +59,11 @@ let recorder; } recorder = new Recorder({ - versionsStorageAdapter: new GitAdapter({ + versionsRepository: new GitRepository({ ...config.get('recorder.versions.storage.git'), path: VERSIONS_TARGET_PATH, }), - snapshotsStorageAdapter: new GitAdapter({ + snapshotsRepository: new GitRepository({ ...config.get('recorder.snapshots.storage.git'), path: SNAPSHOTS_SOURCE_PATH, }), diff --git a/src/archivist/index.js b/src/archivist/index.js index aca1bf9db..3c1f1aad9 100644 --- a/src/archivist/index.js +++ b/src/archivist/index.js @@ -43,7 +43,7 @@ export default class Archivist extends events.EventEmitter { constructor({ storage: { versions, snapshots } }) { super(); - this.recorder = new Recorder({ versionsStorageAdapter: versions, snapshotsStorageAdapter: snapshots }); + this.recorder = new Recorder({ versionsRepository: versions, snapshotsRepository: snapshots }); } async initialize() { diff --git a/src/archivist/index.test.js b/src/archivist/index.test.js index 312c407a5..37bbe1879 100644 --- a/src/archivist/index.test.js +++ b/src/archivist/index.test.js @@ -8,8 +8,8 @@ import nock from 'nock'; import sinon from 'sinon'; import sinonChai from 'sinon-chai'; -import Git from '../storage-adapters/git/git.js'; -import GitAdapter from '../storage-adapters/git/index.js'; +import Git from '../repositories/git/git.js'; +import GitRepository from '../repositories/git/index.js'; import 
Archivist, { AVAILABLE_EVENTS } from './index.js'; @@ -27,11 +27,11 @@ const VERSIONS_PATH = path.resolve(ROOT_PATH, config.get('recorder.versions.stor const MIME_TYPE = 'text/html'; const FETCH_DATE = new Date('2000-01-02T12:00:00.000Z'); -let snapshotsStorageAdapter; -let versionsStorageAdapter; +let snapshotsRepository; +let versionsRepository; async function resetGitRepositories() { - return Promise.all([ snapshotsStorageAdapter.removeAll(), versionsStorageAdapter.removeAll() ]); + return Promise.all([ snapshotsRepository.removeAll(), versionsRepository.removeAll() ]); } let gitVersion; @@ -69,11 +69,11 @@ describe('Archivist', function () { serviceAVersionExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/service_A_terms.md'), { encoding: 'utf8' }); serviceBSnapshotExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/terms.pdf')); serviceBVersionExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/termsFromPDF.md'), { encoding: 'utf8' }); - snapshotsStorageAdapter = new GitAdapter({ + snapshotsRepository = new GitRepository({ ...config.get('recorder.snapshots.storage.git'), path: SNAPSHOTS_PATH, }); - versionsStorageAdapter = new GitAdapter({ + versionsRepository = new GitRepository({ ...config.get('recorder.versions.storage.git'), path: VERSIONS_PATH, }); @@ -85,7 +85,7 @@ describe('Archivist', function () { before(async () => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - app = new Archivist({ storage: { versions: versionsStorageAdapter, snapshots: snapshotsStorageAdapter } }); + app = new Archivist({ storage: { versions: versionsRepository, snapshots: snapshotsRepository } }); await app.initialize(); }); @@ -163,13 +163,13 @@ describe('Archivist', function () { before(async 
() => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - const app = new Archivist({ storage: { versions: versionsStorageAdapter, snapshots: snapshotsStorageAdapter } }); + const app = new Archivist({ storage: { versions: versionsRepository, snapshots: snapshotsRepository } }); await app.initialize(); await app.trackChanges(serviceIds); - ({ id: originalSnapshotId } = await snapshotsStorageAdapter.findLatestByServiceIdAndDocumentType(SERVICE_A_ID, SERVICE_A_TYPE)); - ({ id: firstVersionId } = await versionsStorageAdapter.findLatestByServiceIdAndDocumentType(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: originalSnapshotId } = await snapshotsRepository.findLatestByServiceIdAndDocumentType(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: firstVersionId } = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_A_ID, SERVICE_A_TYPE)); serviceBCommits = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH }); @@ -219,7 +219,7 @@ describe('Archivist', function () { before(async () => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - const app = new Archivist({ storage: { versions: versionsStorageAdapter, snapshots: snapshotsStorageAdapter } }); + const app = new Archivist({ storage: { versions: versionsRepository, snapshots: snapshotsRepository } }); await app.initialize(); await app.trackChanges(serviceIds); @@ -265,7 +265,7 @@ describe('Archivist', function () { } before(async () => { - app = new Archivist({ storage: { versions: versionsStorageAdapter, snapshots: snapshotsStorageAdapter } }); + app = new Archivist({ storage: { 
versions: versionsRepository, snapshots: snapshotsRepository } }); await app.initialize(); AVAILABLE_EVENTS.forEach(event => { diff --git a/src/archivist/recorder/index.js b/src/archivist/recorder/index.js index 349b301a9..d8bc4e025 100644 --- a/src/archivist/recorder/index.js +++ b/src/archivist/recorder/index.js @@ -1,25 +1,25 @@ -import Record from '../../storage-adapters/record.js'; +import Record from '../../repositories/record.js'; export default class Recorder { - constructor({ versionsStorageAdapter, snapshotsStorageAdapter }) { - if (!versionsStorageAdapter || !snapshotsStorageAdapter) { - throw new RangeError('Storage adapters should be defined both for versions and snapshots'); + constructor({ versionsRepository, snapshotsRepository }) { + if (!versionsRepository || !snapshotsRepository) { + throw new RangeError('Storage repositories should be defined both for versions and snapshots'); } - this.versionsStorageAdapter = versionsStorageAdapter; - this.snapshotsStorageAdapter = snapshotsStorageAdapter; + this.versionsRepository = versionsRepository; + this.snapshotsRepository = snapshotsRepository; } async initialize() { - return Promise.all([ this.versionsStorageAdapter.initialize(), this.snapshotsStorageAdapter.initialize() ]); + return Promise.all([ this.versionsRepository.initialize(), this.snapshotsRepository.initialize() ]); } async finalize() { - return Promise.all([ this.versionsStorageAdapter.finalize(), this.snapshotsStorageAdapter.finalize() ]); + return Promise.all([ this.versionsRepository.finalize(), this.snapshotsRepository.finalize() ]); } async getLatestSnapshot(serviceId, documentType) { - return this.snapshotsStorageAdapter.findLatestByServiceIdAndDocumentType(serviceId, documentType); + return this.snapshotsRepository.findLatestByServiceIdAndDocumentType(serviceId, documentType); } async recordSnapshot({ serviceId, documentType, fetchDate, mimeType, content }) { @@ -43,7 +43,7 @@ export default class Recorder { throw new Error('A 
document mime type is required to ensure data consistency'); } - return this.snapshotsStorageAdapter.save(new Record({ serviceId, documentType, fetchDate, mimeType, content })); + return this.snapshotsRepository.save(new Record({ serviceId, documentType, fetchDate, mimeType, content })); } async recordVersion({ serviceId, documentType, snapshotId, fetchDate, mimeType, content, isRefilter }) { @@ -71,7 +71,7 @@ export default class Recorder { throw new Error('A document mime type is required to ensure data consistency'); } - return this.versionsStorageAdapter.save(new Record({ serviceId, documentType, snapshotId, fetchDate, mimeType, content, isRefilter })); + return this.versionsRepository.save(new Record({ serviceId, documentType, snapshotId, fetchDate, mimeType, content, isRefilter })); } async recordRefilter(params) { diff --git a/src/archivist/recorder/index.test.js b/src/archivist/recorder/index.test.js index a4af2db4b..2702bda4b 100644 --- a/src/archivist/recorder/index.test.js +++ b/src/archivist/recorder/index.test.js @@ -4,8 +4,8 @@ import { fileURLToPath } from 'url'; import chai from 'chai'; import config from 'config'; -import GitAdapter from '../../storage-adapters/git/index.js'; -import MongoAdapter from '../../storage-adapters/mongo/index.js'; +import GitRepository from '../../repositories/git/index.js'; +import MongoRepository from '../../repositories/mongo/index.js'; import Recorder from './index.js'; @@ -24,25 +24,25 @@ describe('Recorder', () => { const SERVICE_ID = 'test_service'; const TYPE = 'Terms of Service'; - const adaptersTypes = { + const repositoriesTypes = { git: { - snapshots: new GitAdapter({ + snapshots: new GitRepository({ ...config.get('recorder.snapshots.storage.git'), path: SNAPSHOTS_PATH, }), - versions: new GitAdapter({ + versions: new GitRepository({ ...config.get('recorder.versions.storage.git'), path: VERSIONS_PATH, }), }, mongo: { - snapshots: new MongoAdapter(config.get('recorder.versions.storage.mongo')), - versions: new 
MongoAdapter(config.get('recorder.snapshots.storage.mongo')), + snapshots: new MongoRepository(config.get('recorder.versions.storage.mongo')), + versions: new MongoRepository(config.get('recorder.snapshots.storage.mongo')), }, }; - for (const [ adapterName, { versions: versionsAdapter, snapshots: snapshotsAdapter }] of Object.entries(adaptersTypes)) { - describe(adapterName, () => { + for (const [ repositoryName, { versions: versionsRepository, snapshots: snapshotsRepository }] of Object.entries(repositoriesTypes)) { + describe(repositoryName, () => { describe('#recordSnapshot', () => { const CONTENT = '

ToS fixture data with UTF-8 çhãràčtęrs

'; let recorder; @@ -52,19 +52,19 @@ describe('Recorder', () => { before(async () => { recorder = new Recorder({ - versionsStorageAdapter: versionsAdapter, - snapshotsStorageAdapter: snapshotsAdapter, + versionsRepository, + snapshotsRepository, }); await recorder.initialize(); }); after(async () => { - await snapshotsAdapter.removeAll(); + await snapshotsRepository.removeAll(); await recorder.finalize(); }); context('when a required param is missing', () => { - after(async () => snapshotsAdapter.removeAll()); + after(async () => snapshotsRepository.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -111,10 +111,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await snapshotsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await snapshotsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter.removeAll()); + after(async () => snapshotsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -149,10 +149,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await snapshotsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter.removeAll()); + after(async () => snapshotsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -185,10 +185,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await snapshotsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => snapshotsAdapter.removeAll()); + after(async () => snapshotsRepository.removeAll()); it('does not record the 
document', async () => { expect(id).to.not.be.ok; @@ -206,8 +206,8 @@ describe('Recorder', () => { before(async () => { recorder = new Recorder({ - versionsStorageAdapter: versionsAdapter, - snapshotsStorageAdapter: snapshotsAdapter, + versionsRepository, + snapshotsRepository, }); await recorder.initialize(); }); @@ -217,7 +217,7 @@ describe('Recorder', () => { }); context('when a required param is missing', () => { - after(async () => versionsAdapter.removeAll()); + after(async () => versionsRepository.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -267,10 +267,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter.removeAll()); + after(async () => versionsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -307,10 +307,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter.removeAll()); + after(async () => versionsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -349,10 +349,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter.removeAll()); + after(async () => versionsRepository.removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; @@ 
-370,19 +370,19 @@ describe('Recorder', () => { before(async () => { recorder = new Recorder({ - versionsStorageAdapter: versionsAdapter, - snapshotsStorageAdapter: snapshotsAdapter, + versionsRepository, + snapshotsRepository, }); await recorder.initialize(); }); after(async () => { - await versionsAdapter.removeAll(); + await versionsRepository.removeAll(); await recorder.finalize(); }); context('when a required param is missing', () => { - after(async () => versionsAdapter.removeAll()); + after(async () => versionsRepository.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -432,10 +432,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter.removeAll()); after(async () => versionsAdapter.removeAll()); + after(async () => versionsRepository.removeAll()); after(async () => versionsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -472,10 +472,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter.removeAll()); + after(async () => versionsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -514,10 +514,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsAdapter.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); }); - after(async () => versionsAdapter.removeAll()); + after(async 
() => versionsRepository.removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; diff --git a/src/index.js b/src/index.js index 6a4cce8aa..154f278f2 100644 --- a/src/index.js +++ b/src/index.js @@ -3,34 +3,34 @@ import { fileURLToPath } from 'url'; import config from 'config'; -import GitAdapter from './storage-adapters/git/index.js'; -import MongoAdapter from './storage-adapters/mongo/index.js'; +import GitRepository from './repositories/git/index.js'; +import MongoRepository from './repositories/mongo/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); -export function instantiateVersionsStorageAdapter() { - return instantiateStorageAdapter('versions'); +export function instantiateVersionsRepository() { + return instantiateRepository('versions'); } -export function instantiateSnapshotsStorageAdapter() { - return instantiateStorageAdapter('snapshots'); +export function instantiateSnapshotsRepository() { + return instantiateRepository('snapshots'); } -function instantiateStorageAdapter(recordType) { +function instantiateRepository(recordType) { let result; switch (config.get(`recorder.${recordType}.storage.type`)) { case 'git': - result = new GitAdapter({ + result = new GitRepository({ ...config.get(`recorder.${recordType}.storage.git`), path: path.resolve(__dirname, '../', config.get(`recorder.${recordType}.storage.git.path`)), }); break; case 'mongo': - result = new MongoAdapter(config.get(`recorder.${recordType}.storage.mongo`)); + result = new MongoRepository(config.get(`recorder.${recordType}.storage.mongo`)); break; default: - throw new Error(`No configuration found for ${recordType} storage adapter`); + throw new Error(`No configuration found for ${recordType} storage repository`); } return result; diff --git a/src/main.js b/src/main.js index c5bbb5a9f..366b57418 100644 --- a/src/main.js +++ b/src/main.js @@ -5,7 +5,7 @@ import logger from './logger/index.js'; import Notifier from './notifier/index.js'; 
import Tracker from './tracker/index.js'; -import { instantiateVersionsStorageAdapter, instantiateSnapshotsStorageAdapter } from './index.js'; +import { instantiateVersionsRepository, instantiateSnapshotsRepository } from './index.js'; const args = process.argv.slice(2); const refilterOnly = args.includes('--refilter-only'); @@ -15,8 +15,8 @@ const extraArgs = args.filter(arg => !arg.startsWith('--')); (async function startOpenTermsArchive() { const archivist = new Archivist({ storage: { - versions: instantiateVersionsStorageAdapter(), - snapshots: instantiateSnapshotsStorageAdapter(), + versions: instantiateVersionsRepository(), + snapshots: instantiateSnapshotsRepository(), }, }); diff --git a/src/storage-adapters/git/dataMapper.js b/src/repositories/git/dataMapper.js similarity index 90% rename from src/storage-adapters/git/dataMapper.js rename to src/repositories/git/dataMapper.js index c18457aac..437c1dfba 100644 --- a/src/storage-adapters/git/dataMapper.js +++ b/src/repositories/git/dataMapper.js @@ -13,14 +13,14 @@ export const COMMIT_MESSAGE_PREFIX = { export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${COMMIT_MESSAGE_PREFIX.startTracking}|${COMMIT_MESSAGE_PREFIX.refilter}|${COMMIT_MESSAGE_PREFIX.update})`); export default class GitDataMapper { - constructor({ adapter, prefixMessageToSnapshotId }) { - this.adapter = adapter; + constructor({ repository, prefixMessageToSnapshotId }) { + this.repository = repository; this.prefixMessageToSnapshotId = prefixMessageToSnapshotId; } async toPersistence(record) { if (!record.content) { - await this.adapter.loadRecordContent(record); + await this.repository.loadRecordContent(record); } const { serviceId, documentType, isRefilter, snapshotId, mimeType, isFirstRecord } = record; @@ -67,14 +67,14 @@ export default class GitDataMapper { isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIX.startTracking), isRefilter: message.startsWith(COMMIT_MESSAGE_PREFIX.refilter), snapshotId: snapshotIdMatch && 
snapshotIdMatch[0], - adapter: this.adapter, + repository: this.repository, }); if (lazyLoadContent) { return record; } - await this.adapter.loadRecordContent(record); + await this.repository.loadRecordContent(record); return record; } diff --git a/src/storage-adapters/git/git.js b/src/repositories/git/git.js similarity index 100% rename from src/storage-adapters/git/git.js rename to src/repositories/git/git.js diff --git a/src/storage-adapters/git/index.js b/src/repositories/git/index.js similarity index 97% rename from src/storage-adapters/git/index.js rename to src/repositories/git/index.js index 27aeead37..615f4ce84 100644 --- a/src/storage-adapters/git/index.js +++ b/src/repositories/git/index.js @@ -16,12 +16,12 @@ const PDF_MIME_TYPE = 'application/pdf'; mime.define({ 'text/markdown': ['md'] }, true); // ensure extension for markdown files is `.md` and not `.markdown` -export default class GitAdapter { +export default class GitRepository { constructor({ path, author, publish, prefixMessageToSnapshotId }) { this.path = path; this.needsPublication = publish; this.git = new Git({ path: this.path, author }); - this.dataMapper = new DataMapper({ adapter: this, prefixMessageToSnapshotId }); + this.dataMapper = new DataMapper({ repository: this, prefixMessageToSnapshotId }); } async initialize() { diff --git a/src/storage-adapters/git/index.test.js b/src/repositories/git/index.test.js similarity index 99% rename from src/storage-adapters/git/index.test.js rename to src/repositories/git/index.test.js index a5979a171..78087d5f2 100644 --- a/src/storage-adapters/git/index.test.js +++ b/src/repositories/git/index.test.js @@ -10,7 +10,7 @@ import Record from '../record.js'; import Git from './git.js'; -import GitAdapter from './index.js'; +import GitRepository from './index.js'; const { expect } = chai; @@ -32,7 +32,7 @@ const PDF_MIME_TYPE = 'application/pdf'; let git; -describe('GitAdapter', () => { +describe('GitRepository', () => { let subject; before(async () => { 
@@ -46,7 +46,7 @@ describe('GitAdapter', () => { await git.initialize(); - subject = new GitAdapter({ + subject = new GitRepository({ ...config.get('recorder.versions.storage.git'), path: RECORDER_PATH, }); diff --git a/src/storage-adapters/mongo/dataMapper.js b/src/repositories/mongo/dataMapper.js similarity index 89% rename from src/storage-adapters/mongo/dataMapper.js rename to src/repositories/mongo/dataMapper.js index 08495a922..ca37122a3 100644 --- a/src/storage-adapters/mongo/dataMapper.js +++ b/src/repositories/mongo/dataMapper.js @@ -3,13 +3,13 @@ import { ObjectId } from 'mongodb'; import Record from '../record.js'; export default class DataMapper { - constructor({ adapter }) { - this.adapter = adapter; + constructor({ repository }) { + this.repository = repository; } async toPersistence(record) { if (record.content === undefined || record.content === null) { - await this.adapter.loadRecordContent(record); + await this.repository.loadRecordContent(record); } const { serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId, isFirstRecord } = record; @@ -57,7 +57,7 @@ export default class DataMapper { return record; } - await this.adapter.loadRecordContent(record); + await this.repository.loadRecordContent(record); return record; } diff --git a/src/storage-adapters/mongo/index.js b/src/repositories/mongo/index.js similarity index 96% rename from src/storage-adapters/mongo/index.js rename to src/repositories/mongo/index.js index da4150952..758558d52 100644 --- a/src/storage-adapters/mongo/index.js +++ b/src/repositories/mongo/index.js @@ -7,14 +7,14 @@ import { MongoClient, ObjectId, Binary } from 'mongodb'; import DataMapper from './dataMapper.js'; -export default class MongoAdapter { +export default class MongoRepository { constructor({ database: databaseName, collection: collectionName, connectionURI }) { const client = new MongoClient(connectionURI); this.databaseName = databaseName; this.collectionName = collectionName; this.client 
= client; - this.dataMapper = new DataMapper({ adapter: this }); + this.dataMapper = new DataMapper({ repository: this }); } async initialize() { diff --git a/src/storage-adapters/mongo/index.test.js b/src/repositories/mongo/index.test.js similarity index 99% rename from src/storage-adapters/mongo/index.test.js rename to src/repositories/mongo/index.test.js index 27aaeb1ce..8cd74d480 100644 --- a/src/storage-adapters/mongo/index.test.js +++ b/src/repositories/mongo/index.test.js @@ -8,7 +8,7 @@ import { MongoClient } from 'mongodb'; import Record from '../record.js'; -import MongoAdapter from './index.js'; +import MongoRepository from './index.js'; const { expect } = chai; const __dirname = path.dirname(fileURLToPath(import.meta.url)); @@ -30,11 +30,11 @@ const PDF_MIME_TYPE = 'application/pdf'; let collection; -describe('MongoAdapter', () => { +describe('MongoRepository', () => { let subject; before(async () => { - subject = new MongoAdapter(config.get('recorder.snapshots.storage.mongo')); + subject = new MongoRepository(config.get('recorder.snapshots.storage.mongo')); await subject.initialize(); await client.connect(); const db = client.db(config.get('recorder.snapshots.storage.mongo.database')); diff --git a/src/storage-adapters/record.js b/src/repositories/record.js similarity index 100% rename from src/storage-adapters/record.js rename to src/repositories/record.js From 270ede0a03859c35606c8425386d66a7af6210d0 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 7 Jun 2022 16:50:19 +0200 Subject: [PATCH 34/74] Update scripts --- scripts/history/migrate-services.js | 8 ++++---- scripts/history/update-to-full-hash.js | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js index 8efc471bb..084caba60 100644 --- a/scripts/history/migrate-services.js +++ b/scripts/history/migrate-services.js @@ -92,14 +92,14 @@ const COUNTERS = { await initialize(migration); - const 
fromSnapshotsRecords = await (await migration.from.snapshots.source.getAll()); - const toSnapshotsRecords = await (await migration.to.snapshots.source.getAll()); + const fromSnapshotsRecords = await (await migration.from.snapshots.source.findAll({ lazyLoadContent: true })); + const toSnapshotsRecords = await (await migration.to.snapshots.source.findAll({ lazyLoadContent: true })); const snapshotsToMigrate = fromSnapshotsRecords.filter(({ serviceId }) => migration.services.includes(serviceId)); const fromSnapshotsRecordsToRewrite = fromSnapshotsRecords.filter(({ serviceId }) => !migration.services.includes(serviceId)); const toSnapshotsRecordsMigrated = [ ...toSnapshotsRecords, ...snapshotsToMigrate ].sort((recordA, recordB) => new Date(recordA.fetchDate) - new Date(recordB.fetchDate)); - const fromVersionsRecords = await (await migration.from.versions.source.getAll()); - const toVersionsRecords = await (await migration.to.versions.source.getAll()); + const fromVersionsRecords = await (await migration.from.versions.source.findAll({ lazyLoadContent: true })); + const toVersionsRecords = await (await migration.to.versions.source.findAll({ lazyLoadContent: true })); const versionsToMigrate = fromVersionsRecords.filter(({ serviceId }) => migration.services.includes(serviceId)); const fromVersionsRecordsToRewrite = fromVersionsRecords.filter(({ serviceId }) => !migration.services.includes(serviceId)); const toVersionsRecordsMigrated = [ ...toVersionsRecords, ...versionsToMigrate ].sort((recordA, recordB) => new Date(recordA.fetchDate) - new Date(recordB.fetchDate)); diff --git a/scripts/history/update-to-full-hash.js b/scripts/history/update-to-full-hash.js index d24dc5699..c09a28987 100644 --- a/scripts/history/update-to-full-hash.js +++ b/scripts/history/update-to-full-hash.js @@ -42,7 +42,7 @@ const ROOT_PATH = path.resolve(__dirname, '../../'); for await (const record of versionsRepository.iterate()) { const fullSnapshotId = await 
snapshotsRepository.git.getFullHash(record.snapshotId); - const { id: recordId } = await versionsTargetRepository.record({ ...record, snapshotId: fullSnapshotId }); + const { id: recordId } = await versionsTargetRepository.save({ ...record, snapshotId: fullSnapshotId }); if (!recordId) { logger.warn({ message: 'Record skipped', serviceId: record.serviceId, type: record.documentType, id: record.id, current, total }); From 4a0a4ca9c4b8e58a5740ac6ced22d467293f0cb5 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 7 Jun 2022 14:11:41 +0200 Subject: [PATCH 35/74] Improve naming --- scripts/history/migrate-services.js | 8 ++++---- src/repositories/git/dataMapper.js | 6 ++---- src/repositories/git/index.js | 16 ++++++++-------- src/repositories/mongo/dataMapper.js | 16 ++++++++-------- src/repositories/mongo/index.js | 26 +++++++++++++------------- 5 files changed, 35 insertions(+), 37 deletions(-) diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js index 084caba60..d42226adc 100644 --- a/scripts/history/migrate-services.js +++ b/scripts/history/migrate-services.js @@ -92,14 +92,14 @@ const COUNTERS = { await initialize(migration); - const fromSnapshotsRecords = await (await migration.from.snapshots.source.findAll({ lazyLoadContent: true })); - const toSnapshotsRecords = await (await migration.to.snapshots.source.findAll({ lazyLoadContent: true })); + const fromSnapshotsRecords = await (await migration.from.snapshots.source.findAll({ deferContentLoading: true })); + const toSnapshotsRecords = await (await migration.to.snapshots.source.findAll({ deferContentLoading: true })); const snapshotsToMigrate = fromSnapshotsRecords.filter(({ serviceId }) => migration.services.includes(serviceId)); const fromSnapshotsRecordsToRewrite = fromSnapshotsRecords.filter(({ serviceId }) => !migration.services.includes(serviceId)); const toSnapshotsRecordsMigrated = [ ...toSnapshotsRecords, ...snapshotsToMigrate ].sort((recordA, recordB) => new 
Date(recordA.fetchDate) - new Date(recordB.fetchDate)); - const fromVersionsRecords = await (await migration.from.versions.source.findAll({ lazyLoadContent: true })); - const toVersionsRecords = await (await migration.to.versions.source.findAll({ lazyLoadContent: true })); + const fromVersionsRecords = await (await migration.from.versions.source.findAll({ deferContentLoading: true })); + const toVersionsRecords = await (await migration.to.versions.source.findAll({ deferContentLoading: true })); const versionsToMigrate = fromVersionsRecords.filter(({ serviceId }) => migration.services.includes(serviceId)); const fromVersionsRecordsToRewrite = fromVersionsRecords.filter(({ serviceId }) => !migration.services.includes(serviceId)); const toVersionsRecordsMigrated = [ ...toVersionsRecords, ...versionsToMigrate ].sort((recordA, recordB) => new Date(recordA.fetchDate) - new Date(recordB.fetchDate)); diff --git a/src/repositories/git/dataMapper.js b/src/repositories/git/dataMapper.js index 437c1dfba..c094a47a6 100644 --- a/src/repositories/git/dataMapper.js +++ b/src/repositories/git/dataMapper.js @@ -11,7 +11,6 @@ export const COMMIT_MESSAGE_PREFIX = { }; export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${COMMIT_MESSAGE_PREFIX.startTracking}|${COMMIT_MESSAGE_PREFIX.refilter}|${COMMIT_MESSAGE_PREFIX.update})`); - export default class GitDataMapper { constructor({ repository, prefixMessageToSnapshotId }) { this.repository = repository; @@ -42,7 +41,7 @@ export default class GitDataMapper { }; } - async toDomain(commit, { lazyLoadContent } = {}) { + async toDomain(commit, { deferContentLoading } = {}) { if (!commit) { return {}; } @@ -67,10 +66,9 @@ export default class GitDataMapper { isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIX.startTracking), isRefilter: message.startsWith(COMMIT_MESSAGE_PREFIX.refilter), snapshotId: snapshotIdMatch && snapshotIdMatch[0], - repository: this.repository, }); - if (lazyLoadContent) { + if (deferContentLoading) { return 
record; } diff --git a/src/repositories/git/index.js b/src/repositories/git/index.js index 615f4ce84..dfe322246 100644 --- a/src/repositories/git/index.js +++ b/src/repositories/git/index.js @@ -58,20 +58,20 @@ export default class GitRepository { return this.git.pushChanges(); } - async findLatestByServiceIdAndDocumentType(serviceId, documentType, { lazyLoadContent } = {}) { + async findLatestByServiceIdAndDocumentType(serviceId, documentType, { deferContentLoading } = {}) { const [commit] = await this.git.getCommit([`${serviceId}/${documentType}.*`]); - return this.dataMapper.toDomain(commit, { lazyLoadContent }); + return this.dataMapper.toDomain(commit, { deferContentLoading }); } - async findById(recordId, { lazyLoadContent } = {}) { + async findById(recordId, { deferContentLoading } = {}) { const [commit] = await this.git.getCommit([recordId]); - return this.dataMapper.toDomain(commit, { lazyLoadContent }); + return this.dataMapper.toDomain(commit, { deferContentLoading }); } - async findAll({ lazyLoadContent } = {}) { - return Promise.all((await this.#getSortedRecordsRelatedCommits()).map(commit => this.dataMapper.toDomain(commit, { lazyLoadContent }))); + async findAll({ deferContentLoading } = {}) { + return Promise.all((await this.#getSortedRecordsRelatedCommits()).map(commit => this.dataMapper.toDomain(commit, { deferContentLoading }))); } async count() { @@ -82,11 +82,11 @@ export default class GitRepository { ])).length; } - async* iterate({ lazyLoadContent } = {}) { + async* iterate({ deferContentLoading } = {}) { const commits = await this.#getSortedRecordsRelatedCommits(); for (const commit of commits) { - yield this.dataMapper.toDomain(commit, { lazyLoadContent }); + yield this.dataMapper.toDomain(commit, { deferContentLoading }); } } diff --git a/src/repositories/mongo/dataMapper.js b/src/repositories/mongo/dataMapper.js index ca37122a3..6763f0401 100644 --- a/src/repositories/mongo/dataMapper.js +++ b/src/repositories/mongo/dataMapper.js @@ -14,7 
+14,7 @@ export default class DataMapper { const { serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId, isFirstRecord } = record; - const recordProperties = Object.fromEntries(Object.entries({ + const documentFields = Object.fromEntries(Object.entries({ serviceId, documentType, content, @@ -25,17 +25,17 @@ export default class DataMapper { isFirstRecord, }).filter(([ , value ]) => value)); // Remove empty values - if (recordProperties.snapshotId) { - recordProperties.snapshotId = new ObjectId(snapshotId); + if (documentFields.snapshotId) { + documentFields.snapshotId = new ObjectId(snapshotId); } - recordProperties.content = record.content; - recordProperties.created_at = new Date(); + documentFields.content = record.content; + documentFields.created_at = new Date(); - return recordProperties; + return documentFields; } - async toDomain(document, { lazyLoadContent = false } = {}) { + async toDomain(document, { deferContentLoading = false } = {}) { if (!document || !document._id) { return {}; } @@ -53,7 +53,7 @@ export default class DataMapper { snapshotId: snapshotId && snapshotId.toString(), }); - if (lazyLoadContent) { + if (deferContentLoading) { return record; } diff --git a/src/repositories/mongo/index.js b/src/repositories/mongo/index.js index 758558d52..5f56d2a22 100644 --- a/src/repositories/mongo/index.js +++ b/src/repositories/mongo/index.js @@ -33,54 +33,54 @@ export default class MongoRepository { async save(record) { const { serviceId, documentType } = record; - const previousRecord = await this.findLatestByServiceIdAndDocumentType(serviceId, documentType); - if (record.isFirstRecord === undefined || record.isFirstRecord === null) { record.isFirstRecord = !await this.collection.findOne({ serviceId, documentType }); } - const documentProperties = await this.dataMapper.toPersistence(record); + const documentFields = await this.dataMapper.toPersistence(record); + + const { content: previousRecordContent } = await 
this.findLatestByServiceIdAndDocumentType(serviceId, documentType); - if (previousRecord && previousRecord.content == documentProperties.content) { + if (previousRecordContent == documentFields.content) { return {}; } - const insertResult = await this.collection.insertOne(documentProperties); + const insertResult = await this.collection.insertOne(documentFields); record.id = insertResult.insertedId.toString(); return record; } - async findLatestByServiceIdAndDocumentType(serviceId, documentType, { lazyLoadContent } = {}) { + async findLatestByServiceIdAndDocumentType(serviceId, documentType, { deferContentLoading } = {}) { const [mongoDocument] = await this.collection.find({ serviceId, documentType }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one document use `find` - return this.dataMapper.toDomain(mongoDocument, { lazyLoadContent }); + return this.dataMapper.toDomain(mongoDocument, { deferContentLoading }); } - async findById(recordId, { lazyLoadContent } = {}) { + async findById(recordId, { deferContentLoading } = {}) { const mongoDocument = await this.collection.findOne({ _id: new ObjectId(recordId) }); - return this.dataMapper.toDomain(mongoDocument, { lazyLoadContent }); + return this.dataMapper.toDomain(mongoDocument, { deferContentLoading }); } - async findAll({ lazyLoadContent } = {}) { + async findAll({ deferContentLoading } = {}) { return Promise.all((await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) - .map(mongoDocument => this.dataMapper.toDomain(mongoDocument, { lazyLoadContent }))); + .map(mongoDocument => this.dataMapper.toDomain(mongoDocument, { deferContentLoading }))); } async count() { return this.collection.find().count(); } - async* iterate({ lazyLoadContent } = {}) { + async* iterate({ deferContentLoading } = {}) { const cursor = this.collection.find().sort({ fetchDate: 1 }); /* eslint-disable no-await-in-loop */ while (await 
cursor.hasNext()) { const mongoDocument = await cursor.next(); - yield this.dataMapper.toDomain(mongoDocument, { lazyLoadContent }); + yield this.dataMapper.toDomain(mongoDocument, { deferContentLoading }); } /* eslint-enable no-await-in-loop */ } From b47a08464e63c1b872c9c9c096f1f555f9429b0a Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 7 Jun 2022 14:11:41 +0200 Subject: [PATCH 36/74] Minor doc update --- src/repositories/git/index.js | 2 +- src/repositories/mongo/index.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/repositories/git/index.js b/src/repositories/git/index.js index dfe322246..988e92a91 100644 --- a/src/repositories/git/index.js +++ b/src/repositories/git/index.js @@ -1,5 +1,5 @@ /** - * This file is the boundary beyond which the usage of git is abstracted. + * This module is the boundary beyond which the usage of git is abstracted. * Commit SHAs are used as opaque unique IDs. */ diff --git a/src/repositories/mongo/index.js b/src/repositories/mongo/index.js index 5f56d2a22..be2748863 100644 --- a/src/repositories/mongo/index.js +++ b/src/repositories/mongo/index.js @@ -1,5 +1,5 @@ /** - * This file is the boundary beyond which the usage of MongoDB is abstracted. + * This module is the boundary beyond which the usage of MongoDB is abstracted. * Object IDs are used as opaque unique IDs. 
*/ From 299e6c5d5a7f59a164f8bab7ba045daf891e1a00 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 7 Jun 2022 14:11:41 +0200 Subject: [PATCH 37/74] Ensure markdown extension is .md --- src/repositories/git/dataMapper.js | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/repositories/git/dataMapper.js b/src/repositories/git/dataMapper.js index c094a47a6..6ab0f0a35 100644 --- a/src/repositories/git/dataMapper.js +++ b/src/repositories/git/dataMapper.js @@ -4,6 +4,8 @@ import mime from 'mime'; import Record from '../record.js'; +mime.define({ 'text/markdown': ['md'] }, true); // ensure extension for markdown files is `.md` and not `.markdown` + export const COMMIT_MESSAGE_PREFIX = { startTracking: 'Start tracking', refilter: 'Refilter', From a14715aeb64149d195bdef311ac704dc4af73c37 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 7 Jun 2022 14:11:41 +0200 Subject: [PATCH 38/74] Create interface for repositories --- src/repositories/git/index.js | 5 +- src/repositories/mongo/index.js | 5 +- src/repositories/repositoryInterface.js | 105 ++++++++++++++++++++++++ 3 files changed, 113 insertions(+), 2 deletions(-) create mode 100644 src/repositories/repositoryInterface.js diff --git a/src/repositories/git/index.js b/src/repositories/git/index.js index 988e92a91..4aaea9855 100644 --- a/src/repositories/git/index.js +++ b/src/repositories/git/index.js @@ -8,6 +8,8 @@ import path from 'path'; import mime from 'mime'; +import RepositoryInterface from '../repositoryInterface.js'; + import DataMapper, { COMMIT_MESSAGE_PREFIXES_REGEXP, COMMIT_MESSAGE_PREFIX } from './dataMapper.js'; import Git from './git.js'; @@ -16,8 +18,9 @@ const PDF_MIME_TYPE = 'application/pdf'; mime.define({ 'text/markdown': ['md'] }, true); // ensure extension for markdown files is `.md` and not `.markdown` -export default class GitRepository { +export default class GitRepository extends RepositoryInterface { constructor({ path, author, publish, prefixMessageToSnapshotId }) { + 
super(); this.path = path; this.needsPublication = publish; this.git = new Git({ path: this.path, author }); diff --git a/src/repositories/mongo/index.js b/src/repositories/mongo/index.js index be2748863..647e68f3a 100644 --- a/src/repositories/mongo/index.js +++ b/src/repositories/mongo/index.js @@ -5,10 +5,13 @@ import { MongoClient, ObjectId, Binary } from 'mongodb'; +import RepositoryInterface from '../repositoryInterface.js'; + import DataMapper from './dataMapper.js'; -export default class MongoRepository { +export default class MongoRepository extends RepositoryInterface { constructor({ database: databaseName, collection: collectionName, connectionURI }) { + super(); const client = new MongoClient(connectionURI); this.databaseName = databaseName; diff --git a/src/repositories/repositoryInterface.js b/src/repositories/repositoryInterface.js new file mode 100644 index 000000000..5deb62bb9 --- /dev/null +++ b/src/repositories/repositoryInterface.js @@ -0,0 +1,105 @@ +/* eslint-disable no-unused-vars, require-yield, class-methods-use-this, no-empty-function */ + +/** + * Interface for classes that represent a repository and act like a collection of domain objects with querying capabilities + * + * @interface + */ +export default class RepositoryInterface { + /** + * [Optional] Initialize repository + * Override this function if the repository needs some asynchronous initialization code (open DB connection and create collections, initialize git repository, …) + * + * @returns {Promise} Promise that will be resolved with the current repository + */ + async initialize() {} + + /** + * [Optional] Finalize repository + * Override this function if the repository needs some asynchronous code to properly close the repository (close DB connection, push changes on git remote, …) + * + * @returns {Promise} Promise that will be resolved with the current repository + */ + async finalize() {} + + /** + * Save the given record if it does not already exist + * + * @param
{Record} record - Record to save - @see {@link ./record.js} + * @returns {Promise} Promise that will be resolved with the given record updated with its recorded Id + */ + async save(record) { + throw new Error(`#save function is not yet implemented in ${this.constructor.name}`); + } + + /** + * Find the latest record that matches given service Id and document type + * + * @param {string} serviceId - Service Id of record to find + * @param {string} documentType - Document type of record to find + * @param {boolean} options.deferContentLoading - Enable deferred content loading to improve performance; load content later with #loadRecordContent method + * @returns {Promise} Promise that will be resolved with the found record + */ + async findLatestByServiceIdAndDocumentType(serviceId, documentType, { deferContentLoading } = {}) { + throw new Error(`#findLatestByServiceIdAndDocumentType function is not yet implemented in ${this.constructor.name}`); + } + + /** + * Find the record for the given record Id + * + * @param {string} recordId - Record Id of the record to find + * @param {boolean} options.deferContentLoading - Enable deferred content loading to improve performance; load content later with #loadRecordContent method + * @returns {Promise} Promise that will be resolved with the found record + */ + async findById(recordId, { deferContentLoading } = {}) { + throw new Error(`#findById function is not yet implemented in ${this.constructor.name}`); + } + + /** + * Find all records + * + * @param {boolean} options.deferContentLoading - Enable deferred content loading to improve performance; load content later with #loadRecordContent method + * @returns {Promise>} Promise that will be resolved with an array of all found records + */ + async findAll({ deferContentLoading } = {}) { + throw new Error(`#findAll function is not yet implemented in ${this.constructor.name}`); + } + + /** + * Count the number of records + * + * @returns {Promise} Promise that will be resolved 
with the total number of records + */ + async count() { + throw new Error(`#count function is not yet implemented in ${this.constructor.name}`); + } + + /** + * Iterate through all records + * + * @param {boolean} options.deferContentLoading - Enable deferred content loading to improve performance; load content later with #loadRecordContent method + * @yields {Record} Next record in the iteration + */ + async* iterate({ deferContentLoading } = {}) { + throw new Error(`#iterate function is not yet implemented in ${this.constructor.name}`); + } + + /** + * Remove all records + * + * @returns {Promise} Promise that will be resolved when all records are removed + */ + async removeAll() { + throw new Error(`#removeAll function is not yet implemented in ${this.constructor.name}`); + } + + /** + * Load content in the given record + * + * @param {Record} record - Record to load content - @see {@link ./record.js} + * @returns {Promise} Promise that will be resolved when the content will be loaded + */ + async loadRecordContent(record) { + throw new Error(`#loadRecordContent function is not yet implemented in ${this.constructor.name}`); + } +} From 7bac4cc3d37a1ae3e734a078860195685c5dc7bb Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Tue, 7 Jun 2022 17:43:14 +0200 Subject: [PATCH 39/74] Validate required params when instantiating Record --- src/repositories/git/index.test.js | 9 +++++++++ src/repositories/mongo/index.test.js | 3 +++ src/repositories/record.js | 22 ++++++++++++++++++++-- 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/src/repositories/git/index.test.js b/src/repositories/git/index.test.js index 78087d5f2..72e978f18 100644 --- a/src/repositories/git/index.test.js +++ b/src/repositories/git/index.test.js @@ -132,6 +132,8 @@ describe('GitRepository', () => { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, + mimeType: MIME_TYPE, + fetchDate: FETCH_DATE, })); numberOfRecordsBefore = (await git.log()).length; @@ 
-172,6 +174,7 @@ describe('GitRepository', () => { documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, + fetchDate: FETCH_DATE, })); numberOfRecordsBefore = (await git.log()).length; @@ -181,6 +184,7 @@ describe('GitRepository', () => { documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, + fetchDate: FETCH_DATE, }))); numberOfRecordsAfter = (await git.log()).length; @@ -205,6 +209,8 @@ describe('GitRepository', () => { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, + mimeType: MIME_TYPE, + fetchDate: FETCH_DATE_EARLIER, })); // A refilter cannot be the first record numberOfRecordsBefore = (await git.log()).length; @@ -448,6 +454,7 @@ describe('GitRepository', () => { documentType: DOCUMENT_TYPE, content: CONTENT, mimeType: MIME_TYPE, + fetchDate: FETCH_DATE_EARLIER, })); ({ id: lastSnapshotId } = await subject.save(new Record({ @@ -455,6 +462,7 @@ describe('GitRepository', () => { documentType: DOCUMENT_TYPE, content: UPDATED_FILE_CONTENT, mimeType: MIME_TYPE, + fetchDate: FETCH_DATE, }))); latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); @@ -482,6 +490,7 @@ describe('GitRepository', () => { documentType: DOCUMENT_TYPE, content: PDF_CONTENT, mimeType: PDF_MIME_TYPE, + fetchDate: FETCH_DATE, }))); latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); diff --git a/src/repositories/mongo/index.test.js b/src/repositories/mongo/index.test.js index 8cd74d480..3ceb1e522 100644 --- a/src/repositories/mongo/index.test.js +++ b/src/repositories/mongo/index.test.js @@ -221,6 +221,8 @@ describe('MongoRepository', () => { serviceId: SERVICE_PROVIDER_ID, documentType: DOCUMENT_TYPE, content: CONTENT, + mimeType: MIME_TYPE, + fetchDate: FETCH_DATE_EARLIER, })); // A refilter cannot be the first record numberOfRecordsBefore = await collection.find({ @@ -486,6 +488,7 @@ describe('MongoRepository', () => { 
documentType: DOCUMENT_TYPE, content: CONTENT, fetchDate: FETCH_DATE, + mimeType: MIME_TYPE, })); ({ id: lastSnapshotId } = await subject.save(new Record({ diff --git a/src/repositories/record.js b/src/repositories/record.js index d5f3c8391..5d3bf21d4 100644 --- a/src/repositories/record.js +++ b/src/repositories/record.js @@ -1,6 +1,11 @@ export default class Record { - constructor({ id, serviceId, documentType, mimeType, fetchDate, isFirstRecord, isRefilter, snapshotId, content }) { - this.id = id; + #REQUIRED_PARAMS = [ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ]; + + constructor(params) { + this.#validate(params); + + const { id, serviceId, documentType, mimeType, fetchDate, isFirstRecord, isRefilter, snapshotId, content } = params; + this.serviceId = serviceId; this.documentType = documentType; this.mimeType = mimeType; @@ -8,8 +13,21 @@ export default class Record { this.isFirstRecord = isFirstRecord; this.isRefilter = isRefilter; this.snapshotId = snapshotId; + + if (id) { + this.id = id; + } + if (content) { this.content = content; } } + + #validate(givenParams) { + for (const param of this.#REQUIRED_PARAMS) { + if (!Object.prototype.hasOwnProperty.call(givenParams, param)) { + throw new Error(`"${param}" is required`); + } + } + } } From 7b56f296e7c5504d6cc4ce080c6828ae6e86efef Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 8 Jun 2022 18:03:17 +0200 Subject: [PATCH 40/74] Improve separation Move back responsibility to load record content in repository --- src/repositories/git/dataMapper.js | 27 +++--------------- src/repositories/git/index.js | 42 ++++++++++++++++++++++------ src/repositories/mongo/dataMapper.js | 22 ++------------- src/repositories/mongo/index.js | 41 +++++++++++++++++++++------ 4 files changed, 71 insertions(+), 61 deletions(-) diff --git a/src/repositories/git/dataMapper.js b/src/repositories/git/dataMapper.js index 6ab0f0a35..61e48a660 100644 --- a/src/repositories/git/dataMapper.js +++ 
b/src/repositories/git/dataMapper.js @@ -14,16 +14,7 @@ export const COMMIT_MESSAGE_PREFIX = { export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${COMMIT_MESSAGE_PREFIX.startTracking}|${COMMIT_MESSAGE_PREFIX.refilter}|${COMMIT_MESSAGE_PREFIX.update})`); export default class GitDataMapper { - constructor({ repository, prefixMessageToSnapshotId }) { - this.repository = repository; - this.prefixMessageToSnapshotId = prefixMessageToSnapshotId; - } - - async toPersistence(record) { - if (!record.content) { - await this.repository.loadRecordContent(record); - } - + static toPersistence(record, prefixMessageToSnapshotId) { const { serviceId, documentType, isRefilter, snapshotId, mimeType, isFirstRecord } = record; let prefix = isRefilter ? COMMIT_MESSAGE_PREFIX.refilter : COMMIT_MESSAGE_PREFIX.update; @@ -33,7 +24,7 @@ export default class GitDataMapper { let message = `${prefix} ${serviceId} ${documentType}`; if (snapshotId) { - message = `${message}\n\n${this.prefixMessageToSnapshotId}${snapshotId}`; + message = `${message}\n\n${prefixMessageToSnapshotId}${snapshotId}`; } return { @@ -43,11 +34,7 @@ export default class GitDataMapper { }; } - async toDomain(commit, { deferContentLoading } = {}) { - if (!commit) { - return {}; - } - + static toDomain(commit) { const { hash, date, message, body, diff } = commit; const modifiedFilesInCommit = diff.files.map(({ file }) => file); @@ -57,8 +44,8 @@ export default class GitDataMapper { } const [relativeFilePath] = modifiedFilesInCommit; - const snapshotIdMatch = body.match(/\b[0-9a-f]{5,40}\b/g); + const record = new Record({ id: hash, serviceId: path.dirname(relativeFilePath), @@ -70,12 +57,6 @@ export default class GitDataMapper { snapshotId: snapshotIdMatch && snapshotIdMatch[0], }); - if (deferContentLoading) { - return record; - } - - await this.repository.loadRecordContent(record); - return record; } } diff --git a/src/repositories/git/index.js b/src/repositories/git/index.js index 4aaea9855..ccf509ac7 100644 
--- a/src/repositories/git/index.js +++ b/src/repositories/git/index.js @@ -24,7 +24,7 @@ export default class GitRepository extends RepositoryInterface { this.path = path; this.needsPublication = publish; this.git = new Git({ path: this.path, author }); - this.dataMapper = new DataMapper({ repository: this, prefixMessageToSnapshotId }); + this.prefixMessageToSnapshotId = prefixMessageToSnapshotId; } async initialize() { @@ -39,7 +39,7 @@ export default class GitRepository extends RepositoryInterface { if (record.isFirstRecord === undefined || record.isFirstRecord === null) { record.isFirstRecord = await this.#isFirstRecord(serviceId, documentType); } - const { message, content, fileExtension } = await this.dataMapper.toPersistence(record); + const { message, content, fileExtension } = await this.#toPersistence(record); const filePath = await this.#writeFile({ serviceId, documentType, content, fileExtension }); const sha = await this.#commit({ filePath, message, date: fetchDate }); @@ -64,17 +64,17 @@ export default class GitRepository extends RepositoryInterface { async findLatestByServiceIdAndDocumentType(serviceId, documentType, { deferContentLoading } = {}) { const [commit] = await this.git.getCommit([`${serviceId}/${documentType}.*`]); - return this.dataMapper.toDomain(commit, { deferContentLoading }); + return this.#toDomain(commit); } async findById(recordId, { deferContentLoading } = {}) { const [commit] = await this.git.getCommit([recordId]); - return this.dataMapper.toDomain(commit, { deferContentLoading }); + return this.#toDomain(commit); } - async findAll({ deferContentLoading } = {}) { - return Promise.all((await this.#getSortedRecordsRelatedCommits()).map(commit => this.dataMapper.toDomain(commit, { deferContentLoading }))); + async findAll() { + return Promise.all((await this.#getCommits()).map(commit => this.#toDomain(commit, { deferContentLoading: true }))); } async count() { @@ -85,11 +85,11 @@ export default class GitRepository extends 
RepositoryInterface { ])).length; } - async* iterate({ deferContentLoading } = {}) { - const commits = await this.#getSortedRecordsRelatedCommits(); + async* iterate() { + const commits = await this.#getCommits(); for (const commit of commits) { - yield this.dataMapper.toDomain(commit, { deferContentLoading }); + yield this.#toDomain(commit); } } @@ -176,4 +176,28 @@ export default class GitRepository extends RepositoryInterface { async #isFirstRecord(serviceId, documentType) { return !await this.#isTracked(serviceId, documentType); } + + async #toDomain(commit, { deferContentLoading } = {}) { + if (!commit) { + return Object(null); + } + + const record = DataMapper.toDomain(commit); + + if (deferContentLoading) { + return record; + } + + await this.loadRecordContent(record); + + return record; + } + + async #toPersistence(record) { + if (record.content === undefined || record.content === null) { + await this.loadRecordContent(record); + } + + return DataMapper.toPersistence(record, this.prefixMessageToSnapshotId); + } } diff --git a/src/repositories/mongo/dataMapper.js b/src/repositories/mongo/dataMapper.js index 6763f0401..c156a6cb2 100644 --- a/src/repositories/mongo/dataMapper.js +++ b/src/repositories/mongo/dataMapper.js @@ -3,15 +3,7 @@ import { ObjectId } from 'mongodb'; import Record from '../record.js'; export default class DataMapper { - constructor({ repository }) { - this.repository = repository; - } - - async toPersistence(record) { - if (record.content === undefined || record.content === null) { - await this.repository.loadRecordContent(record); - } - + static toPersistence(record) { const { serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId, isFirstRecord } = record; const documentFields = Object.fromEntries(Object.entries({ @@ -35,11 +27,7 @@ export default class DataMapper { return documentFields; } - async toDomain(document, { deferContentLoading = false } = {}) { - if (!document || !document._id) { - return {}; - } - + 
static toDomain(document) { const { _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId } = document; const record = new Record({ @@ -53,12 +41,6 @@ export default class DataMapper { snapshotId: snapshotId && snapshotId.toString(), }); - if (deferContentLoading) { - return record; - } - - await this.repository.loadRecordContent(record); - return record; } } diff --git a/src/repositories/mongo/index.js b/src/repositories/mongo/index.js index 647e68f3a..8a8d09178 100644 --- a/src/repositories/mongo/index.js +++ b/src/repositories/mongo/index.js @@ -17,7 +17,6 @@ export default class MongoRepository extends RepositoryInterface { this.databaseName = databaseName; this.collectionName = collectionName; this.client = client; - this.dataMapper = new DataMapper({ repository: this }); } async initialize() { @@ -40,7 +39,7 @@ export default class MongoRepository extends RepositoryInterface { record.isFirstRecord = !await this.collection.findOne({ serviceId, documentType }); } - const documentFields = await this.dataMapper.toPersistence(record); + const documentFields = await this.#toPersistence(record); const { content: previousRecordContent } = await this.findLatestByServiceIdAndDocumentType(serviceId, documentType); @@ -58,32 +57,32 @@ export default class MongoRepository extends RepositoryInterface { async findLatestByServiceIdAndDocumentType(serviceId, documentType, { deferContentLoading } = {}) { const [mongoDocument] = await this.collection.find({ serviceId, documentType }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one document use `find` - return this.dataMapper.toDomain(mongoDocument, { deferContentLoading }); + return this.#toDomain(mongoDocument); } - async findById(recordId, { deferContentLoading } = {}) { + async findById(recordId) { const mongoDocument = await this.collection.findOne({ _id: new ObjectId(recordId) }); - return 
this.dataMapper.toDomain(mongoDocument, { deferContentLoading }); + return this.#toDomain(mongoDocument); } - async findAll({ deferContentLoading } = {}) { + async findAll() { return Promise.all((await this.collection.find().project({ content: 0 }).sort({ fetchDate: 1 }).toArray()) - .map(mongoDocument => this.dataMapper.toDomain(mongoDocument, { deferContentLoading }))); + .map(mongoDocument => this.#toDomain(mongoDocument, { deferContentLoading: true }))); } async count() { return this.collection.find().count(); } - async* iterate({ deferContentLoading } = {}) { + async* iterate() { const cursor = this.collection.find().sort({ fetchDate: 1 }); /* eslint-disable no-await-in-loop */ while (await cursor.hasNext()) { const mongoDocument = await cursor.next(); - yield this.dataMapper.toDomain(mongoDocument, { deferContentLoading }); + yield this.#toDomain(mongoDocument); } /* eslint-enable no-await-in-loop */ } @@ -97,4 +96,28 @@ export default class MongoRepository extends RepositoryInterface { record.content = content instanceof Binary ? 
content.buffer : content; } + + async #toDomain(document, { deferContentLoading } = {}) { + if (!document) { + return Object(null); + } + + const record = DataMapper.toDomain(document); + + if (deferContentLoading) { + return record; + } + + await this.loadRecordContent(record); + + return record; + } + + async #toPersistence(record) { + if (record.content === undefined || record.content === null) { + await this.repository.loadRecordContent(record); + } + + return DataMapper.toPersistence(record); + } } From db8eb0d10b396393c6734fae0706b57bd1add673 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 8 Jun 2022 18:04:17 +0200 Subject: [PATCH 41/74] Improve repository API --- .eslintrc.yaml | 7 +++ src/archivist/index.test.js | 4 +- src/archivist/recorder/index.js | 2 +- src/archivist/recorder/index.test.js | 18 +++--- src/repositories/git/index.js | 6 +- src/repositories/git/index.test.js | 8 +-- src/repositories/mongo/index.js | 4 +- src/repositories/mongo/index.test.js | 8 +-- src/repositories/repositoryInterface.js | 73 ++++++++++++------------- 9 files changed, 67 insertions(+), 63 deletions(-) diff --git a/.eslintrc.yaml b/.eslintrc.yaml index 0e6c3821f..20b5b640a 100644 --- a/.eslintrc.yaml +++ b/.eslintrc.yaml @@ -106,3 +106,10 @@ overrides: rules: func-names: 0 import/no-extraneous-dependencies: 0 + - files: + - src/**/*Interface.js + rules: + no-unused-vars: 0 + require-yield: 0 + class-methods-use-this: 0 + no-empty-function: 0 diff --git a/src/archivist/index.test.js b/src/archivist/index.test.js index 37bbe1879..4deafb954 100644 --- a/src/archivist/index.test.js +++ b/src/archivist/index.test.js @@ -168,8 +168,8 @@ describe('Archivist', function () { await app.initialize(); await app.trackChanges(serviceIds); - ({ id: originalSnapshotId } = await snapshotsRepository.findLatestByServiceIdAndDocumentType(SERVICE_A_ID, SERVICE_A_TYPE)); - ({ id: firstVersionId } = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_A_ID, 
SERVICE_A_TYPE)); + ({ id: originalSnapshotId } = await snapshotsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: firstVersionId } = await versionsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE)); serviceBCommits = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH }); diff --git a/src/archivist/recorder/index.js b/src/archivist/recorder/index.js index d8bc4e025..731ac170d 100644 --- a/src/archivist/recorder/index.js +++ b/src/archivist/recorder/index.js @@ -19,7 +19,7 @@ export default class Recorder { } async getLatestSnapshot(serviceId, documentType) { - return this.snapshotsRepository.findLatestByServiceIdAndDocumentType(serviceId, documentType); + return this.snapshotsRepository.findLatest(serviceId, documentType); } async recordSnapshot({ serviceId, documentType, fetchDate, mimeType, content }) { diff --git a/src/archivist/recorder/index.test.js b/src/archivist/recorder/index.test.js index 2702bda4b..25db4f143 100644 --- a/src/archivist/recorder/index.test.js +++ b/src/archivist/recorder/index.test.js @@ -111,7 +111,7 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await snapshotsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await snapshotsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => snapshotsRepository.removeAll()); @@ -149,7 +149,7 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await snapshotsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => snapshotsRepository.removeAll()); @@ -185,7 +185,7 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await snapshotsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => snapshotsRepository.removeAll()); @@ -267,7 +267,7 @@ describe('Recorder', () => { 
fetchDate: FETCH_DATE, })); - record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => versionsRepository.removeAll()); @@ -307,7 +307,7 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => versionsRepository.removeAll()); @@ -349,7 +349,7 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => versionsRepository.removeAll()); @@ -432,7 +432,7 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => versionsRepository.removeAll()); after(async () => versionsRepository.removeAll()); @@ -472,7 +472,7 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => versionsRepository.removeAll()); @@ -514,7 +514,7 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsRepository.findLatestByServiceIdAndDocumentType(SERVICE_ID, TYPE); + record = await versionsRepository.findLatest(SERVICE_ID, TYPE); }); after(async () => versionsRepository.removeAll()); diff --git a/src/repositories/git/index.js b/src/repositories/git/index.js index ccf509ac7..662f05486 100644 --- a/src/repositories/git/index.js +++ b/src/repositories/git/index.js @@ -61,14 +61,14 @@ export default class 
GitRepository extends RepositoryInterface { return this.git.pushChanges(); } - async findLatestByServiceIdAndDocumentType(serviceId, documentType, { deferContentLoading } = {}) { const [commit] = await this.git.getCommit([`${serviceId}/${documentType}.*`]); + async findLatest(serviceId, documentType) { return this.#toDomain(commit); } - async findById(recordId, { deferContentLoading } = {}) { const [commit] = await this.git.getCommit([recordId]); + async findById(recordId) { return this.#toDomain(commit); } @@ -127,7 +127,7 @@ export default class GitRepository extends RepositoryInterface { await this.git.restore(relativeFilePath, record.id); // So, temporarily restore the PDF file to a specific commit pdfBuffer = await fs.readFile(`${this.path}/${relativeFilePath}`); // …read the content } finally { - await this.git.restore(relativeFilePath, 'HEAD'); // …and finally restore the file to its last state. + await this.git.restore(relativeFilePath, 'HEAD'); // …and finally restore the file to its last state } record.content = pdfBuffer; diff --git a/src/repositories/git/index.test.js b/src/repositories/git/index.test.js index 72e978f18..1ad97ee44 100644 --- a/src/repositories/git/index.test.js +++ b/src/repositories/git/index.test.js @@ -440,7 +440,7 @@ describe('GitRepository', () => { }); }); - describe('#findLatestByServiceIdAndDocumentType', () => { + describe('#findLatest', () => { context('when there are records for the given service', () => { let lastSnapshotId; let latestRecord; @@ -465,7 +465,7 @@ describe('GitRepository', () => { fetchDate: FETCH_DATE, }))); - latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); after(async () => subject.removeAll()); @@ -493,7 +493,7 @@ describe('GitRepository', () => { fetchDate: FETCH_DATE, }))); - latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, 
DOCUMENT_TYPE); + latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); after(async () => subject.removeAll()); @@ -516,7 +516,7 @@ describe('GitRepository', () => { let latestRecord; before(async () => { - latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); it('returns an empty object', async () => { diff --git a/src/repositories/mongo/index.js b/src/repositories/mongo/index.js index 8a8d09178..d3b221f33 100644 --- a/src/repositories/mongo/index.js +++ b/src/repositories/mongo/index.js @@ -41,7 +41,7 @@ export default class MongoRepository extends RepositoryInterface { const documentFields = await this.#toPersistence(record); - const { content: previousRecordContent } = await this.findLatestByServiceIdAndDocumentType(serviceId, documentType); + const { content: previousRecordContent } = await this.findLatest(serviceId, documentType); if (previousRecordContent == documentFields.content) { return {}; @@ -54,7 +54,7 @@ export default class MongoRepository extends RepositoryInterface { return record; } - async findLatestByServiceIdAndDocumentType(serviceId, documentType, { deferContentLoading } = {}) { + async findLatest(serviceId, documentType) { const [mongoDocument] = await this.collection.find({ serviceId, documentType }).limit(1).sort({ fetchDate: -1 }).toArray(); // `findOne` doesn't support the `sort` method, so even for only one document use `find` return this.#toDomain(mongoDocument); diff --git a/src/repositories/mongo/index.test.js b/src/repositories/mongo/index.test.js index 3ceb1e522..658b7b32e 100644 --- a/src/repositories/mongo/index.test.js +++ b/src/repositories/mongo/index.test.js @@ -474,7 +474,7 @@ describe('MongoRepository', () => { }); }); - describe('#findLatestByServiceIdAndDocumentType', () => { + describe('#findLatest', () => { context('when there are records for the given service', 
() => { let lastSnapshotId; let latestRecord; @@ -499,7 +499,7 @@ describe('MongoRepository', () => { fetchDate: FETCH_DATE_LATER, }))); - latestRecord = await subject.findLatestByServiceIdAndDocumentType( + latestRecord = await subject.findLatest( SERVICE_PROVIDER_ID, DOCUMENT_TYPE, ); @@ -538,7 +538,7 @@ describe('MongoRepository', () => { fetchDate: FETCH_DATE_LATER, }))); - latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); after(async () => subject.removeAll()); @@ -563,7 +563,7 @@ describe('MongoRepository', () => { let latestRecord; before(async () => { - latestRecord = await subject.findLatestByServiceIdAndDocumentType(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); + latestRecord = await subject.findLatest(SERVICE_PROVIDER_ID, DOCUMENT_TYPE); }); it('returns an empty object', async () => { diff --git a/src/repositories/repositoryInterface.js b/src/repositories/repositoryInterface.js index 5deb62bb9..650029e2a 100644 --- a/src/repositories/repositoryInterface.js +++ b/src/repositories/repositoryInterface.js @@ -1,14 +1,12 @@ -/* eslint-disable no-unused-vars, require-yield, class-methods-use-this, no-empty-function */ - /** - * Interface for classes that represent a repository and act like a collection of domain objects with querying capabilities - * + * Interface for classes that model a collection of domain objects with querying capabilities + * @see {@link https://martinfowler.com/eaaCatalog/repository.html|Repository} * @interface */ export default class RepositoryInterface { /** * [Optional] Initialize repository - * Override this function if the repository needs some asynchronous initialization code (open DB connection and create collections, initializate git repository, …) + * Override this method if the repository needs some asynchronous initialization code (open database connection and create collections, initialize Git…) * * 
@returns {Promise} Promise that will be resolved with the current repository */ @@ -16,72 +14,71 @@ export default class RepositoryInterface { /** * [Optional] Finalize repository - * Override this function if the repository needs some asynchronous code to properly close the repository (close DB connection, push changes on git remote, …) + * Override this method if the repository needs some asynchronous code to properly close the repository (close database connection, push changes on Git remote…) * * @returns {Promise} Promise that will be resolved with the current repository */ async finalize() {} /** - * Save the given record if it's not already exist + * Persist the given record if it does not already exist in repository * - * @param {Record} record - Record to save - @see {@link ./record.js} - * @returns {Promise} Promise that will be resolved with the given record updated with its recorded Id + * @param {Record} record - Record to persist + * @returns {Promise} Promise that will be resolved with the given record when it has been persisted */ async save(record) { - throw new Error(`#save function is not yet implemented in ${this.constructor.name}`); + throw new Error(`#save method is not yet implemented in ${this.constructor.name}`); } /** - * Find the latest record that matches given service Id and document type + * Find the most recent record that matches the given service ID and document type * - * @param {string} serviceId - Service Id of record to find + * @param {string} serviceId - Service ID of record to find * @param {string} documentType - Document type of record to find - * @param {boolean} options.deferContentLoading - Enable deferred content loading to improve performance; load content later with #loadRecordContent method - * @returns {Promise} Promise that will be resolved with the found record + * @returns {Promise} Promise that will be resolved with the found record or an empty object if none match the given criteria */ - async 
findLatestByServiceIdAndDocumentType(serviceId, documentType, { deferContentLoading } = {}) { - throw new Error(`#findLatestByServiceIdAndDocumentType function is not yet implemented in ${this.constructor.name}`); + async findLatest(serviceId, documentType) { + throw new Error(`#findLatest method is not yet implemented in ${this.constructor.name}`); } /** - * Find the record for the given record Id + * Find the record that matches the given record ID * - * @param {string} recordId - Record Id of the record to find - * @param {boolean} options.deferContentLoading - Enable deferred content loading to improve performance; load content later with #loadRecordContent method - * @returns {Promise} Promise that will be resolved with the found record + * @param {string} recordId - Record ID of the record to find + * @returns {Promise} Promise that will be resolved with the found record or an empty object if none match the given ID */ - async findById(recordId, { deferContentLoading } = {}) { - throw new Error(`#findById function is not yet implemented in ${this.constructor.name}`); + async findById(recordId) { + throw new Error(`#findById method is not yet implemented in ${this.constructor.name}`); } /** * Find all records + * For performance reasons, the content of the records will not be loaded by default. 
Use #loadRecordContent to load the content of individual records * - * @param {boolean} options.deferContentLoading - Enable deferred content loading to improve performance; load content later with #loadRecordContent method - * @returns {Promise>} Promise that will be resolved with an array of all found records + * @see RepositoryInterface#loadRecordContent + * @returns {Promise>} Promise that will be resolved with an array of all records */ - async findAll({ deferContentLoading } = {}) { - throw new Error(`#findAll function is not yet implemented in ${this.constructor.name}`); + async findAll() { + throw new Error(`#findAll method is not yet implemented in ${this.constructor.name}`); } /** - * Count the number of records + * Count the total number of records in the repository + * For performance reasons, use this method rather than counting the number of entries returned by #findAll if you only need the size of a repository * * @returns {Promise} Promise that will be resolved with the total number of records */ async count() { - throw new Error(`#count function is not yet implemented in ${this.constructor.name}`); + throw new Error(`#count method is not yet implemented in ${this.constructor.name}`); } /** - * Iterate through all records + * Iterate over all records in the repository, from oldest to most recent * - * @param {boolean} options.deferContentLoading - Enable deferred content loading to improve performance; load content later with #loadRecordContent method - * @yields {Record} Next record in the iteration + * @yields {Record} */ - async* iterate({ deferContentLoading } = {}) { - throw new Error(`#iterate function is not yet implemented in ${this.constructor.name}`); + async* iterate() { + throw new Error(`#iterate method is not yet implemented in ${this.constructor.name}`); } /** @@ -90,16 +87,16 @@ export default class RepositoryInterface { * @returns {Promise} Promise that will be resolved when all records are removed */ async removeAll() { - throw new 
Error(`#removeAll function is not yet implemented in ${this.constructor.name}`); + throw new Error(`#removeAll method is not yet implemented in ${this.constructor.name}`); } /** - * Load content in the given record + * Load content of the given record * - * @param {Record} record - Record to load content - @see {@link ./record.js} - * @returns {Promise} Promise that will be resolved when the content will be loaded + * @param {Record} record - Record of which to populate content + * @returns {Promise} Promise that will be resolved with the given record when its content has been loaded */ async loadRecordContent(record) { - throw new Error(`#loadRecordContent function is not yet implemented in ${this.constructor.name}`); + throw new Error(`#loadRecordContent method is not yet implemented in ${this.constructor.name}`); } } From e288af21dc0a0a0e912b330f72893376ce462a50 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 8 Jun 2022 18:05:25 +0200 Subject: [PATCH 42/74] Make getCommit returns only one commit --- src/repositories/git/git.js | 4 +++- src/repositories/git/index.js | 4 ++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/src/repositories/git/git.js b/src/repositories/git/git.js index 452569e4e..5d7b8a60f 100644 --- a/src/repositories/git/git.js +++ b/src/repositories/git/git.js @@ -65,7 +65,9 @@ export default class Git { } async getCommit(options) { - return this.listCommits([ '-1', ...options ]); + const [commit] = await this.listCommits([ '-1', ...options ]); + + return commit; } async log(options = {}) { diff --git a/src/repositories/git/index.js b/src/repositories/git/index.js index 662f05486..448135c89 100644 --- a/src/repositories/git/index.js +++ b/src/repositories/git/index.js @@ -61,14 +61,14 @@ export default class GitRepository extends RepositoryInterface { return this.git.pushChanges(); } - const [commit] = await this.git.getCommit([`${serviceId}/${documentType}.*`]); async findLatest(serviceId, documentType) { + const commit 
= await this.git.getCommit([`${serviceId}/${documentType}.*`]); return this.#toDomain(commit); } - const [commit] = await this.git.getCommit([recordId]); async findById(recordId) { + const commit = await this.git.getCommit([recordId]); return this.#toDomain(commit); } From d2f6c6725f5ea0f664bcfcaba5321875f03a8fbb Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 8 Jun 2022 18:05:42 +0200 Subject: [PATCH 43/74] Improve function name --- src/repositories/git/index.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/repositories/git/index.js b/src/repositories/git/index.js index 448135c89..1f602414b 100644 --- a/src/repositories/git/index.js +++ b/src/repositories/git/index.js @@ -133,7 +133,7 @@ export default class GitRepository extends RepositoryInterface { record.content = pdfBuffer; } - async #getSortedRecordsRelatedCommits() { + async #getCommits() { return (await this.git.listCommits()) .filter(({ message }) => message.match(COMMIT_MESSAGE_PREFIXES_REGEXP)) // Skip commits which are not a document record (README, LICENSE, …) .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending order From 5d823dada693c0dc5be7f4f2735fe0555da97d3e Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 8 Jun 2022 18:07:19 +0200 Subject: [PATCH 44/74] Improve returning null object --- src/repositories/git/index.js | 2 +- src/repositories/mongo/index.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/repositories/git/index.js b/src/repositories/git/index.js index 1f602414b..fa3e8e24c 100644 --- a/src/repositories/git/index.js +++ b/src/repositories/git/index.js @@ -45,7 +45,7 @@ export default class GitRepository extends RepositoryInterface { const sha = await this.#commit({ filePath, message, date: fetchDate }); if (!sha) { - return {}; + return Object(null); } record.id = sha; diff --git a/src/repositories/mongo/index.js 
b/src/repositories/mongo/index.js index d3b221f33..fef17a5bc 100644 --- a/src/repositories/mongo/index.js +++ b/src/repositories/mongo/index.js @@ -44,7 +44,7 @@ export default class MongoRepository extends RepositoryInterface { const { content: previousRecordContent } = await this.findLatest(serviceId, documentType); if (previousRecordContent == documentFields.content) { - return {}; + return Object(null); } const insertResult = await this.collection.insertOne(documentFields); From e1ac3310f99e0f8f8ce23de14d6f9a81eede813a Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 8 Jun 2022 18:08:21 +0200 Subject: [PATCH 45/74] Update scripts --- scripts/dataset/export/index.js | 2 +- scripts/history/migrate-services.js | 10 +++++----- scripts/history/update-to-full-hash.js | 4 +++- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/scripts/dataset/export/index.js b/scripts/dataset/export/index.js index 7598aba0f..491c52e43 100644 --- a/scripts/dataset/export/index.js +++ b/scripts/dataset/export/index.js @@ -47,7 +47,7 @@ export default async function generate({ archivePath, releaseDate }) { logger.info({ message: versionPath, counter: index, hash: version.id }); archive.stream.append( - await content, + content, { name: `${archive.basename}/${versionPath}` }, ); index++; diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js index d42226adc..f9843f45f 100644 --- a/scripts/history/migrate-services.js +++ b/scripts/history/migrate-services.js @@ -92,14 +92,14 @@ const COUNTERS = { await initialize(migration); - const fromSnapshotsRecords = await (await migration.from.snapshots.source.findAll({ deferContentLoading: true })); - const toSnapshotsRecords = await (await migration.to.snapshots.source.findAll({ deferContentLoading: true })); + const fromSnapshotsRecords = await migration.from.snapshots.source.findAll(); + const toSnapshotsRecords = await migration.to.snapshots.source.findAll(); const snapshotsToMigrate = 
fromSnapshotsRecords.filter(({ serviceId }) => migration.services.includes(serviceId)); const fromSnapshotsRecordsToRewrite = fromSnapshotsRecords.filter(({ serviceId }) => !migration.services.includes(serviceId)); const toSnapshotsRecordsMigrated = [ ...toSnapshotsRecords, ...snapshotsToMigrate ].sort((recordA, recordB) => new Date(recordA.fetchDate) - new Date(recordB.fetchDate)); - const fromVersionsRecords = await (await migration.from.versions.source.findAll({ deferContentLoading: true })); - const toVersionsRecords = await (await migration.to.versions.source.findAll({ deferContentLoading: true })); + const fromVersionsRecords = await migration.from.versions.source.findAll(); + const toVersionsRecords = await migration.to.versions.source.findAll(); const versionsToMigrate = fromVersionsRecords.filter(({ serviceId }) => migration.services.includes(serviceId)); const fromVersionsRecordsToRewrite = fromVersionsRecords.filter(({ serviceId }) => !migration.services.includes(serviceId)); const toVersionsRecordsMigrated = [ ...toVersionsRecords, ...versionsToMigrate ].sort((recordA, recordB) => new Date(recordA.fetchDate) - new Date(recordB.fetchDate)); @@ -121,7 +121,7 @@ const COUNTERS = { await fs.writeFile(path.join(__dirname, 'ids-mapping.json'), JSON.stringify(idsMapping, null, 4)); - console.log('Snapshots-migrated'); + console.log('Snapshots migrated\n'); await Promise.all([ rewriteVersions(migration.from.versions.destination, fromVersionsRecordsToRewrite, idsMapping, migration.from.versions.logger), diff --git a/scripts/history/update-to-full-hash.js b/scripts/history/update-to-full-hash.js index c09a28987..4f8652ccc 100644 --- a/scripts/history/update-to-full-hash.js +++ b/scripts/history/update-to-full-hash.js @@ -42,7 +42,9 @@ const ROOT_PATH = path.resolve(__dirname, '../../'); for await (const record of versionsRepository.iterate()) { const fullSnapshotId = await snapshotsRepository.git.getFullHash(record.snapshotId); - const { id: recordId } = await 
versionsTargetRepository.save({ ...record, snapshotId: fullSnapshotId }); + record.snapshotId = fullSnapshotId; + + const { id: recordId } = await versionsTargetRepository.save(record); if (!recordId) { logger.warn({ message: 'Record skipped', serviceId: record.serviceId, type: record.documentType, id: record.id, current, total }); From f728c661c544caf088df4bfb11ccc0c70d8ced32 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 8 Jun 2022 18:15:01 +0200 Subject: [PATCH 46/74] Move repositories in recorder module --- scripts/dataset/export/index.test.js | 4 ++-- scripts/history/migrate-services.js | 2 +- scripts/history/update-to-full-hash.js | 2 +- scripts/import/index.js | 2 +- scripts/import/loadCommits.js | 2 +- scripts/rewrite/initializer/index.js | 2 +- scripts/rewrite/rewrite-snapshots.js | 4 ++-- scripts/rewrite/rewrite-versions.js | 4 ++-- src/archivist/index.test.js | 4 ++-- src/archivist/recorder/index.js | 2 +- src/archivist/recorder/index.test.js | 4 ++-- src/{ => archivist/recorder}/repositories/git/dataMapper.js | 0 src/{ => archivist/recorder}/repositories/git/git.js | 0 src/{ => archivist/recorder}/repositories/git/index.js | 0 src/{ => archivist/recorder}/repositories/git/index.test.js | 2 +- src/{ => archivist/recorder}/repositories/mongo/dataMapper.js | 0 src/{ => archivist/recorder}/repositories/mongo/index.js | 0 src/{ => archivist/recorder}/repositories/mongo/index.test.js | 4 ++-- src/{ => archivist/recorder}/repositories/record.js | 0 .../recorder}/repositories/repositoryInterface.js | 0 src/index.js | 4 ++-- 21 files changed, 21 insertions(+), 21 deletions(-) rename src/{ => archivist/recorder}/repositories/git/dataMapper.js (100%) rename src/{ => archivist/recorder}/repositories/git/git.js (100%) rename src/{ => archivist/recorder}/repositories/git/index.js (100%) rename src/{ => archivist/recorder}/repositories/git/index.test.js (99%) rename src/{ => archivist/recorder}/repositories/mongo/dataMapper.js (100%) rename src/{ => 
archivist/recorder}/repositories/mongo/index.js (100%) rename src/{ => archivist/recorder}/repositories/mongo/index.test.js (99%) rename src/{ => archivist/recorder}/repositories/record.js (100%) rename src/{ => archivist/recorder}/repositories/repositoryInterface.js (100%) diff --git a/scripts/dataset/export/index.test.js b/scripts/dataset/export/index.test.js index 7c4b55d36..341241233 100644 --- a/scripts/dataset/export/index.test.js +++ b/scripts/dataset/export/index.test.js @@ -8,8 +8,8 @@ import dircompare from 'dir-compare'; import mime from 'mime'; import StreamZip from 'node-stream-zip'; -import GitRepository from '../../../src/repositories/git/index.js'; -import Record from '../../../src/repositories/record.js'; +import GitRepository from '../../../src/archivist/recorder/repositories/git/index.js'; +import Record from '../../../src/archivist/recorder/repositories/record.js'; import generateArchive from './index.js'; diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js index f9843f45f..7f9a2ad66 100644 --- a/scripts/history/migrate-services.js +++ b/scripts/history/migrate-services.js @@ -5,7 +5,7 @@ import { fileURLToPath } from 'url'; import config from 'config'; import winston from 'winston'; -import GitRepository from '../../src/repositories/git/index.js'; +import GitRepository from '../../src/archivist/recorder/repositories/git/index.js'; import { format } from './logger/index.js'; import { importReadme } from './utils/index.js'; diff --git a/scripts/history/update-to-full-hash.js b/scripts/history/update-to-full-hash.js index 4f8652ccc..ef481372f 100644 --- a/scripts/history/update-to-full-hash.js +++ b/scripts/history/update-to-full-hash.js @@ -3,7 +3,7 @@ import { fileURLToPath } from 'url'; import config from 'config'; -import GitRepository from '../../src/repositories/git/index.js'; +import GitRepository from '../../src/archivist/recorder/repositories/git/index.js'; import logger from './logger/index.js'; import 
{ importReadme } from './utils/index.js'; diff --git a/scripts/import/index.js b/scripts/import/index.js index 0513f45d9..1646c5104 100644 --- a/scripts/import/index.js +++ b/scripts/import/index.js @@ -8,7 +8,7 @@ import mime from 'mime'; import { MongoClient } from 'mongodb'; import nodeFetch from 'node-fetch'; -import Git from '../../src/repositories/git/git.js'; +import Git from '../../src/archivist/recorder/repositories/git/git.js'; import * as renamer from '../utils/renamer/index.js'; import logger from './logger/index.js'; diff --git a/scripts/import/loadCommits.js b/scripts/import/loadCommits.js index 415e2f197..4718b51b9 100644 --- a/scripts/import/loadCommits.js +++ b/scripts/import/loadCommits.js @@ -5,7 +5,7 @@ import { fileURLToPath } from 'url'; import config from 'config'; import { MongoClient } from 'mongodb'; -import Git from '../../src/repositories/git/git.js'; +import Git from '../../src/archivist/recorder/repositories/git/git.js'; import logger from './logger/index.js'; diff --git a/scripts/rewrite/initializer/index.js b/scripts/rewrite/initializer/index.js index fa0824137..00a602f28 100644 --- a/scripts/rewrite/initializer/index.js +++ b/scripts/rewrite/initializer/index.js @@ -4,7 +4,7 @@ import { fileURLToPath } from 'url'; import config from 'config'; -import Git from '../../../src/repositories/git/git.js'; +import Git from '../../../src/archivist/recorder/repositories/git/git.js'; import { fileExists } from '../utils.js'; const fs = fsApi.promises; diff --git a/scripts/rewrite/rewrite-snapshots.js b/scripts/rewrite/rewrite-snapshots.js index b9d7f2704..eb9cbc96f 100644 --- a/scripts/rewrite/rewrite-snapshots.js +++ b/scripts/rewrite/rewrite-snapshots.js @@ -4,8 +4,8 @@ import { fileURLToPath } from 'url'; import config from 'config'; import Recorder from '../../src/archivist/recorder/index.js'; -import Git from '../../src/repositories/git/git.js'; -import GitRepository from '../../src/repositories/git/index.js'; +import Git from 
'../../src/archivist/recorder/repositories/git/git.js'; +import GitRepository from '../../src/archivist/recorder/repositories/git/index.js'; import * as renamer from '../utils/renamer/index.js'; import * as initializer from './initializer/index.js'; diff --git a/scripts/rewrite/rewrite-versions.js b/scripts/rewrite/rewrite-versions.js index 8ae5fb546..8d6db49ac 100644 --- a/scripts/rewrite/rewrite-versions.js +++ b/scripts/rewrite/rewrite-versions.js @@ -6,9 +6,9 @@ import config from 'config'; import { InaccessibleContentError } from '../../src/archivist/errors.js'; import filter from '../../src/archivist/filter/index.js'; import Recorder from '../../src/archivist/recorder/index.js'; +import Git from '../../src/archivist/recorder/repositories/git/git.js'; +import GitRepository from '../../src/archivist/recorder/repositories/git/index.js'; import * as services from '../../src/archivist/services/index.js'; -import Git from '../../src/repositories/git/git.js'; -import GitRepository from '../../src/repositories/git/index.js'; import * as renamer from '../utils/renamer/index.js'; import * as initializer from './initializer/index.js'; diff --git a/src/archivist/index.test.js b/src/archivist/index.test.js index 4deafb954..1981e7e44 100644 --- a/src/archivist/index.test.js +++ b/src/archivist/index.test.js @@ -8,8 +8,8 @@ import nock from 'nock'; import sinon from 'sinon'; import sinonChai from 'sinon-chai'; -import Git from '../repositories/git/git.js'; -import GitRepository from '../repositories/git/index.js'; +import Git from './recorder/repositories/git/git.js'; +import GitRepository from './recorder/repositories/git/index.js'; import Archivist, { AVAILABLE_EVENTS } from './index.js'; diff --git a/src/archivist/recorder/index.js b/src/archivist/recorder/index.js index 731ac170d..0f2e4898c 100644 --- a/src/archivist/recorder/index.js +++ b/src/archivist/recorder/index.js @@ -1,4 +1,4 @@ -import Record from '../../repositories/record.js'; +import Record from 
'./repositories/record.js'; export default class Recorder { constructor({ versionsRepository, snapshotsRepository }) { diff --git a/src/archivist/recorder/index.test.js b/src/archivist/recorder/index.test.js index 25db4f143..bb6451d35 100644 --- a/src/archivist/recorder/index.test.js +++ b/src/archivist/recorder/index.test.js @@ -4,8 +4,8 @@ import { fileURLToPath } from 'url'; import chai from 'chai'; import config from 'config'; -import GitRepository from '../../repositories/git/index.js'; -import MongoRepository from '../../repositories/mongo/index.js'; +import GitRepository from './repositories/git/index.js'; +import MongoRepository from './repositories/mongo/index.js'; import Recorder from './index.js'; diff --git a/src/repositories/git/dataMapper.js b/src/archivist/recorder/repositories/git/dataMapper.js similarity index 100% rename from src/repositories/git/dataMapper.js rename to src/archivist/recorder/repositories/git/dataMapper.js diff --git a/src/repositories/git/git.js b/src/archivist/recorder/repositories/git/git.js similarity index 100% rename from src/repositories/git/git.js rename to src/archivist/recorder/repositories/git/git.js diff --git a/src/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js similarity index 100% rename from src/repositories/git/index.js rename to src/archivist/recorder/repositories/git/index.js diff --git a/src/repositories/git/index.test.js b/src/archivist/recorder/repositories/git/index.test.js similarity index 99% rename from src/repositories/git/index.test.js rename to src/archivist/recorder/repositories/git/index.test.js index 1ad97ee44..3db386393 100644 --- a/src/repositories/git/index.test.js +++ b/src/archivist/recorder/repositories/git/index.test.js @@ -27,7 +27,7 @@ const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z'); const FETCH_DATE_EARLIER = new Date('2000-01-01T06:00:00.000Z'); const SNAPSHOT_ID = '513fadb2ae415c87747047e33287805d59e2dd55'; const MIME_TYPE = 'text/html'; -const 
PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/terms.pdf'), { encoding: 'utf8' }); +const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test/fixtures/terms.pdf'), { encoding: 'utf8' }); const PDF_MIME_TYPE = 'application/pdf'; let git; diff --git a/src/repositories/mongo/dataMapper.js b/src/archivist/recorder/repositories/mongo/dataMapper.js similarity index 100% rename from src/repositories/mongo/dataMapper.js rename to src/archivist/recorder/repositories/mongo/dataMapper.js diff --git a/src/repositories/mongo/index.js b/src/archivist/recorder/repositories/mongo/index.js similarity index 100% rename from src/repositories/mongo/index.js rename to src/archivist/recorder/repositories/mongo/index.js diff --git a/src/repositories/mongo/index.test.js b/src/archivist/recorder/repositories/mongo/index.test.js similarity index 99% rename from src/repositories/mongo/index.test.js rename to src/archivist/recorder/repositories/mongo/index.test.js index 658b7b32e..1b21fbf97 100644 --- a/src/repositories/mongo/index.test.js +++ b/src/archivist/recorder/repositories/mongo/index.test.js @@ -24,8 +24,8 @@ const FETCH_DATE = new Date('2000-01-01T12:00:00.000Z'); const FETCH_DATE_LATER = new Date('2000-01-02T12:00:00.000Z'); const FETCH_DATE_EARLIER = new Date('2000-01-01T06:00:00.000Z'); const SNAPSHOT_ID = '61af86dc5ff5caa74ae926ad'; -const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/terms.pdf')); -const UPDATED_PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../test/fixtures/termsModified.pdf')); +const PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test/fixtures/terms.pdf')); +const UPDATED_PDF_CONTENT = fs.readFileSync(path.resolve(__dirname, '../../../../../test/fixtures/termsModified.pdf')); const PDF_MIME_TYPE = 'application/pdf'; let collection; diff --git a/src/repositories/record.js b/src/archivist/recorder/repositories/record.js similarity index 
100% rename from src/repositories/record.js rename to src/archivist/recorder/repositories/record.js diff --git a/src/repositories/repositoryInterface.js b/src/archivist/recorder/repositories/repositoryInterface.js similarity index 100% rename from src/repositories/repositoryInterface.js rename to src/archivist/recorder/repositories/repositoryInterface.js diff --git a/src/index.js b/src/index.js index 154f278f2..95b21498c 100644 --- a/src/index.js +++ b/src/index.js @@ -3,8 +3,8 @@ import { fileURLToPath } from 'url'; import config from 'config'; -import GitRepository from './repositories/git/index.js'; -import MongoRepository from './repositories/mongo/index.js'; +import GitRepository from './archivist/recorder/repositories/git/index.js'; +import MongoRepository from './archivist/recorder/repositories/mongo/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); From 8db2db2041c4dd5d26500eeee8d65c1940dbf5c9 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 09:47:05 +0200 Subject: [PATCH 47/74] Switch to a repository factory --- scripts/dataset/export/index.js | 5 +- src/archivist/index.js | 4 +- src/archivist/index.test.js | 29 ++---- src/archivist/recorder/index.js | 11 +-- src/archivist/recorder/index.test.js | 98 +++++++------------ .../recorder/repositories/factory.js | 23 +++++ src/main.js | 10 +- 7 files changed, 75 insertions(+), 105 deletions(-) create mode 100644 src/archivist/recorder/repositories/factory.js diff --git a/scripts/dataset/export/index.js b/scripts/dataset/export/index.js index 491c52e43..b171fc867 100644 --- a/scripts/dataset/export/index.js +++ b/scripts/dataset/export/index.js @@ -3,8 +3,9 @@ import path from 'path'; import { fileURLToPath } from 'url'; import archiver from 'archiver'; +import config from 'config'; -import { instantiateVersionsRepository } from '../../../src/index.js'; +import RepositoryFactory from '../../../src/archivist/recorder/repositories/factory.js'; import * as renamer from 
'../../utils/renamer/index.js'; import readme from '../assets/README.template.js'; import logger from '../logger/index.js'; @@ -16,7 +17,7 @@ const fs = fsApi.promises; const ARCHIVE_FORMAT = 'zip'; // for supported formats, see https://www.archiverjs.com/docs/archive-formats export default async function generate({ archivePath, releaseDate }) { - const versionsRepository = await (instantiateVersionsRepository()).initialize(); + const versionsRepository = await (RepositoryFactory.create(config.get('recorder.versions.storage'))).initialize(); const archive = await initializeArchive(archivePath); diff --git a/src/archivist/index.js b/src/archivist/index.js index 3c1f1aad9..e36f96fd1 100644 --- a/src/archivist/index.js +++ b/src/archivist/index.js @@ -41,9 +41,9 @@ export default class Archivist extends events.EventEmitter { return Object.keys(this.services); } - constructor({ storage: { versions, snapshots } }) { + constructor(config) { super(); - this.recorder = new Recorder({ versionsRepository: versions, snapshotsRepository: snapshots }); + this.recorder = new Recorder(config.get('recorder')); } async initialize() { diff --git a/src/archivist/index.test.js b/src/archivist/index.test.js index 1981e7e44..ae8f68a7d 100644 --- a/src/archivist/index.test.js +++ b/src/archivist/index.test.js @@ -9,7 +9,6 @@ import sinon from 'sinon'; import sinonChai from 'sinon-chai'; import Git from './recorder/repositories/git/git.js'; -import GitRepository from './recorder/repositories/git/index.js'; import Archivist, { AVAILABLE_EVENTS } from './index.js'; @@ -27,14 +26,12 @@ const VERSIONS_PATH = path.resolve(ROOT_PATH, config.get('recorder.versions.stor const MIME_TYPE = 'text/html'; const FETCH_DATE = new Date('2000-01-02T12:00:00.000Z'); -let snapshotsRepository; -let versionsRepository; - async function resetGitRepositories() { - return Promise.all([ snapshotsRepository.removeAll(), versionsRepository.removeAll() ]); + return Promise.all([ 
app.recorder.snapshotsRepository.removeAll(), app.recorder.versionsRepository.removeAll() ]); } let gitVersion; +let app; describe('Archivist', function () { this.timeout(10000); @@ -69,23 +66,13 @@ describe('Archivist', function () { serviceAVersionExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/service_A_terms.md'), { encoding: 'utf8' }); serviceBSnapshotExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/terms.pdf')); serviceBVersionExpectedContent = await fs.readFile(path.resolve(ROOT_PATH, 'test/fixtures/termsFromPDF.md'), { encoding: 'utf8' }); - snapshotsRepository = new GitRepository({ - ...config.get('recorder.snapshots.storage.git'), - path: SNAPSHOTS_PATH, - }); - versionsRepository = new GitRepository({ - ...config.get('recorder.versions.storage.git'), - path: VERSIONS_PATH, - }); }); describe('#trackChanges', () => { - let app; - before(async () => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - app = new Archivist({ storage: { versions: versionsRepository, snapshots: snapshotsRepository } }); + app = new Archivist(config); await app.initialize(); }); @@ -163,13 +150,13 @@ describe('Archivist', function () { before(async () => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - const app = new Archivist({ storage: { versions: versionsRepository, snapshots: snapshotsRepository } }); + app = new Archivist(config); await app.initialize(); await app.trackChanges(serviceIds); - ({ id: originalSnapshotId } = await snapshotsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE)); - ({ 
id: firstVersionId } = await versionsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: originalSnapshotId } = await app.recorder.snapshotsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE)); + ({ id: firstVersionId } = await app.recorder.versionsRepository.findLatest(SERVICE_A_ID, SERVICE_A_TYPE)); serviceBCommits = await gitVersion.log({ file: SERVICE_B_EXPECTED_VERSION_FILE_PATH }); @@ -219,7 +206,7 @@ describe('Archivist', function () { before(async () => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - const app = new Archivist({ storage: { versions: versionsRepository, snapshots: snapshotsRepository } }); + app = new Archivist(config); await app.initialize(); await app.trackChanges(serviceIds); @@ -265,7 +252,7 @@ describe('Archivist', function () { } before(async () => { - app = new Archivist({ storage: { versions: versionsRepository, snapshots: snapshotsRepository } }); + app = new Archivist(config); await app.initialize(); AVAILABLE_EVENTS.forEach(event => { diff --git a/src/archivist/recorder/index.js b/src/archivist/recorder/index.js index 0f2e4898c..79ef64590 100644 --- a/src/archivist/recorder/index.js +++ b/src/archivist/recorder/index.js @@ -1,13 +1,10 @@ import Record from './repositories/record.js'; +import RepositoryFactory from './repositories/factory.js'; export default class Recorder { - constructor({ versionsRepository, snapshotsRepository }) { - if (!versionsRepository || !snapshotsRepository) { - throw new RangeError('Storage repositories should be defined both for versions and snapshots'); - } - - this.versionsRepository = versionsRepository; - this.snapshotsRepository = snapshotsRepository; + constructor(config) { + this.versionsRepository = RepositoryFactory.create(config.versions.storage); + 
this.snapshotsRepository = RepositoryFactory.create(config.snapshots.storage); } async initialize() { diff --git a/src/archivist/recorder/index.test.js b/src/archivist/recorder/index.test.js index bb6451d35..339f37e18 100644 --- a/src/archivist/recorder/index.test.js +++ b/src/archivist/recorder/index.test.js @@ -1,19 +1,8 @@ -import path from 'path'; -import { fileURLToPath } from 'url'; - import chai from 'chai'; import config from 'config'; -import GitRepository from './repositories/git/index.js'; -import MongoRepository from './repositories/mongo/index.js'; - import Recorder from './index.js'; -const __dirname = path.dirname(fileURLToPath(import.meta.url)); - -export const SNAPSHOTS_PATH = path.resolve(__dirname, '../../../', config.get('recorder.snapshots.storage.git.path')); -export const VERSIONS_PATH = path.resolve(__dirname, '../../../', config.get('recorder.versions.storage.git.path')); - const { expect } = chai; const MIME_TYPE = 'text/html'; @@ -24,25 +13,8 @@ describe('Recorder', () => { const SERVICE_ID = 'test_service'; const TYPE = 'Terms of Service'; - const repositoriesTypes = { - git: { - snapshots: new GitRepository({ - ...config.get('recorder.snapshots.storage.git'), - path: SNAPSHOTS_PATH, - }), - versions: new GitRepository({ - ...config.get('recorder.versions.storage.git'), - path: VERSIONS_PATH, - }), - }, - mongo: { - snapshots: new MongoRepository(config.get('recorder.versions.storage.mongo')), - versions: new MongoRepository(config.get('recorder.snapshots.storage.mongo')), - }, - }; - - for (const [ repositoryName, { versions: versionsRepository, snapshots: snapshotsRepository }] of Object.entries(repositoriesTypes)) { - describe(repositoryName, () => { + for (const repositoryType of [ 'git', 'mongo' ]) { + describe(repositoryType, () => { describe('#recordSnapshot', () => { const CONTENT = '

ToS fixture data with UTF-8 çhãràčtęrs

'; let recorder; @@ -51,20 +23,22 @@ describe('Recorder', () => { let record; before(async () => { - recorder = new Recorder({ - versionsRepository, - snapshotsRepository, - }); + const options = config.util.cloneDeep(config.recorder); + + options.versions.storage.type = repositoryType; + options.snapshots.storage.type = repositoryType; + + recorder = new Recorder(options); await recorder.initialize(); }); after(async () => { - await snapshotsRepository.removeAll(); + await recorder.snapshotsRepository.removeAll(); await recorder.finalize(); }); context('when a required param is missing', () => { - after(async () => snapshotsRepository.removeAll()); + after(async () => recorder.snapshotsRepository.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -111,10 +85,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await snapshotsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.snapshotsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => snapshotsRepository.removeAll()); + after(async () => recorder.snapshotsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -149,10 +123,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.snapshotsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => snapshotsRepository.removeAll()); + after(async () => recorder.snapshotsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -185,10 +159,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await snapshotsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.snapshotsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => snapshotsRepository.removeAll()); + after(async () => 
recorder.snapshotsRepository.removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; @@ -205,10 +179,7 @@ describe('Recorder', () => { let record; before(async () => { - recorder = new Recorder({ - versionsRepository, - snapshotsRepository, - }); + recorder = new Recorder(config.get('recorder')); await recorder.initialize(); }); @@ -217,7 +188,7 @@ describe('Recorder', () => { }); context('when a required param is missing', () => { - after(async () => versionsRepository.removeAll()); + after(async () => recorder.versionsRepository.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -267,10 +238,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => versionsRepository.removeAll()); + after(async () => recorder.versionsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -307,10 +278,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => versionsRepository.removeAll()); + after(async () => recorder.versionsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -349,10 +320,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => versionsRepository.removeAll()); + after(async () => recorder.versionsRepository.removeAll()); it('does not record the document', async () => { expect(id).to.not.be.ok; @@ -369,20 +340,17 @@ 
describe('Recorder', () => { let record; before(async () => { - recorder = new Recorder({ - versionsRepository, - snapshotsRepository, - }); + recorder = new Recorder(config.get('recorder')); await recorder.initialize(); }); after(async () => { - await versionsRepository.removeAll(); + await recorder.versionsRepository.removeAll(); await recorder.finalize(); }); context('when a required param is missing', () => { - after(async () => versionsRepository.removeAll()); + after(async () => recorder.versionsRepository.removeAll()); const validParams = { serviceId: SERVICE_ID, @@ -432,10 +400,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE, })); - record = await versionsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => versionsRepository.removeAll()); after(async () => versionsRepository.removeAll()); + after(async () => recorder.versionsRepository.removeAll()); after(async () => recorder.versionsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(CONTENT); @@ -472,10 +440,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => versionsRepository.removeAll()); + after(async () => recorder.versionsRepository.removeAll()); it('records the document with the proper content', async () => { expect(await record.content).to.equal(UPDATED_CONTENT); @@ -514,10 +482,10 @@ describe('Recorder', () => { fetchDate: FETCH_DATE_LATER, })); - record = await versionsRepository.findLatest(SERVICE_ID, TYPE); + record = await recorder.versionsRepository.findLatest(SERVICE_ID, TYPE); }); - after(async () => versionsRepository.removeAll()); + after(async () => recorder.versionsRepository.removeAll()); it('does not record the document', async () => { 
expect(id).to.not.be.ok; diff --git a/src/archivist/recorder/repositories/factory.js b/src/archivist/recorder/repositories/factory.js new file mode 100644 index 000000000..f8463bfe9 --- /dev/null +++ b/src/archivist/recorder/repositories/factory.js @@ -0,0 +1,23 @@ +import path from 'path'; +import { fileURLToPath } from 'url'; + +import GitRepository from './git/index.js'; +import MongoRepository from './mongo/index.js'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); + +export default class RepositoryFactory { + static create(params) { + switch (params.type) { + case 'git': + return new GitRepository({ + ...params.git, + path: path.resolve(__dirname, '../../../../', params.git.path), + }); + case 'mongo': + return new MongoRepository(params.mongo); + default: + throw new Error(`Unknow storage repository configuration with type: ${params.type}`); + } + } +} diff --git a/src/main.js b/src/main.js index 366b57418..57ad60593 100644 --- a/src/main.js +++ b/src/main.js @@ -1,3 +1,4 @@ +import config from 'config'; import cron from 'croner'; import Archivist from './archivist/index.js'; @@ -5,20 +6,13 @@ import logger from './logger/index.js'; import Notifier from './notifier/index.js'; import Tracker from './tracker/index.js'; -import { instantiateVersionsRepository, instantiateSnapshotsRepository } from './index.js'; - const args = process.argv.slice(2); const refilterOnly = args.includes('--refilter-only'); const schedule = args.includes('--schedule'); const extraArgs = args.filter(arg => !arg.startsWith('--')); (async function startOpenTermsArchive() { - const archivist = new Archivist({ - storage: { - versions: instantiateVersionsRepository(), - snapshots: instantiateSnapshotsRepository(), - }, - }); + const archivist = new Archivist(config); archivist.attach(logger); From 2be75cca3c471e0276426ce085c4dec8d6ebd7b3 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 09:47:35 +0200 Subject: [PATCH 48/74] Improve recorder file 
structure --- .eslintrc.yaml | 2 +- scripts/dataset/export/index.test.js | 2 +- src/archivist/recorder/index.js | 2 +- src/archivist/recorder/{repositories => }/record.js | 0 src/archivist/recorder/repositories/git/dataMapper.js | 2 +- src/archivist/recorder/repositories/git/index.js | 2 +- src/archivist/recorder/repositories/git/index.test.js | 2 +- .../repositories/{repositoryInterface.js => interface.js} | 0 src/archivist/recorder/repositories/mongo/dataMapper.js | 2 +- src/archivist/recorder/repositories/mongo/index.js | 2 +- src/archivist/recorder/repositories/mongo/index.test.js | 2 +- 11 files changed, 9 insertions(+), 9 deletions(-) rename src/archivist/recorder/{repositories => }/record.js (100%) rename src/archivist/recorder/repositories/{repositoryInterface.js => interface.js} (100%) diff --git a/.eslintrc.yaml b/.eslintrc.yaml index 20b5b640a..6dac883ba 100644 --- a/.eslintrc.yaml +++ b/.eslintrc.yaml @@ -107,7 +107,7 @@ overrides: func-names: 0 import/no-extraneous-dependencies: 0 - files: - - src/**/*Interface.js + - src/**/*[iI]nterface.js rules: no-unused-vars: 0 require-yield: 0 diff --git a/scripts/dataset/export/index.test.js b/scripts/dataset/export/index.test.js index 341241233..dbb04176f 100644 --- a/scripts/dataset/export/index.test.js +++ b/scripts/dataset/export/index.test.js @@ -8,8 +8,8 @@ import dircompare from 'dir-compare'; import mime from 'mime'; import StreamZip from 'node-stream-zip'; +import Record from '../../../src/archivist/recorder/record.js'; import GitRepository from '../../../src/archivist/recorder/repositories/git/index.js'; -import Record from '../../../src/archivist/recorder/repositories/record.js'; import generateArchive from './index.js'; diff --git a/src/archivist/recorder/index.js b/src/archivist/recorder/index.js index 79ef64590..2b2a7c1ef 100644 --- a/src/archivist/recorder/index.js +++ b/src/archivist/recorder/index.js @@ -1,4 +1,4 @@ -import Record from './repositories/record.js'; +import Record from 
'./record.js'; import RepositoryFactory from './repositories/factory.js'; export default class Recorder { diff --git a/src/archivist/recorder/repositories/record.js b/src/archivist/recorder/record.js similarity index 100% rename from src/archivist/recorder/repositories/record.js rename to src/archivist/recorder/record.js diff --git a/src/archivist/recorder/repositories/git/dataMapper.js b/src/archivist/recorder/repositories/git/dataMapper.js index 61e48a660..5bc518962 100644 --- a/src/archivist/recorder/repositories/git/dataMapper.js +++ b/src/archivist/recorder/repositories/git/dataMapper.js @@ -2,7 +2,7 @@ import path from 'path'; import mime from 'mime'; -import Record from '../record.js'; +import Record from '../../record.js'; mime.define({ 'text/markdown': ['md'] }, true); // ensure extension for markdown files is `.md` and not `.markdown` diff --git a/src/archivist/recorder/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js index fa3e8e24c..62691af50 100644 --- a/src/archivist/recorder/repositories/git/index.js +++ b/src/archivist/recorder/repositories/git/index.js @@ -8,7 +8,7 @@ import path from 'path'; import mime from 'mime'; -import RepositoryInterface from '../repositoryInterface.js'; +import RepositoryInterface from '../interface.js'; import DataMapper, { COMMIT_MESSAGE_PREFIXES_REGEXP, COMMIT_MESSAGE_PREFIX } from './dataMapper.js'; import Git from './git.js'; diff --git a/src/archivist/recorder/repositories/git/index.test.js b/src/archivist/recorder/repositories/git/index.test.js index 3db386393..17ef82a20 100644 --- a/src/archivist/recorder/repositories/git/index.test.js +++ b/src/archivist/recorder/repositories/git/index.test.js @@ -6,7 +6,7 @@ import chai from 'chai'; import config from 'config'; import mime from 'mime'; -import Record from '../record.js'; +import Record from '../../record.js'; import Git from './git.js'; diff --git a/src/archivist/recorder/repositories/repositoryInterface.js 
b/src/archivist/recorder/repositories/interface.js similarity index 100% rename from src/archivist/recorder/repositories/repositoryInterface.js rename to src/archivist/recorder/repositories/interface.js diff --git a/src/archivist/recorder/repositories/mongo/dataMapper.js b/src/archivist/recorder/repositories/mongo/dataMapper.js index c156a6cb2..9b72ff341 100644 --- a/src/archivist/recorder/repositories/mongo/dataMapper.js +++ b/src/archivist/recorder/repositories/mongo/dataMapper.js @@ -1,6 +1,6 @@ import { ObjectId } from 'mongodb'; -import Record from '../record.js'; +import Record from '../../record.js'; export default class DataMapper { static toPersistence(record) { diff --git a/src/archivist/recorder/repositories/mongo/index.js b/src/archivist/recorder/repositories/mongo/index.js index fef17a5bc..981bb8c91 100644 --- a/src/archivist/recorder/repositories/mongo/index.js +++ b/src/archivist/recorder/repositories/mongo/index.js @@ -5,7 +5,7 @@ import { MongoClient, ObjectId, Binary } from 'mongodb'; -import RepositoryInterface from '../repositoryInterface.js'; +import RepositoryInterface from '../interface.js'; import DataMapper from './dataMapper.js'; diff --git a/src/archivist/recorder/repositories/mongo/index.test.js b/src/archivist/recorder/repositories/mongo/index.test.js index 1b21fbf97..66341b887 100644 --- a/src/archivist/recorder/repositories/mongo/index.test.js +++ b/src/archivist/recorder/repositories/mongo/index.test.js @@ -6,7 +6,7 @@ import chai from 'chai'; import config from 'config'; import { MongoClient } from 'mongodb'; -import Record from '../record.js'; +import Record from '../../record.js'; import MongoRepository from './index.js'; From b26e88f87d6ff860d453ec8337765e07c2009fc0 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 15:06:08 +0200 Subject: [PATCH 49/74] Throw an error when trying to access not loaded Record content --- src/archivist/recorder/record.js | 16 +++++++++- src/archivist/recorder/record.test.js | 43 
+++++++++++++++++++++++++++ 2 files changed, 58 insertions(+), 1 deletion(-) create mode 100644 src/archivist/recorder/record.test.js diff --git a/src/archivist/recorder/record.js b/src/archivist/recorder/record.js index 5d3bf21d4..0feac7c73 100644 --- a/src/archivist/recorder/record.js +++ b/src/archivist/recorder/record.js @@ -1,4 +1,6 @@ export default class Record { + #content; + #REQUIRED_PARAMS = [ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ]; constructor(params) { @@ -19,8 +21,20 @@ export default class Record { } if (content) { - this.content = content; + this.#content = content; + } + } + + get content() { + if (this.#content === undefined) { + throw new Error('Record content not defined, set the content or use Repository#loadRecordContent'); } + + return this.#content; + } + + set content(content) { + this.#content = content; } #validate(givenParams) { diff --git a/src/archivist/recorder/record.test.js b/src/archivist/recorder/record.test.js new file mode 100644 index 000000000..c4c3cf9e4 --- /dev/null +++ b/src/archivist/recorder/record.test.js @@ -0,0 +1,43 @@ +import chai from 'chai'; +import config from 'config'; + +import Record from './record.js'; +import RepositoryFactory from './repositories/factory.js'; + +const { expect } = chai; + +describe('Record', () => { + let repository; + let record; + + before(async () => { + repository = await (RepositoryFactory.create(config.get('recorder.versions.storage'))).initialize(); + await repository.save(new Record({ + serviceId: 'ServiceA', + documentType: 'Terms of Service', + mimeType: 'text/html', + fetchDate: new Date('2000-01-01T12:00:00.000Z'), + content: 'content', + })); + ([record] = await repository.findAll()); + }); + + after(async () => { + await repository.removeAll(); + await repository.finalize(); + }); + + context('when trying to access content and it is neither defined nor loaded', () => { + it('throws an error explaining how to recover', async () => { + try { + 
console.log(record.content); + } catch (e) { + expect(e).to.be.an('error'); + expect(e.message).to.have.string('set the content or use Repository#loadRecordContent'); + + return; + } + expect.fail('No error was thrown'); + }); + }); +}); From 43720a796be5abb8d37257fd8e529d164923d76c Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 17:39:12 +0200 Subject: [PATCH 50/74] Add validation tests to Record --- src/archivist/recorder/record.js | 2 +- src/archivist/recorder/record.test.js | 60 ++++++++++++++++++++------- 2 files changed, 47 insertions(+), 15 deletions(-) diff --git a/src/archivist/recorder/record.js b/src/archivist/recorder/record.js index 0feac7c73..6311f0d32 100644 --- a/src/archivist/recorder/record.js +++ b/src/archivist/recorder/record.js @@ -39,7 +39,7 @@ export default class Record { #validate(givenParams) { for (const param of this.#REQUIRED_PARAMS) { - if (!Object.prototype.hasOwnProperty.call(givenParams, param)) { + if (!Object.prototype.hasOwnProperty.call(givenParams, param) || givenParams[param] == null) { throw new Error(`"${param}" is required`); } } diff --git a/src/archivist/recorder/record.test.js b/src/archivist/recorder/record.test.js index c4c3cf9e4..225b64395 100644 --- a/src/archivist/recorder/record.test.js +++ b/src/archivist/recorder/record.test.js @@ -9,22 +9,54 @@ const { expect } = chai; describe('Record', () => { let repository; let record; + const REQUIRED_PARAMS = [ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ]; + const recordParams = { + serviceId: 'ServiceA', + documentType: 'Terms of Service', + mimeType: 'text/html', + fetchDate: new Date('2000-01-01T12:00:00.000Z'), + }; - before(async () => { - repository = await (RepositoryFactory.create(config.get('recorder.versions.storage'))).initialize(); - await repository.save(new Record({ - serviceId: 'ServiceA', - documentType: 'Terms of Service', - mimeType: 'text/html', - fetchDate: new Date('2000-01-01T12:00:00.000Z'), - content: 'content', - 
})); - ([record] = await repository.findAll()); - }); + describe('Validation', () => { + describe('Required paramaters', () => { + REQUIRED_PARAMS.forEach(requiredParam => { + describe(`"${requiredParam}"`, () => { + context('when it is missing', () => { + it('throws an error', async () => { + try { + const params = {}; + + Object.keys(recordParams).filter(param => param != requiredParam).forEach(param => { + params[param] = recordParams[param]; + }); + + record = new Record({ ...params }); + } catch (e) { + expect(e).to.be.an('error'); + expect(e.message).to.have.string(`"${requiredParam}" is required`); - after(async () => { - await repository.removeAll(); - await repository.finalize(); + return; + } + expect.fail('No error was thrown'); + }); + }); + + context('when it is null', () => { + it('throws an error', async () => { + try { + record = new Record({ ...recordParams, [requiredParam]: null }); + } catch (e) { + expect(e).to.be.an('error'); + expect(e.message).to.have.string(`"${requiredParam}" is required`); + + return; + } + expect.fail('No error was thrown'); + }); + }); + }); + }); + }); }); context('when trying to access content and it is neither defined nor loaded', () => { From f0beb5f786624d499f25f6049c65c3225c88f72d Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 17:39:19 +0200 Subject: [PATCH 51/74] Improve Record test readability --- src/archivist/recorder/record.test.js | 36 +++++++++++++++++++-------- 1 file changed, 26 insertions(+), 10 deletions(-) diff --git a/src/archivist/recorder/record.test.js b/src/archivist/recorder/record.test.js index 225b64395..77b88b76d 100644 --- a/src/archivist/recorder/record.test.js +++ b/src/archivist/recorder/record.test.js @@ -59,17 +59,33 @@ describe('Record', () => { }); }); - context('when trying to access content and it is neither defined nor loaded', () => { - it('throws an error explaining how to recover', async () => { - try { - console.log(record.content); - } catch (e) { - 
expect(e).to.be.an('error'); - expect(e.message).to.have.string('set the content or use Repository#loadRecordContent'); + describe('Content access', () => { + before(async () => { + repository = await (RepositoryFactory.create(config.get('recorder.versions.storage'))).initialize(); + await repository.save(new Record({ + ...recordParams, + content: 'content', + })); + ([record] = await repository.findAll()); + }); + + after(async () => { + await repository.removeAll(); + await repository.finalize(); + }); - return; - } - expect.fail('No error was thrown'); + context('when it is neither defined nor loaded', () => { + it('throws an error explaining how to recover', async () => { + try { + console.log(record.content); + } catch (e) { + expect(e).to.be.an('error'); + expect(e.message).to.have.string('set the content or use Repository#loadRecordContent'); + + return; + } + expect.fail('No error was thrown'); + }); }); }); }); From b18240795f026df84a7c1b52704b3d263099dcb0 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 18:11:43 +0200 Subject: [PATCH 52/74] Add missing space --- src/archivist/recorder/repositories/git/dataMapper.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/archivist/recorder/repositories/git/dataMapper.js b/src/archivist/recorder/repositories/git/dataMapper.js index 5bc518962..ea10f1c2a 100644 --- a/src/archivist/recorder/repositories/git/dataMapper.js +++ b/src/archivist/recorder/repositories/git/dataMapper.js @@ -13,6 +13,7 @@ export const COMMIT_MESSAGE_PREFIX = { }; export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${COMMIT_MESSAGE_PREFIX.startTracking}|${COMMIT_MESSAGE_PREFIX.refilter}|${COMMIT_MESSAGE_PREFIX.update})`); + export default class GitDataMapper { static toPersistence(record, prefixMessageToSnapshotId) { const { serviceId, documentType, isRefilter, snapshotId, mimeType, isFirstRecord } = record; From 9fc509c772cb16bf3350a7e2140a3b69e7ab019e Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 
13 Jun 2022 18:12:36 +0200 Subject: [PATCH 53/74] Improve English Co-authored-by: Matti Schneider --- scripts/rewrite/initializer/index.js | 2 +- src/archivist/recorder/repositories/factory.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/rewrite/initializer/index.js b/scripts/rewrite/initializer/index.js index 00a602f28..c69b7d70d 100644 --- a/scripts/rewrite/initializer/index.js +++ b/scripts/rewrite/initializer/index.js @@ -22,7 +22,7 @@ export async function initReadmeAndLicense(targetRepo, targetPath, authorDate) { await targetRepo.add(targetReadmeFilePath); await targetRepo.add(targetLicenseFilePath); await targetRepo.commit({ - message: 'Add Readme and License', + message: 'Add readme and license', date: authorDate, }); } diff --git a/src/archivist/recorder/repositories/factory.js b/src/archivist/recorder/repositories/factory.js index f8463bfe9..c252cd609 100644 --- a/src/archivist/recorder/repositories/factory.js +++ b/src/archivist/recorder/repositories/factory.js @@ -17,7 +17,7 @@ export default class RepositoryFactory { case 'mongo': return new MongoRepository(params.mongo); default: - throw new Error(`Unknow storage repository configuration with type: ${params.type}`); + throw new Error(`Unknown storage repository configuration for type '${params.type}'`); } } } From c1d4fcf41f6ce66603800bbab21ad4d62789bc84 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 18:13:02 +0200 Subject: [PATCH 54/74] Remove useless parens Co-authored-by: Matti Schneider --- scripts/dataset/export/index.js | 2 +- src/archivist/recorder/record.test.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/dataset/export/index.js b/scripts/dataset/export/index.js index b171fc867..33947ed5e 100644 --- a/scripts/dataset/export/index.js +++ b/scripts/dataset/export/index.js @@ -17,7 +17,7 @@ const fs = fsApi.promises; const ARCHIVE_FORMAT = 'zip'; // for supported formats, see 
https://www.archiverjs.com/docs/archive-formats export default async function generate({ archivePath, releaseDate }) { - const versionsRepository = await (RepositoryFactory.create(config.get('recorder.versions.storage'))).initialize(); + const versionsRepository = await RepositoryFactory.create(config.get('recorder.versions.storage')).initialize(); const archive = await initializeArchive(archivePath); diff --git a/src/archivist/recorder/record.test.js b/src/archivist/recorder/record.test.js index 77b88b76d..611b7df4f 100644 --- a/src/archivist/recorder/record.test.js +++ b/src/archivist/recorder/record.test.js @@ -61,7 +61,7 @@ describe('Record', () => { describe('Content access', () => { before(async () => { - repository = await (RepositoryFactory.create(config.get('recorder.versions.storage'))).initialize(); + repository = await RepositoryFactory.create(config.get('recorder.versions.storage')).initialize(); await repository.save(new Record({ ...recordParams, content: 'content', From 1159c2660bf936d02fb45b2e22a352d89c387ba2 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 18:13:21 +0200 Subject: [PATCH 55/74] Follow subject convention in tests --- src/archivist/recorder/record.test.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/src/archivist/recorder/record.test.js b/src/archivist/recorder/record.test.js index 611b7df4f..99a330ad3 100644 --- a/src/archivist/recorder/record.test.js +++ b/src/archivist/recorder/record.test.js @@ -8,7 +8,7 @@ const { expect } = chai; describe('Record', () => { let repository; - let record; + let subject; const REQUIRED_PARAMS = [ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ]; const recordParams = { serviceId: 'ServiceA', @@ -30,7 +30,7 @@ describe('Record', () => { params[param] = recordParams[param]; }); - record = new Record({ ...params }); + subject = new Record({ ...params }); } catch (e) { expect(e).to.be.an('error'); 
expect(e.message).to.have.string(`"${requiredParam}" is required`); @@ -44,7 +44,7 @@ describe('Record', () => { context('when it is null', () => { it('throws an error', async () => { try { - record = new Record({ ...recordParams, [requiredParam]: null }); + subject = new Record({ ...recordParams, [requiredParam]: null }); } catch (e) { expect(e).to.be.an('error'); expect(e.message).to.have.string(`"${requiredParam}" is required`); @@ -66,7 +66,7 @@ describe('Record', () => { ...recordParams, content: 'content', })); - ([record] = await repository.findAll()); + ([subject] = await repository.findAll()); }); after(async () => { @@ -77,7 +77,7 @@ describe('Record', () => { context('when it is neither defined nor loaded', () => { it('throws an error explaining how to recover', async () => { try { - console.log(record.content); + console.log(subject.content); } catch (e) { expect(e).to.be.an('error'); expect(e.message).to.have.string('set the content or use Repository#loadRecordContent'); From 3e148da143e6b1e2ca9ba8e34c56e51f3a568d9f Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 18:13:36 +0200 Subject: [PATCH 56/74] Switch to static property and method --- src/archivist/recorder/record.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/archivist/recorder/record.js b/src/archivist/recorder/record.js index 6311f0d32..791d6d2a6 100644 --- a/src/archivist/recorder/record.js +++ b/src/archivist/recorder/record.js @@ -1,10 +1,10 @@ export default class Record { #content; - #REQUIRED_PARAMS = [ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ]; + static #REQUIRED_PARAMS = [ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ]; constructor(params) { - this.#validate(params); + Record.#validate(params); const { id, serviceId, documentType, mimeType, fetchDate, isFirstRecord, isRefilter, snapshotId, content } = params; @@ -37,8 +37,8 @@ export default class Record { this.#content = content; } - #validate(givenParams) { - 
for (const param of this.#REQUIRED_PARAMS) { + static #validate(givenParams) { + for (const param of Record.#REQUIRED_PARAMS) { if (!Object.prototype.hasOwnProperty.call(givenParams, param) || givenParams[param] == null) { throw new Error(`"${param}" is required`); } From 3bf9f2158a9739d0b201ef43b862adbf6f3b9b19 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 18:13:43 +0200 Subject: [PATCH 57/74] Improve variable name --- scripts/history/utils/index.js | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/scripts/history/utils/index.js b/scripts/history/utils/index.js index fcdf23fe1..74eadb4c7 100644 --- a/scripts/history/utils/index.js +++ b/scripts/history/utils/index.js @@ -6,10 +6,10 @@ export async function importReadme({ from: sourceRepository, to: targetRepositor const sourceRepositoryReadmePath = `${sourceRepository.path}/README.md`; const targetRepositoryReadmePath = `${targetRepository.path}/README.md`; - const [readmeCommit] = await sourceRepository.git.log(['README.md']); + const [firstReadmeCommit] = await sourceRepository.git.log(['README.md']); - if (!readmeCommit) { - console.warn(`No commits found for README in ${sourceRepository.path}`); + if (!firstReadmeCommit) { + console.warn(`No commit found for README in ${sourceRepository.path}`); return; } @@ -17,7 +17,7 @@ export async function importReadme({ from: sourceRepository, to: targetRepositor await fs.copyFile(sourceRepositoryReadmePath, targetRepositoryReadmePath); await targetRepository._commit({ filePath: targetRepositoryReadmePath, - message: readmeCommit.message, - date: readmeCommit.date, + message: firstReadmeCommit.message, + date: firstReadmeCommit.date, }); } From 33c1813e4ffd27f1cdbf8536379fb7c2dc4ff43c Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Mon, 13 Jun 2022 18:13:55 +0200 Subject: [PATCH 58/74] Move variable declaration for clarity --- src/archivist/index.test.js | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff 
--git a/src/archivist/index.test.js b/src/archivist/index.test.js index ae8f68a7d..14325189d 100644 --- a/src/archivist/index.test.js +++ b/src/archivist/index.test.js @@ -25,14 +25,13 @@ const VERSIONS_PATH = path.resolve(ROOT_PATH, config.get('recorder.versions.stor const MIME_TYPE = 'text/html'; const FETCH_DATE = new Date('2000-01-02T12:00:00.000Z'); +let gitVersion; +let app; async function resetGitRepositories() { return Promise.all([ app.recorder.snapshotsRepository.removeAll(), app.recorder.versionsRepository.removeAll() ]); } -let gitVersion; -let app; - describe('Archivist', function () { this.timeout(10000); From 12aeaf2c37a9e3a53e6b58160ec2a4e35e3c3d6a Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 09:58:52 +0200 Subject: [PATCH 59/74] Fix method name in script --- scripts/history/migrate-services.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js index 7f9a2ad66..9a64e232f 100644 --- a/scripts/history/migrate-services.js +++ b/scripts/history/migrate-services.js @@ -140,7 +140,7 @@ async function rewriteSnapshots(repository, records, idsMapping, logger) { let i = 1; for (const record of records) { - const { id: recordId } = await repository.record(record); // eslint-disable-line no-await-in-loop + const { id: recordId } = await repository.save(record); // eslint-disable-line no-await-in-loop idsMapping[record.id] = recordId; // Saves the mapping between the old ID and the new one. 
@@ -166,7 +166,7 @@ async function rewriteVersions(repository, records, idsMapping, logger) { record.snapshotId = newSnapshotId; - const { id: recordId } = await repository.record(record); // eslint-disable-line no-await-in-loop + const { id: recordId } = await repository.save(record); // eslint-disable-line no-await-in-loop if (recordId) { logger.info({ message: `Migrated version with new ID: ${recordId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); From 4f1c887517fcc6cd9f98c20e059b3a2ebd4355b4 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 10:59:03 +0200 Subject: [PATCH 60/74] Rename variables for clarity --- scripts/history/migrate-services.js | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js index 9a64e232f..148978f5a 100644 --- a/scripts/history/migrate-services.js +++ b/scripts/history/migrate-services.js @@ -28,8 +28,8 @@ const CONFIG = { }, }; -const COUNTERS = { - imported: 0, +const counters = { + migrated: 0, skipped: 0, }; @@ -128,9 +128,9 @@ const COUNTERS = { rewriteVersions(migration.to.versions.destination, toVersionsRecordsMigrated, idsMapping, migration.to.versions.logger), ]); - console.log(`Records treated: ${Object.values(COUNTERS).reduce((acc, value) => acc + value, 0)}`); - console.log(`⌙ Migrated records: ${COUNTERS.imported}`); - console.log(`⌙ Skipped records: ${COUNTERS.skipped}`); + console.log(`Records treated: ${Object.values(counters).reduce((acc, value) => acc + value, 0)}`); + console.log(`⌙ Migrated records: ${counters.migrated}`); + console.log(`⌙ Skipped records: ${counters.skipped}`); console.timeEnd('Total time'); await finalize(migration); @@ -146,10 +146,10 @@ async function rewriteSnapshots(repository, records, idsMapping, logger) { if (recordId) { logger.info({ message: `Migrated snapshot with new ID: ${recordId}`, serviceId: 
record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); - COUNTERS.imported++; + counters.migrated++; } else { logger.info({ message: 'Skipped snapshot', serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); - COUNTERS.skipped++; + counters.skipped++; } } } @@ -170,10 +170,10 @@ async function rewriteVersions(repository, records, idsMapping, logger) { if (recordId) { logger.info({ message: `Migrated version with new ID: ${recordId}`, serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); - COUNTERS.imported++; + counters.migrated++; } else { logger.info({ message: 'Skipped version', serviceId: record.serviceId, type: record.documentType, id: record.id, current: i++, total: records.length }); - COUNTERS.skipped++; + counters.skipped++; } } } From 1acc32ed91561164f7ea661cd4523d0593c493c4 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 11:04:51 +0200 Subject: [PATCH 61/74] Convert obsolete Classes to simple module --- .../recorder/repositories/git/dataMapper.js | 70 +++++++++-------- .../recorder/repositories/git/index.js | 10 +-- .../recorder/repositories/mongo/dataMapper.js | 76 +++++++++---------- .../recorder/repositories/mongo/index.js | 2 +- 4 files changed, 77 insertions(+), 81 deletions(-) diff --git a/src/archivist/recorder/repositories/git/dataMapper.js b/src/archivist/recorder/repositories/git/dataMapper.js index ea10f1c2a..116d3aa4f 100644 --- a/src/archivist/recorder/repositories/git/dataMapper.js +++ b/src/archivist/recorder/repositories/git/dataMapper.js @@ -14,50 +14,48 @@ export const COMMIT_MESSAGE_PREFIX = { export const COMMIT_MESSAGE_PREFIXES_REGEXP = new RegExp(`^(${COMMIT_MESSAGE_PREFIX.startTracking}|${COMMIT_MESSAGE_PREFIX.refilter}|${COMMIT_MESSAGE_PREFIX.update})`); -export default class GitDataMapper { - static toPersistence(record, prefixMessageToSnapshotId) { - 
const { serviceId, documentType, isRefilter, snapshotId, mimeType, isFirstRecord } = record; +export function toPersistence(record, prefixMessageToSnapshotId) { + const { serviceId, documentType, isRefilter, snapshotId, mimeType, isFirstRecord } = record; - let prefix = isRefilter ? COMMIT_MESSAGE_PREFIX.refilter : COMMIT_MESSAGE_PREFIX.update; + let prefix = isRefilter ? COMMIT_MESSAGE_PREFIX.refilter : COMMIT_MESSAGE_PREFIX.update; - prefix = isFirstRecord ? COMMIT_MESSAGE_PREFIX.startTracking : prefix; + prefix = isFirstRecord ? COMMIT_MESSAGE_PREFIX.startTracking : prefix; - let message = `${prefix} ${serviceId} ${documentType}`; + let message = `${prefix} ${serviceId} ${documentType}`; - if (snapshotId) { - message = `${message}\n\n${prefixMessageToSnapshotId}${snapshotId}`; - } - - return { - message, - content: record.content, - fileExtension: mime.getExtension(mimeType), - }; + if (snapshotId) { + message = `${message}\n\n${prefixMessageToSnapshotId}${snapshotId}`; } - static toDomain(commit) { - const { hash, date, message, body, diff } = commit; - - const modifiedFilesInCommit = diff.files.map(({ file }) => file); - - if (modifiedFilesInCommit.length > 1) { - throw new Error(`Only one document should have been recorded in ${hash}, but all these documents were recorded: ${modifiedFilesInCommit.join(', ')}`); - } + return { + message, + content: record.content, + fileExtension: mime.getExtension(mimeType), + }; +} - const [relativeFilePath] = modifiedFilesInCommit; - const snapshotIdMatch = body.match(/\b[0-9a-f]{5,40}\b/g); +export function toDomain(commit) { + const { hash, date, message, body, diff } = commit; - const record = new Record({ - id: hash, - serviceId: path.dirname(relativeFilePath), - documentType: path.basename(relativeFilePath, path.extname(relativeFilePath)), - mimeType: mime.getType(relativeFilePath), - fetchDate: new Date(date), - isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIX.startTracking), - isRefilter: 
message.startsWith(COMMIT_MESSAGE_PREFIX.refilter), - snapshotId: snapshotIdMatch && snapshotIdMatch[0], - }); + const modifiedFilesInCommit = diff.files.map(({ file }) => file); - return record; + if (modifiedFilesInCommit.length > 1) { + throw new Error(`Only one document should have been recorded in ${hash}, but all these documents were recorded: ${modifiedFilesInCommit.join(', ')}`); } + + const [relativeFilePath] = modifiedFilesInCommit; + const snapshotIdMatch = body.match(/\b[0-9a-f]{5,40}\b/g); + + const record = new Record({ + id: hash, + serviceId: path.dirname(relativeFilePath), + documentType: path.basename(relativeFilePath, path.extname(relativeFilePath)), + mimeType: mime.getType(relativeFilePath), + fetchDate: new Date(date), + isFirstRecord: message.startsWith(COMMIT_MESSAGE_PREFIX.startTracking), + isRefilter: message.startsWith(COMMIT_MESSAGE_PREFIX.refilter), + snapshotId: snapshotIdMatch && snapshotIdMatch[0], + }); + + return record; } diff --git a/src/archivist/recorder/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js index 62691af50..4a27675de 100644 --- a/src/archivist/recorder/repositories/git/index.js +++ b/src/archivist/recorder/repositories/git/index.js @@ -10,7 +10,7 @@ import mime from 'mime'; import RepositoryInterface from '../interface.js'; -import DataMapper, { COMMIT_MESSAGE_PREFIXES_REGEXP, COMMIT_MESSAGE_PREFIX } from './dataMapper.js'; +import * as DataMapper from './dataMapper.js'; import Git from './git.js'; const fs = fsApi.promises; @@ -79,9 +79,9 @@ export default class GitRepository extends RepositoryInterface { async count() { return (await this.git.log([ - `--grep=${COMMIT_MESSAGE_PREFIX.startTracking}`, - `--grep=${COMMIT_MESSAGE_PREFIX.refilter}`, - `--grep=${COMMIT_MESSAGE_PREFIX.update}`, + `--grep=${DataMapper.COMMIT_MESSAGE_PREFIX.startTracking}`, + `--grep=${DataMapper.COMMIT_MESSAGE_PREFIX.refilter}`, + `--grep=${DataMapper.COMMIT_MESSAGE_PREFIX.update}`, ])).length; } @@ -135,7 
+135,7 @@ export default class GitRepository extends RepositoryInterface { async #getCommits() { return (await this.git.listCommits()) - .filter(({ message }) => message.match(COMMIT_MESSAGE_PREFIXES_REGEXP)) // Skip commits which are not a document record (README, LICENSE, …) + .filter(({ message }) => message.match(DataMapper.COMMIT_MESSAGE_PREFIXES_REGEXP)) // Skip commits which are not a document record (README, LICENSE, …) .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending order } diff --git a/src/archivist/recorder/repositories/mongo/dataMapper.js b/src/archivist/recorder/repositories/mongo/dataMapper.js index 9b72ff341..31d5ae81d 100644 --- a/src/archivist/recorder/repositories/mongo/dataMapper.js +++ b/src/archivist/recorder/repositories/mongo/dataMapper.js @@ -2,45 +2,43 @@ import { ObjectId } from 'mongodb'; import Record from '../../record.js'; -export default class DataMapper { - static toPersistence(record) { - const { serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId, isFirstRecord } = record; - - const documentFields = Object.fromEntries(Object.entries({ - serviceId, - documentType, - content, - mimeType, - fetchDate, - isRefilter, - snapshotId, - isFirstRecord, - }).filter(([ , value ]) => value)); // Remove empty values - - if (documentFields.snapshotId) { - documentFields.snapshotId = new ObjectId(snapshotId); - } - - documentFields.content = record.content; - documentFields.created_at = new Date(); - - return documentFields; +export function toPersistence(record) { + const { serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId, isFirstRecord } = record; + + const documentFields = Object.fromEntries(Object.entries({ + serviceId, + documentType, + content, + mimeType, + fetchDate, + isRefilter, + snapshotId, + isFirstRecord, + }).filter(([ , value ]) => value)); // Remove empty values + + if 
(documentFields.snapshotId) { + documentFields.snapshotId = new ObjectId(snapshotId); } - static toDomain(document) { - const { _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId } = document; - - const record = new Record({ - id: _id.toString(), - serviceId, - documentType, - mimeType, - fetchDate: new Date(fetchDate), - isFirstRecord: Boolean(isFirstRecord), - isRefilter: Boolean(isRefilter), - snapshotId: snapshotId && snapshotId.toString(), - }); - - return record; - } + documentFields.content = record.content; + documentFields.created_at = new Date(); + + return documentFields; +} + +export function toDomain(document) { + const { _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId } = document; + + const record = new Record({ + id: _id.toString(), + serviceId, + documentType, + mimeType, + fetchDate: new Date(fetchDate), + isFirstRecord: Boolean(isFirstRecord), + isRefilter: Boolean(isRefilter), + snapshotId: snapshotId && snapshotId.toString(), + }); + + return record; } diff --git a/src/archivist/recorder/repositories/mongo/index.js b/src/archivist/recorder/repositories/mongo/index.js index 981bb8c91..cec3a1b9c 100644 --- a/src/archivist/recorder/repositories/mongo/index.js +++ b/src/archivist/recorder/repositories/mongo/index.js @@ -7,7 +7,7 @@ import { MongoClient, ObjectId, Binary } from 'mongodb'; import RepositoryInterface from '../interface.js'; -import DataMapper from './dataMapper.js'; +import * as DataMapper from './dataMapper.js'; export default class MongoRepository extends RepositoryInterface { constructor({ database: databaseName, collection: collectionName, connectionURI }) { From 3bd7d5270b1ea4a81a1ad67af29cc4ae2b7299bb Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 11:10:52 +0200 Subject: [PATCH 62/74] Improve phrasings Co-authored-by: Matti Schneider --- src/archivist/fetcher/index.test.js | 8 ++++---- .../recorder/repositories/git/index.js | 10 
+++++----- .../recorder/repositories/git/index.test.js | 8 ++++---- .../recorder/repositories/interface.js | 20 +++++++++---------- .../recorder/repositories/mongo/index.test.js | 8 ++++---- 5 files changed, 27 insertions(+), 27 deletions(-) diff --git a/src/archivist/fetcher/index.test.js b/src/archivist/fetcher/index.test.js index d9c350cbc..733a90de5 100644 --- a/src/archivist/fetcher/index.test.js +++ b/src/archivist/fetcher/index.test.js @@ -69,7 +69,7 @@ describe('Fetcher', function () { expect(content).to.equal(termsHTML); }); - it('returns the mime type of the given URL', async () => { + it('returns the MIME type of the given URL', async () => { expect(mimeType).to.equal('text/html'); }); @@ -82,7 +82,7 @@ describe('Fetcher', function () { expect(content).to.equal(termsHTML); }); - it('returns the mime type of the given URL', async () => { + it('returns the MIME type of the given URL', async () => { expect(mimeType).to.equal('text/html'); }); }); @@ -99,7 +99,7 @@ describe('Fetcher', function () { expect(content).to.equal(termsHTML); }); - it('returns the mime type of the given URL', async () => { + it('returns the MIME type of the given URL', async () => { expect(mimeType).to.equal('text/html'); }); @@ -112,7 +112,7 @@ describe('Fetcher', function () { expect(content).to.equal(termsHTML); }); - it('returns the mime type of the given URL', async () => { + it('returns the MIME type of the given URL', async () => { expect(mimeType).to.equal('text/html'); }); }); diff --git a/src/archivist/recorder/repositories/git/index.js b/src/archivist/recorder/repositories/git/index.js index 4a27675de..0de415b95 100644 --- a/src/archivist/recorder/repositories/git/index.js +++ b/src/archivist/recorder/repositories/git/index.js @@ -119,15 +119,15 @@ export default class GitRepository extends RepositoryInterface { return; } - // In case of PDF, `git show` cannot be used as it converts PDF binary into string which not retain the original binary representation + // In case of 
PDF files, `git show` cannot be used as it converts PDF binary into strings that do not retain the original binary representation // It is impossible to restore the original binary data from the resulting string let pdfBuffer; try { - await this.git.restore(relativeFilePath, record.id); // So, temporarily restore the PDF file to a specific commit + await this.git.restore(relativeFilePath, record.id); // Temporarily restore the PDF file to a specific commit pdfBuffer = await fs.readFile(`${this.path}/${relativeFilePath}`); // …read the content } finally { - await this.git.restore(relativeFilePath, 'HEAD'); // …and finally restore the file to its last state + await this.git.restore(relativeFilePath, 'HEAD'); // …and finally restore the file to its most recent state } record.content = pdfBuffer; @@ -135,8 +135,8 @@ export default class GitRepository extends RepositoryInterface { async #getCommits() { return (await this.git.listCommits()) - .filter(({ message }) => message.match(DataMapper.COMMIT_MESSAGE_PREFIXES_REGEXP)) // Skip commits which are not a document record (README, LICENSE, …) - .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending order + .filter(({ message }) => message.match(DataMapper.COMMIT_MESSAGE_PREFIXES_REGEXP)) // Skip commits which are not a document record (README, LICENSE…) + .sort((commitA, commitB) => new Date(commitA.date) - new Date(commitB.date)); // Make sure that the commits are sorted in ascending chronological order } async #writeFile({ serviceId, documentType, content, fileExtension }) { diff --git a/src/archivist/recorder/repositories/git/index.test.js b/src/archivist/recorder/repositories/git/index.test.js index 17ef82a20..2822682d1 100644 --- a/src/archivist/recorder/repositories/git/index.test.js +++ b/src/archivist/recorder/repositories/git/index.test.js @@ -114,7 +114,7 @@ describe('GitRepository', () => { expect(new 
Date(commit.date).getTime()).to.equal(FETCH_DATE.getTime()); }); - it('stores the mime type', () => { + it('stores the MIME type', () => { expect(mime.getType(EXPECTED_FILE_PATH)).to.equal(MIME_TYPE); }); @@ -277,7 +277,7 @@ describe('GitRepository', () => { expect(fs.readFileSync(EXPECTED_PDF_FILE_PATH, { encoding: 'utf8' })).to.equal(PDF_CONTENT); }); - it('stores the mime type', () => { + it('stores the MIME type', () => { expect(mime.getType(EXPECTED_PDF_FILE_PATH)).to.equal(PDF_MIME_TYPE); }); }); @@ -326,7 +326,7 @@ describe('GitRepository', () => { expect(new Date(record.fetchDate).getTime()).to.equal(FETCH_DATE.getTime()); }); - it('stores the mime type', () => { + it('stores the MIME type', () => { expect(record.mimeType).to.equal(MIME_TYPE); }); @@ -334,7 +334,7 @@ describe('GitRepository', () => { expect(record.snapshotId).to.equal(SNAPSHOT_ID); }); - context('when requested record does not exists', () => { + context('when requested record does not exist', () => { it('returns an empty object', async () => { expect(await subject.findById('inexistantID')).to.deep.equal({}); }); diff --git a/src/archivist/recorder/repositories/interface.js b/src/archivist/recorder/repositories/interface.js index 650029e2a..648996806 100644 --- a/src/archivist/recorder/repositories/interface.js +++ b/src/archivist/recorder/repositories/interface.js @@ -8,7 +8,7 @@ export default class RepositoryInterface { * [Optional] Initialize repository * Override this method if the repository needs some asynchronous initialization code (open database connection and create collections, initialize Git…) * - * @returns {Promise} Promise that will be resolved with the current repository + * @returns {Promise} Promise that will be resolved with the current repository instance */ async initialize() {} @@ -16,7 +16,7 @@ export default class RepositoryInterface { * [Optional] Finalize repository * Override this method if the repository needs some asynchronous code to properly close the 
repository (close database connection, push changes on Git remote…) * - * @returns {Promise} Promise that will be resolved with the current repository + * @returns {Promise} Promise that will be resolved with the current repository instance */ async finalize() {} @@ -27,7 +27,7 @@ export default class RepositoryInterface { * @returns {Promise} Promise that will be resolved with the given record when it has been persisted */ async save(record) { - throw new Error(`#save method is not yet implemented in ${this.constructor.name}`); + throw new Error(`#save method is not implemented in ${this.constructor.name}`); } /** @@ -38,7 +38,7 @@ export default class RepositoryInterface { * @returns {Promise} Promise that will be resolved with the found record or an empty object if none match the given criteria */ async findLatest(serviceId, documentType) { - throw new Error(`#findLatest method is not yet implemented in ${this.constructor.name}`); + throw new Error(`#findLatest method is not implemented in ${this.constructor.name}`); } /** @@ -48,7 +48,7 @@ export default class RepositoryInterface { * @returns {Promise} Promise that will be resolved with the found record or an empty object if none match the given ID */ async findById(recordId) { - throw new Error(`#findById method is not yet implemented in ${this.constructor.name}`); + throw new Error(`#findById method is not implemented in ${this.constructor.name}`); } /** @@ -59,7 +59,7 @@ export default class RepositoryInterface { * @returns {Promise>} Promise that will be resolved with an array of all records */ async findAll() { - throw new Error(`#findAll method is not yet implemented in ${this.constructor.name}`); + throw new Error(`#findAll method is not implemented in ${this.constructor.name}`); } /** @@ -69,7 +69,7 @@ export default class RepositoryInterface { * @returns {Promise} Promise that will be resolved with the total number of records */ async count() { - throw new Error(`#count method is not yet implemented in 
${this.constructor.name}`); + throw new Error(`#count method is not implemented in ${this.constructor.name}`); } /** @@ -78,7 +78,7 @@ export default class RepositoryInterface { * @yields {Record} */ async* iterate() { - throw new Error(`#iterate method is not yet implemented in ${this.constructor.name}`); + throw new Error(`#iterate method is not implemented in ${this.constructor.name}`); } /** @@ -87,7 +87,7 @@ export default class RepositoryInterface { * @returns {Promise} Promise that will be resolved when all records are removed */ async removeAll() { - throw new Error(`#removeAll method is not yet implemented in ${this.constructor.name}`); + throw new Error(`#removeAll method is not implemented in ${this.constructor.name}`); } /** @@ -97,6 +97,6 @@ export default class RepositoryInterface { * @returns {Promise} Promise that will be resolved with the given record when its content has been loaded */ async loadRecordContent(record) { - throw new Error(`#loadRecordContent method is not yet implemented in ${this.constructor.name}`); + throw new Error(`#loadRecordContent method is not implemented in ${this.constructor.name}`); } } diff --git a/src/archivist/recorder/repositories/mongo/index.test.js b/src/archivist/recorder/repositories/mongo/index.test.js index 66341b887..1a703b710 100644 --- a/src/archivist/recorder/repositories/mongo/index.test.js +++ b/src/archivist/recorder/repositories/mongo/index.test.js @@ -110,7 +110,7 @@ describe('MongoRepository', () => { expect(new Date(mongoDocument.fetchDate).getTime()).to.equal(FETCH_DATE.getTime()); }); - it('stores the mime type', () => { + it('stores the MIME type', () => { expect(mongoDocument.mimeType).to.equal(MIME_TYPE); }); @@ -311,7 +311,7 @@ describe('MongoRepository', () => { expect(isSameContent).to.be.true; }); - it('stores the mime type', () => { + it('stores the MIME type', () => { expect(mongoDocument.mimeType).to.equal(PDF_MIME_TYPE); }); }); @@ -360,7 +360,7 @@ describe('MongoRepository', () => { 
expect(new Date(record.fetchDate).getTime()).to.equal(FETCH_DATE.getTime()); }); - it('stores the mime type', () => { + it('stores the MIME type', () => { expect(record.mimeType).to.equal(MIME_TYPE); }); @@ -368,7 +368,7 @@ describe('MongoRepository', () => { expect(record.snapshotId).to.equal(SNAPSHOT_ID); }); - context('when requested record does not exists', () => { + context('when requested record does not exist', () => { it('returns an empty object', async () => { expect(await subject.findById('inexistantID')).to.deep.equal({}); }); From 7172c9146aa4b93db401773a6b83a121a4b85c29 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 11:11:07 +0200 Subject: [PATCH 63/74] Remove obsolete parens --- src/archivist/recorder/repositories/git/index.test.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/archivist/recorder/repositories/git/index.test.js b/src/archivist/recorder/repositories/git/index.test.js index 2822682d1..2ff742f14 100644 --- a/src/archivist/recorder/repositories/git/index.test.js +++ b/src/archivist/recorder/repositories/git/index.test.js @@ -475,7 +475,7 @@ describe('GitRepository', () => { }); it('returns the latest record content', async () => { - expect((latestRecord.content).toString('utf8')).to.equal(UPDATED_FILE_CONTENT); + expect(latestRecord.content.toString('utf8')).to.equal(UPDATED_FILE_CONTENT); }); it('returns the latest record mime type', () => { @@ -503,7 +503,7 @@ describe('GitRepository', () => { }); it('returns the latest record content', async () => { - expect((latestRecord.content).toString('utf8')).to.equal(PDF_CONTENT); + expect(latestRecord.content.toString('utf8')).to.equal(PDF_CONTENT); }); it('returns the latest record mime type', () => { From 2ee7c2a942b9ac606cd4bfdac0a83c2db63a51bd Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 11:12:09 +0200 Subject: [PATCH 64/74] Ensure optional methods behavior in RepositoryInterface Co-authored-by: Matti Schneider --- 
src/archivist/recorder/repositories/interface.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/src/archivist/recorder/repositories/interface.js b/src/archivist/recorder/repositories/interface.js index 648996806..a2c54354a 100644 --- a/src/archivist/recorder/repositories/interface.js +++ b/src/archivist/recorder/repositories/interface.js @@ -10,7 +10,9 @@ export default class RepositoryInterface { * * @returns {Promise} Promise that will be resolved with the current repository instance */ - async initialize() {} + async initialize() { + return this; + } /** * [Optional] Finalize repository @@ -18,7 +20,9 @@ export default class RepositoryInterface { * * @returns {Promise} Promise that will be resolved with the current repository instance */ - async finalize() {} + async finalize() { + return this; + } /** * Persist the given record if it does not already exist in repository From c19c6ccc500fd02753b7f94edbfbfd01f3743173 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 14:46:21 +0200 Subject: [PATCH 65/74] Remove obsolete code --- src/archivist/recorder/repositories/git/dataMapper.js | 4 +--- src/archivist/recorder/repositories/mongo/dataMapper.js | 4 +--- 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/src/archivist/recorder/repositories/git/dataMapper.js b/src/archivist/recorder/repositories/git/dataMapper.js index 116d3aa4f..fc8e23f74 100644 --- a/src/archivist/recorder/repositories/git/dataMapper.js +++ b/src/archivist/recorder/repositories/git/dataMapper.js @@ -46,7 +46,7 @@ export function toDomain(commit) { const [relativeFilePath] = modifiedFilesInCommit; const snapshotIdMatch = body.match(/\b[0-9a-f]{5,40}\b/g); - const record = new Record({ + return new Record({ id: hash, serviceId: path.dirname(relativeFilePath), documentType: path.basename(relativeFilePath, path.extname(relativeFilePath)), @@ -56,6 +56,4 @@ export function toDomain(commit) { isRefilter: 
message.startsWith(COMMIT_MESSAGE_PREFIX.refilter), snapshotId: snapshotIdMatch && snapshotIdMatch[0], }); - - return record; } diff --git a/src/archivist/recorder/repositories/mongo/dataMapper.js b/src/archivist/recorder/repositories/mongo/dataMapper.js index 31d5ae81d..84dd83977 100644 --- a/src/archivist/recorder/repositories/mongo/dataMapper.js +++ b/src/archivist/recorder/repositories/mongo/dataMapper.js @@ -29,7 +29,7 @@ export function toPersistence(record) { export function toDomain(document) { const { _id, serviceId, documentType, fetchDate, mimeType, isRefilter, isFirstRecord, snapshotId } = document; - const record = new Record({ + return new Record({ id: _id.toString(), serviceId, documentType, @@ -39,6 +39,4 @@ export function toDomain(document) { isRefilter: Boolean(isRefilter), snapshotId: snapshotId && snapshotId.toString(), }); - - return record; } From bd6a44485ac0fbd2546607efb59d5a8b5c51750b Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 14:59:14 +0200 Subject: [PATCH 66/74] Assign all given params to a new Record --- src/archivist/recorder/record.js | 18 +++--------------- 1 file changed, 3 insertions(+), 15 deletions(-) diff --git a/src/archivist/recorder/record.js b/src/archivist/recorder/record.js index 791d6d2a6..064a11b67 100644 --- a/src/archivist/recorder/record.js +++ b/src/archivist/recorder/record.js @@ -6,22 +6,10 @@ export default class Record { constructor(params) { Record.#validate(params); - const { id, serviceId, documentType, mimeType, fetchDate, isFirstRecord, isRefilter, snapshotId, content } = params; - - this.serviceId = serviceId; - this.documentType = documentType; - this.mimeType = mimeType; - this.fetchDate = fetchDate; - this.isFirstRecord = isFirstRecord; - this.isRefilter = isRefilter; - this.snapshotId = snapshotId; - - if (id) { - this.id = id; - } + Object.assign(this, Object.fromEntries(Object.entries(params))); - if (content) { - this.#content = content; + if (params.content) { + 
this.#content = params.content; } } From a66529483e8d234b57afb78c8de2b7d692f60397 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 14:59:49 +0200 Subject: [PATCH 67/74] Do not filter Record fields before saving it in Mongo --- .../recorder/repositories/mongo/dataMapper.js | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) diff --git a/src/archivist/recorder/repositories/mongo/dataMapper.js b/src/archivist/recorder/repositories/mongo/dataMapper.js index 84dd83977..ca05c2a4b 100644 --- a/src/archivist/recorder/repositories/mongo/dataMapper.js +++ b/src/archivist/recorder/repositories/mongo/dataMapper.js @@ -3,21 +3,10 @@ import { ObjectId } from 'mongodb'; import Record from '../../record.js'; export function toPersistence(record) { - const { serviceId, documentType, content, mimeType, fetchDate, isRefilter, snapshotId, isFirstRecord } = record; - - const documentFields = Object.fromEntries(Object.entries({ - serviceId, - documentType, - content, - mimeType, - fetchDate, - isRefilter, - snapshotId, - isFirstRecord, - }).filter(([ , value ]) => value)); // Remove empty values + const documentFields = Object.fromEntries(Object.entries(record)); if (documentFields.snapshotId) { - documentFields.snapshotId = new ObjectId(snapshotId); + documentFields.snapshotId = new ObjectId(record.snapshotId); } documentFields.content = record.content; From f70e547e74c06202208c1a56edc583062c8eaa94 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 15:00:06 +0200 Subject: [PATCH 68/74] Switch method and property to public --- src/archivist/recorder/record.js | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/archivist/recorder/record.js b/src/archivist/recorder/record.js index 064a11b67..f21b4bd69 100644 --- a/src/archivist/recorder/record.js +++ b/src/archivist/recorder/record.js @@ -1,10 +1,10 @@ export default class Record { #content; - static #REQUIRED_PARAMS = [ 'serviceId', 'documentType', 
'mimeType', 'fetchDate' ]; + static REQUIRED_PARAMS = Object.freeze([ 'serviceId', 'documentType', 'mimeType', 'fetchDate' ]); constructor(params) { - Record.#validate(params); + Record.validate(params); Object.assign(this, Object.fromEntries(Object.entries(params))); @@ -25,8 +25,8 @@ export default class Record { this.#content = content; } - static #validate(givenParams) { - for (const param of Record.#REQUIRED_PARAMS) { + static validate(givenParams) { + for (const param of Record.REQUIRED_PARAMS) { if (!Object.prototype.hasOwnProperty.call(givenParams, param) || givenParams[param] == null) { throw new Error(`"${param}" is required`); } From f7a6880a4b75735515a978865029cfdfdffd28fa Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 16:10:29 +0200 Subject: [PATCH 69/74] Remove obsolete file --- src/index.js | 37 ------------------------------------- 1 file changed, 37 deletions(-) delete mode 100644 src/index.js diff --git a/src/index.js b/src/index.js deleted file mode 100644 index 95b21498c..000000000 --- a/src/index.js +++ /dev/null @@ -1,37 +0,0 @@ -import path from 'path'; -import { fileURLToPath } from 'url'; - -import config from 'config'; - -import GitRepository from './archivist/recorder/repositories/git/index.js'; -import MongoRepository from './archivist/recorder/repositories/mongo/index.js'; - -const __dirname = path.dirname(fileURLToPath(import.meta.url)); - -export function instantiateVersionsRepository() { - return instantiateRepository('versions'); -} - -export function instantiateSnapshotsRepository() { - return instantiateRepository('snapshots'); -} - -function instantiateRepository(recordType) { - let result; - - switch (config.get(`recorder.${recordType}.storage.type`)) { - case 'git': - result = new GitRepository({ - ...config.get(`recorder.${recordType}.storage.git`), - path: path.resolve(__dirname, '../', config.get(`recorder.${recordType}.storage.git.path`)), - }); - break; - case 'mongo': - result = new 
MongoRepository(config.get(`recorder.${recordType}.storage.mongo`)); - break; - default: - throw new Error(`No configuration found for ${recordType} storage repository`); - } - - return result; -} From 4a1f2da8ee8b1377bb8b7941cfb7816d0100e7f9 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 16:10:56 +0200 Subject: [PATCH 70/74] Remove redundant operations --- src/archivist/recorder/index.test.js | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/src/archivist/recorder/index.test.js b/src/archivist/recorder/index.test.js index 339f37e18..b69c6c861 100644 --- a/src/archivist/recorder/index.test.js +++ b/src/archivist/recorder/index.test.js @@ -32,10 +32,7 @@ describe('Recorder', () => { await recorder.initialize(); }); - after(async () => { - await recorder.snapshotsRepository.removeAll(); - await recorder.finalize(); - }); + after(async () => recorder.finalize()); context('when a required param is missing', () => { after(async () => recorder.snapshotsRepository.removeAll()); @@ -183,9 +180,7 @@ describe('Recorder', () => { await recorder.initialize(); }); - after(async () => { - await recorder.finalize(); - }); + after(async () => recorder.finalize()); context('when a required param is missing', () => { after(async () => recorder.versionsRepository.removeAll()); @@ -344,10 +339,7 @@ describe('Recorder', () => { await recorder.initialize(); }); - after(async () => { - await recorder.versionsRepository.removeAll(); - await recorder.finalize(); - }); + after(async () => recorder.finalize()); context('when a required param is missing', () => { after(async () => recorder.versionsRepository.removeAll()); From f9c992e575923edab16ac2e20da07833952052a5 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 16:11:50 +0200 Subject: [PATCH 71/74] Pass only required config --- src/archivist/index.js | 4 ++-- src/main.js | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/archivist/index.js 
b/src/archivist/index.js index e36f96fd1..5c2b30237 100644 --- a/src/archivist/index.js +++ b/src/archivist/index.js @@ -41,9 +41,9 @@ export default class Archivist extends events.EventEmitter { return Object.keys(this.services); } - constructor(config) { + constructor({ recorder }) { super(); - this.recorder = new Recorder(config.get('recorder')); + this.recorder = new Recorder(recorder); } async initialize() { diff --git a/src/main.js b/src/main.js index 57ad60593..af8bbccfb 100644 --- a/src/main.js +++ b/src/main.js @@ -12,7 +12,7 @@ const schedule = args.includes('--schedule'); const extraArgs = args.filter(arg => !arg.startsWith('--')); (async function startOpenTermsArchive() { - const archivist = new Archivist(config); + const archivist = new Archivist({ recorder: config.get('recorder') }); archivist.attach(logger); From f95cf88c6997f4f37936472808beceecd5c0a06b Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 16:48:18 +0200 Subject: [PATCH 72/74] Fix main in package.json --- package.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/package.json b/package.json index d31d16ff5..072cc0244 100644 --- a/package.json +++ b/package.json @@ -13,7 +13,7 @@ "license": "EUPL-1.2", "author": "ambanum", "type": "module", - "main": "index.js", + "main": "src/main.js", "bin": { "ota-lint-declarations": "./bin/lint-declarations.js", "ota-validate-declarations": "./bin/validate-declarations.js" From 3e9593facb5c65b17b7079796edfad488ff2a4f8 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 16:49:42 +0200 Subject: [PATCH 73/74] Improve variable name to avoid confusion --- src/archivist/index.js | 4 ++-- src/archivist/index.test.js | 8 ++++---- src/main.js | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/archivist/index.js b/src/archivist/index.js index 5c2b30237..ff86b1607 100644 --- a/src/archivist/index.js +++ b/src/archivist/index.js @@ -41,9 +41,9 @@ export default class Archivist extends 
events.EventEmitter { return Object.keys(this.services); } - constructor({ recorder }) { + constructor({ recorderConfig }) { super(); - this.recorder = new Recorder(recorder); + this.recorder = new Recorder(recorderConfig); } async initialize() { diff --git a/src/archivist/index.test.js b/src/archivist/index.test.js index 14325189d..93f037332 100644 --- a/src/archivist/index.test.js +++ b/src/archivist/index.test.js @@ -71,7 +71,7 @@ describe('Archivist', function () { before(async () => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - app = new Archivist(config); + app = new Archivist({ recorderConfig: config.get('recorder') }); await app.initialize(); }); @@ -149,7 +149,7 @@ describe('Archivist', function () { before(async () => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - app = new Archivist(config); + app = new Archivist({ recorderConfig: config.get('recorder') }); await app.initialize(); await app.trackChanges(serviceIds); @@ -205,7 +205,7 @@ describe('Archivist', function () { before(async () => { nock('https://www.servicea.example').get('/tos').reply(200, serviceASnapshotExpectedContent, { 'Content-Type': 'text/html' }); nock('https://www.serviceb.example').get('/privacy').reply(200, serviceBSnapshotExpectedContent, { 'Content-Type': 'application/pdf' }); - app = new Archivist(config); + app = new Archivist({ recorderConfig: config.get('recorder') }); await app.initialize(); await app.trackChanges(serviceIds); @@ -251,7 +251,7 @@ describe('Archivist', function () { } before(async () => { - app = new Archivist(config); + app 
= new Archivist({ recorderConfig: config.get('recorder') }); await app.initialize(); AVAILABLE_EVENTS.forEach(event => { diff --git a/src/main.js b/src/main.js index af8bbccfb..42196fca5 100644 --- a/src/main.js +++ b/src/main.js @@ -12,7 +12,7 @@ const schedule = args.includes('--schedule'); const extraArgs = args.filter(arg => !arg.startsWith('--')); (async function startOpenTermsArchive() { - const archivist = new Archivist({ recorder: config.get('recorder') }); + const archivist = new Archivist({ recorderConfig: config.get('recorder') }); archivist.attach(logger); From 69db45db901032582ee0750aee3a1cefa1d9d1f1 Mon Sep 17 00:00:00 2001 From: Nicolas Dupont Date: Wed, 15 Jun 2022 17:01:31 +0200 Subject: [PATCH 74/74] Improve naming --- scripts/history/migrate-services.js | 10 +++++----- scripts/history/update-to-full-hash.js | 4 ++-- scripts/history/utils/index.js | 2 +- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/scripts/history/migrate-services.js b/scripts/history/migrate-services.js index 148978f5a..4e3a32538 100644 --- a/scripts/history/migrate-services.js +++ b/scripts/history/migrate-services.js @@ -8,7 +8,7 @@ import winston from 'winston'; import GitRepository from '../../src/archivist/recorder/repositories/git/index.js'; import { format } from './logger/index.js'; -import { importReadme } from './utils/index.js'; +import { importReadmeInGit } from './utils/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const ROOT_PATH = path.resolve(__dirname, '../../'); @@ -191,10 +191,10 @@ async function initialize(migration) { ]); return Promise.all([ - importReadme({ from: migration.from.snapshots.source, to: migration.from.snapshots.destination }), - importReadme({ from: migration.from.versions.source, to: migration.from.versions.destination }), - importReadme({ from: migration.to.snapshots.source, to: migration.to.snapshots.destination }), - importReadme({ from: migration.to.versions.source, to: 
migration.to.versions.destination }), + importReadmeInGit({ from: migration.from.snapshots.source, to: migration.from.snapshots.destination }), + importReadmeInGit({ from: migration.from.versions.source, to: migration.from.versions.destination }), + importReadmeInGit({ from: migration.to.snapshots.source, to: migration.to.snapshots.destination }), + importReadmeInGit({ from: migration.to.versions.source, to: migration.to.versions.destination }), ]); } diff --git a/scripts/history/update-to-full-hash.js b/scripts/history/update-to-full-hash.js index ef481372f..f01ac171d 100644 --- a/scripts/history/update-to-full-hash.js +++ b/scripts/history/update-to-full-hash.js @@ -6,7 +6,7 @@ import config from 'config'; import GitRepository from '../../src/archivist/recorder/repositories/git/index.js'; import logger from './logger/index.js'; -import { importReadme } from './utils/index.js'; +import { importReadmeInGit } from './utils/index.js'; const __dirname = path.dirname(fileURLToPath(import.meta.url)); const ROOT_PATH = path.resolve(__dirname, '../../'); @@ -34,7 +34,7 @@ const ROOT_PATH = path.resolve(__dirname, '../../'); await versionsTargetRepository.initialize(); await snapshotsRepository.initialize(); - await importReadme({ from: versionsRepository, to: versionsTargetRepository }); + await importReadmeInGit({ from: versionsRepository, to: versionsTargetRepository }); const total = await versionsRepository.count(); let current = 1; diff --git a/scripts/history/utils/index.js b/scripts/history/utils/index.js index 74eadb4c7..b242d35b8 100644 --- a/scripts/history/utils/index.js +++ b/scripts/history/utils/index.js @@ -2,7 +2,7 @@ import fsApi from 'fs'; const fs = fsApi.promises; -export async function importReadme({ from: sourceRepository, to: targetRepository }) { +export async function importReadmeInGit({ from: sourceRepository, to: targetRepository }) { const sourceRepositoryReadmePath = `${sourceRepository.path}/README.md`; const targetRepositoryReadmePath = 
`${targetRepository.path}/README.md`;