diff --git a/dev/ci/yarn-build-separate.sh b/dev/ci/yarn-build-separate.sh
deleted file mode 100755
index e0db2a2b778e4..0000000000000
--- a/dev/ci/yarn-build-separate.sh
+++ /dev/null
@@ -1,12 +0,0 @@
-#!/usr/bin/env bash
-
-set -e
-
-echo 'ENTERPRISE='$ENTERPRISE
-
-cd $1
-echo "--- yarn"
-yarn --frozen-lockfile --network-timeout 60000
-
-echo "--- build"
-yarn -s run build

diff --git a/lsif/data_model.md b/lsif/data_model.md
new file mode 100644
index 0000000000000..ca24da3e40bcd
--- /dev/null
+++ b/lsif/data_model.md
@@ -0,0 +1,241 @@

# LSIF data model

This document outlines the data model for a single LSIF dump. The definitions of the database tables and the entities encoded within them can be found in `models.database.ts`.

In the following document, we collapse ranges to keep the document readable, where `a:b-c:d` is shorthand for the following:

```
{
  "startLine": a,
  "startCharacter": b,
  "endLine": c,
  "endCharacter": d
}
```

This applies to JSON payloads, and a similar shorthand is used for the columns of the `definitions` and `references` tables.

## Source

The following source files compose the package `sample`, which is used as the running example for this document.

**foo.ts**

```typescript
export function foo(value: string): string {
    return value.substring(1, value.length - 1)
}
```

**bar.ts**

```typescript
import { foo } from './foo'

export function bar(input: string): string {
    return foo(foo(input))
}
```

## Database values

**`meta` table**

This table is populated with **exactly** one row containing the version of the LSIF input, the version of the software that converted it into a SQLite database, and the approximate number of result chunk rows in this dump.

The last value is used to hash identifiers consistently onto the correct result chunk row, as explained in more detail later in this document.

| id  | lsifVersion | sourcegraphVersion | numResultChunks |
| --- | ----------- | ------------------ | --------------- |
| 0   | 0.4.3       | 0.1.0              | 1               |

**`documents` table**

This table is populated with a gzipped, base64-encoded JSON payload that represents the ranges of a document as well as each range's definition, reference, and hover result identifiers. The table is keyed on the path of the document relative to the project root.

| path   | data                         |
| ------ | ---------------------------- |
| foo.ts | _gzipped_ and _json-encoded_ |
| bar.ts | _gzipped_ and _json-encoded_ |

Each payload has the following form. As the documents are large, we show only the decoded version for `foo.ts`.
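To make the storage format concrete, here is a minimal sketch of how such a row could be decoded in Node. It assumes the `data` column holds base64(gzip(JSON)) as described above; the project's actual implementation is `decodeJSON` in `encoding.ts`, and the function name below is illustrative only.

```typescript
import { promisify } from 'util'
import { gunzip } from 'zlib'

const gunzipAsync = promisify(gunzip)

/**
 * Decode a `documents.data` (or `resultChunks.data`) column value.
 * Assumes the value is a base64-encoded, gzipped JSON string.
 */
async function decodeRow<T>(data: string): Promise<T> {
    const inflated = await gunzipAsync(Buffer.from(data, 'base64'))
    return JSON.parse(inflated.toString('utf8'))
}
```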
**decoded `foo.ts` payload**

````json
{
  "ranges": {
    "9": 0,
    "14": 1,
    "21": 2,
    "25": 3,
    "36": 4,
    "38": 5,
    "47": 6
  },
  "orderedRanges": [
    {
      "range": "0:0-0:0",
      "definitionResultId": "49",
      "referenceResultId": "52",
      "monikerIds": ["9007199254740990"]
    },
    {
      "range": "0:16-0:19",
      "definitionResultId": "55",
      "referenceResultId": "58",
      "hoverResultId": "16",
      "monikerIds": ["9007199254740987"]
    },
    {
      "range": "0:20-0:25",
      "definitionResultId": "61",
      "referenceResultId": "64",
      "hoverResultId": "23",
      "monikerIds": []
    },
    {
      "range": "1:9-1:14",
      "definitionResultId": "61",
      "referenceResultId": "64",
      "hoverResultId": "23",
      "monikerIds": []
    },
    {
      "range": "1:15-1:24",
      "definitionResultId": "144",
      "referenceResultId": "68",
      "hoverResultId": "34",
      "monikerIds": ["30"]
    },
    {
      "range": "1:28-1:33",
      "definitionResultId": "61",
      "referenceResultId": "64",
      "hoverResultId": "23",
      "monikerIds": []
    },
    {
      "range": "1:34-1:40",
      "definitionResultId": "148",
      "referenceResultId": "71",
      "hoverResultId": "45",
      "monikerIds": []
    }
  ],
  "hoverResults": {
    "16": "```typescript\nfunction foo(value: string): string\n```",
    "23": "```typescript\n(parameter) value: string\n```",
    "34": "```typescript\n(method) String.substring(start: number, end?: number): string\n```\n\n---\n\nReturns the substring at the specified location within a String object.",
    "45": "```typescript\n(property) String.length: number\n```\n\n---\n\nReturns the length of a String object."
  },
  "monikers": {
    "9007199254740987": {
      "kind": "export",
      "scheme": "npm",
      "identifier": "sample:foo:foo",
      "packageInformationId": "9007199254740991"
    },
    "9007199254740990": {
      "kind": "export",
      "scheme": "npm",
      "identifier": "sample:foo:",
      "packageInformationId": "9007199254740991"
    }
  },
  "packageInformation": {
    "9007199254740991": {
      "name": "sample",
      "version": "0.1.0"
    }
  }
}
````

The `ranges` field holds a map from range identifier to the index of the range data in the `orderedRanges` field. The `orderedRanges` field holds the full range data, and the array is sorted by starting line and starting character. Having both fields allows us to efficiently look up a range by identifier or by a position in the document.

Each range has optional fields for a definition result, a reference result, and a hover result. Each range also has a possibly empty list of moniker ids.

To retrieve a definition or reference result by its identifier, we must first determine which result chunk it is defined in. To do this, we hash the identifier modulo the `numResultChunks` field of the `meta` table, which yields the unique identifier of the row in the `resultChunks` table. The format of this payload is discussed below.

In this example, there is only one result chunk. Larger dumps will have a greater number of result chunks to keep the amount of data encoded in a single database row reasonable.

The hover result and moniker identifiers index into the `hoverResults` and `monikers` fields of the document.

**`resultChunks` table**

This table is populated with gzipped, base64-encoded JSON payloads that contain a mapping from definition result or reference result identifiers to the set of ranges that compose that result. A definition or reference result may be referred to by many documents, which is why it is encoded separately.
The table is keyed on the hash value shared by every definition and reference result identifier inserted in the chunk.

| id  | data                         |
| --- | ---------------------------- |
| 0   | _gzipped_ and _json-encoded_ |

Each payload has the following form.

**decoded result chunk #0 payload**

```json
{
  "documentPaths": {
    "4": "foo.ts",
    "80": "bar.ts"
  },
  "documentIdRangeIds": {
    "49": [{ "documentId": "4", "rangeId": "9" }],
    "55": [{ "documentId": "4", "rangeId": "14" }],
    "61": [{ "documentId": "4", "rangeId": "21" }],
    "71": [{ "documentId": "4", "rangeId": "47" }],
    "52": [{ "documentId": "4", "rangeId": "9" }, { "documentId": "80", "rangeId": "95" }],
    "58": [
      { "documentId": "4", "rangeId": "14" },
      { "documentId": "80", "rangeId": "91" },
      { "documentId": "80", "rangeId": "111" },
      { "documentId": "80", "rangeId": "113" }
    ],
    "64": [
      { "documentId": "4", "rangeId": "21" },
      { "documentId": "4", "rangeId": "25" },
      { "documentId": "4", "rangeId": "38" }
    ],
    "68": [{ "documentId": "4", "rangeId": "36" }],
    "117": [{ "documentId": "80", "rangeId": "85" }],
    "120": [{ "documentId": "80", "rangeId": "85" }],
    "125": [{ "documentId": "80", "rangeId": "100" }],
    "128": [{ "documentId": "80", "rangeId": "100" }],
    "131": [{ "documentId": "80", "rangeId": "107" }],
    "134": [{ "documentId": "80", "rangeId": "107" }, { "documentId": "80", "rangeId": "115" }]
  }
}
```

The `documentIdRangeIds` field stores a list of _pairs_ of document identifiers and range identifiers. To look up a range in this format, the `documentId` must be translated into a document path via the `documentPaths` field. This gives the primary key of the document containing the range in the `documents` table, and the range identifier can then be looked up in that document's decoded payload.

**`definitions` table**

This table is populated with the monikers attached to a definition result. The table is keyed on the `(scheme, identifier)` pair to allow quick lookup by moniker.

| id  | scheme | identifier     | documentPath | range        |
| --- | ------ | -------------- | ------------ | ------------ |
| 1   | npm    | sample:foo:    | foo.ts       | 0:0 to 0:0   |
| 2   | npm    | sample:foo:foo | foo.ts       | 0:16 to 0:19 |
| 3   | npm    | sample:bar:    | bar.ts       | 0:0 to 0:0   |
| 4   | npm    | sample:bar:bar | bar.ts       | 2:16 to 2:19 |

The row with id `2` correlates the `npm` moniker for the `foo` function with the range where it is defined in `foo.ts`. Similarly, the row with id `4` correlates the exported `npm` moniker for the `bar` function with the range where it is defined in `bar.ts`.

**`references` table**

This table is populated with the monikers attached to a reference result. The table is keyed on the `(scheme, identifier)` pair to allow quick lookup by moniker.

| id  | scheme | identifier     | documentPath | range        |
| --- | ------ | -------------- | ------------ | ------------ |
| 1   | npm    | sample:foo     | foo.ts       | 0:0 to 0:0   |
| 2   | npm    | sample:foo     | bar.ts       | 0:20 to 0:27 |
| 3   | npm    | sample:bar     | bar.ts       | 0:0 to 0:0   |
| 4   | npm    | sample:foo:foo | foo.ts       | 0:16 to 0:19 |
| 5   | npm    | sample:foo:foo | bar.ts       | 0:9 to 0:12  |
| 6   | npm    | sample:foo:foo | bar.ts       | 3:9 to 3:12  |
| 7   | npm    | sample:foo:foo | bar.ts       | 3:13 to 3:16 |
| 8   | npm    | sample:bar:bar | bar.ts       | 2:16 to 2:19 |

The rows with ids `4` through `7` correlate the `npm` moniker for the `foo` function with its references: the definition in `foo.ts`, its import in `bar.ts`, and its two uses in `bar.ts`, respectively.
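Tying the tables together, a definition query proceeds as follows: find the range under the given position via binary search over `orderedRanges`, hash the range's `definitionResultId` modulo `numResultChunks` to locate the result chunk row, then map each document id in the result back to a path. The sketch below illustrates that flow. It is illustrative only: `hashKey` stands in for the actual string hash (the real one lives in `util.ts`, and the only hard requirement is that the importer and the query path use the same function), and the payload shapes are abbreviated versions of the examples above.

```typescript
// Abbreviated shapes of the payloads shown above.
interface RangeData {
    startLine: number
    startCharacter: number
    endLine: number
    endCharacter: number
    definitionResultId?: string
}

interface ResultChunkData {
    documentPaths: { [documentId: string]: string }
    documentIdRangeIds: { [resultId: string]: { documentId: string; rangeId: string }[] }
}

// Stand-in for the hash used when assigning results to chunks. The real
// function must behave identically at import time and at query time.
function hashKey(id: string, numResultChunks: number): number {
    let hash = 0
    for (const char of id) {
        hash = ((hash << 5) - hash + char.charCodeAt(0)) | 0
    }
    return Math.abs(hash) % numResultChunks
}

// Binary search for the range containing a position. This relies on
// `orderedRanges` being sorted by start line and start character, with
// ranges on the same line not touching.
function findRangeByPosition(orderedRanges: RangeData[], line: number, character: number): RangeData | undefined {
    let lo = 0
    let hi = orderedRanges.length - 1
    while (lo <= hi) {
        const mid = (lo + hi) >> 1
        const range = orderedRanges[mid]
        if (line < range.startLine || (line === range.startLine && character < range.startCharacter)) {
            hi = mid - 1 // position comes before this range
        } else if (line > range.endLine || (line === range.endLine && character > range.endCharacter)) {
            lo = mid + 1 // position comes after this range
        } else {
            return range // position falls within this range
        }
    }
    return undefined
}

// Resolve a definition or reference result id into (path, rangeId) pairs
// by loading the result chunk that the id hashes to.
async function resolveResult(
    resultId: string,
    numResultChunks: number,
    loadChunk: (index: number) => Promise<ResultChunkData>
): Promise<{ documentPath: string; rangeId: string }[]> {
    const chunk = await loadChunk(hashKey(resultId, numResultChunks))
    return (chunk.documentIdRangeIds[resultId] || []).map(({ documentId, rangeId }) => ({
        documentPath: chunk.documentPaths[documentId],
        rangeId,
    }))
}
```

In the running example, `numResultChunks` is 1, so every identifier trivially hashes to result chunk `0`; larger dumps spread identifiers across many chunks so that no single row grows too large.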
diff --git a/lsif/package.json b/lsif/package.json index 15e2fa96ae073..1f96201a09b75 100644 --- a/lsif/package.json +++ b/lsif/package.json @@ -13,8 +13,8 @@ "lsif-protocol": "0.4.3", "mz": "^2.7.0", "relateurl": "^0.2.7", + "rimraf": "^3.0.0", "sqlite3": "^4.1.0", - "temp": "^0.9.0", "typeorm": "^0.2.18", "vscode-languageserver": "^5.2.1", "yallist": "^3.0.3" @@ -28,8 +28,8 @@ "@types/lodash": "^4.14.138", "@types/mz": "0.0.32", "@types/relateurl": "^0.2.28", + "@types/rimraf": "^2.0.2", "@types/sinon": "^7.0.13", - "@types/temp": "^0.8.34", "@types/yallist": "^3.0.1", "babel-jest": "^24.8.0", "jest": "^24.8.0", diff --git a/lsif/src/backend.ts b/lsif/src/backend.ts index c14759bc78fdf..0acb18ae26bdc 100644 --- a/lsif/src/backend.ts +++ b/lsif/src/backend.ts @@ -1,15 +1,14 @@ -import * as path from 'path' import * as fs from 'mz/fs' +import * as path from 'path' import * as readline from 'mz/readline' +import { ConnectionCache, DocumentCache, ResultChunkCache } from './cache' import { Database } from './database' +import { DefinitionModel, DocumentModel, MetaModel, ReferenceModel, ResultChunkModel } from './models.database' +import { Edge, Vertex } from 'lsif-protocol' import { hasErrorCode } from './util' import { importLsif } from './importer' -import { XrepoDatabase } from './xrepo' import { Readable } from 'stream' -import { ConnectionCache, DocumentCache } from './cache' -import { DefModel, MetaModel, RefModel, DocumentModel } from './models' -import { Edge, Vertex } from 'lsif-protocol' -import { EntityManager } from 'typeorm' +import { XrepoDatabase } from './xrepo' export const ERRNOLSIFDATA = 'NoLSIFDataError' @@ -27,12 +26,13 @@ export class NoLSIFDataError extends Error { /** * Backend for LSIF dumps stored in SQLite. */ -export class SQLiteBackend { +export class Backend { constructor( private storageRoot: string, private xrepoDatabase: XrepoDatabase, private connectionCache: ConnectionCache, - private documentCache: DocumentCache + private documentCache: DocumentCache, + private resultChunkCache: ResultChunkCache ) {} /** @@ -56,8 +56,12 @@ export class SQLiteBackend { const { packages, references } = await this.connectionCache.withTransactionalEntityManager( outFile, - [DefModel, DocumentModel, MetaModel, RefModel], - (entityManager: EntityManager) => importLsif(entityManager, parseLines(readline.createInterface({ input }))) + [DefinitionModel, DocumentModel, MetaModel, ReferenceModel, ResultChunkModel], + entityManager => importLsif(entityManager, parseLines(readline.createInterface({ input }))), + async connection => { + await connection.query('PRAGMA synchronous = OFF') + await connection.query('PRAGMA journal_mode = OFF') + } ) // These needs to be done in sequence as SQLite can only have one @@ -90,6 +94,7 @@ export class SQLiteBackend { this.xrepoDatabase, this.connectionCache, this.documentCache, + this.resultChunkCache, repository, commit, file @@ -132,8 +137,9 @@ async function* parseLines(lines: AsyncIterable): AsyncIterable { + documentCache: DocumentCache, + resultChunkCache: ResultChunkCache +): Promise { try { await fs.mkdir(storageRoot) } catch (e) { @@ -152,5 +158,11 @@ export async function createBackend( } } - return new SQLiteBackend(storageRoot, new XrepoDatabase(connectionCache, filename), connectionCache, documentCache) + return new Backend( + storageRoot, + new XrepoDatabase(connectionCache, filename), + connectionCache, + documentCache, + resultChunkCache + ) } diff --git a/lsif/src/cache.test.ts b/lsif/src/cache.test.ts index 
e9c678af4d12d..dd71e2e47fe60 100644 --- a/lsif/src/cache.test.ts +++ b/lsif/src/cache.test.ts @@ -25,7 +25,7 @@ describe('GenericCache', () => { factory.onCall(i).returns(Promise.resolve(value)) } - const cache = new GenericCache(5, () => 1, () => Promise.resolve()) + const cache = new GenericCache(5, () => 1, () => {}) for (const value of values) { const returnValue = await cache.withValue(value, () => factory(value), v => Promise.resolve(v)) expect(returnValue).toBe(value) @@ -40,7 +40,7 @@ describe('GenericCache', () => { const { wait, done } = createBarrierPromise() factory.returns(wait.then(() => 'bar')) - const cache = new GenericCache(5, () => 1, () => Promise.resolve()) + const cache = new GenericCache(5, () => 1, () => {}) const p1 = cache.withValue('foo', factory, v => Promise.resolve(v)) const p2 = cache.withValue('foo', factory, v => Promise.resolve(v)) const p3 = cache.withValue('foo', factory, v => Promise.resolve(v)) @@ -59,11 +59,7 @@ describe('GenericCache', () => { ] const { wait, done } = createBarrierPromise() - const disposer = sinon.spy(() => { - done() - return Promise.resolve() - }) - + const disposer = sinon.spy(done) const cache = new GenericCache(2, () => 1, disposer) for (const value of values) { @@ -89,7 +85,7 @@ describe('GenericCache', () => { factory.onCall(i).returns(Promise.resolve(value)) } - const cache = new GenericCache(5, v => v, () => Promise.resolve()) + const cache = new GenericCache(5, v => v, () => {}) for (const value of values) { await cache.withValue(value, () => factory(value), v => Promise.resolve(v)) } @@ -99,11 +95,7 @@ describe('GenericCache', () => { it('should not evict referenced cache entries', async () => { const { wait, done } = createBarrierPromise() - const disposer = sinon.spy(() => { - done() - return Promise.resolve() - }) - + const disposer = sinon.spy(done) const cache = new GenericCache(5, () => 1, disposer) const fooResolver = () => Promise.resolve('foo') @@ -145,11 +137,7 @@ describe('GenericCache', () => { it('should dispose busted keys', async () => { const { wait, done } = createBarrierPromise() - const disposer = sinon.spy(() => { - done() - return Promise.resolve() - }) - + const disposer = sinon.spy(done) const cache = new GenericCache(5, () => 1, disposer) const factory = sinon.stub>() @@ -173,11 +161,7 @@ describe('GenericCache', () => { const { wait: wait2, done: done2 } = createBarrierPromise() const resolver = () => Promise.resolve('foo') - const disposer = sinon.spy(() => { - done1() - return Promise.resolve() - }) - + const disposer = sinon.spy(done1) const cache = new GenericCache(5, () => 1, disposer) // Create a cache entry for 'foo' that blocks on done2 diff --git a/lsif/src/cache.ts b/lsif/src/cache.ts index e658fc6a65187..4ab54fc5576ca 100644 --- a/lsif/src/cache.ts +++ b/lsif/src/cache.ts @@ -1,6 +1,5 @@ import { Connection, createConnection, EntityManager } from 'typeorm' -import { DocumentData } from './entities' -import { Id } from 'lsif-protocol' +import { DocumentData, ResultChunkData } from './models.database' import Yallist from 'yallist' /** @@ -25,9 +24,8 @@ interface CacheEntry { size: number /** - * The number of active withValue calls referencing this entry. - * If this value is non-zero, it should not be evict-able from the - * cache. + * The number of active withValue calls referencing this entry. If + * this value is non-zero, it is not evict-able from the cache. 
*/ readers: number @@ -73,7 +71,7 @@ export class GenericCache { constructor( private max: number, private sizeFunction: (value: V) => number, - private disposeFunction: (value: V) => Promise + private disposeFunction: (value: V) => Promise | void ) {} /** @@ -91,12 +89,6 @@ export class GenericCache { // Find or create the entry const entry = await this.getEntry(key, factory) - // Increase the number of readers currently looking at this value. - // While this value is not equal to zero, it wll not be skipped over - // on the cache eviction pass. - - entry.readers++ - try { // Re-resolve the promise. If this is already resolved it's a fast // no-op. Otherwise, we got a cache entry that was under-construction @@ -147,7 +139,7 @@ export class GenericCache { const value = await promise - if (readers !== 0) { + if (readers > 0) { // There's someone holding the cache value. Create a barrier promise // and stash the function that can unlock it. When the reader count // for an entry is decremented, the waiter function, if present, is @@ -178,19 +170,21 @@ export class GenericCache { if (node) { // Found, move to head of list this.lruList.unshiftNode(node) - return node.value + const entry = node.value + // Ensure entry is locked before returning + entry.readers++ + return entry } - // Create promise and the entry that wraps it. We don't know - // the effective size of the value until the promise resolves, - // so we put zero. We have a reader count of -1, which is the - // value that denotes that the cache entry is currently under - // construction. We don't want to block here while waiting for - // the promise value to resolve, otherwise a second request for + // Create promise and the entry that wraps it. We don't know the effective + // size of the value until the promise resolves, so we put zero. We have a + // reader count of 1, in order to lock the entry until after the user that + // requested the entry is done using it. We don't want to block here while + // waiting for the promise value to resolve, otherwise a second request for // the same key will create a duplicate cache entry. const promise = factory() - const newEntry = { key, promise, size: 0, readers: -1, waiter: undefined } + const newEntry = { key, promise, size: 0, readers: 1, waiter: undefined } // Add to head of list this.lruList.unshift(newEntry) @@ -209,12 +203,6 @@ export class GenericCache { const value = await promise await this.resolved(newEntry, value) - - // Remove the under-construction value from the reader count. - // Callers of this method end up incrementing this value again - // a second time before calling a user callback function. - newEntry.readers++ - return newEntry } @@ -240,12 +228,10 @@ export class GenericCache { } = node if (readers === 0) { - // If readers < 0, then we're under construction and we - // don't have anything yet to discard. If readers > 0, then - // it may be actively used by another part of the code that - // hit a portion of their critical section that returned - // control to the event loop. We don't want to mess with - // those if we can help it. + // If readers > 0, then it may be actively used by another + // part of the code that hit a portion of their critical + // section that returned control to the event loop. We don't + // want to mess with those if we can help it. this.removeNode(node, size) await this.disposeFunction(await promise) @@ -322,50 +308,87 @@ export class ConnectionCache extends GenericCache { * @param database The database filename. 
* @param entities The set of expected entities present in this schema. * @param callback The function invoke with a SQLite transaction connection. + * @param pragmaHook The function called with connection before the transaction begins. */ public withTransactionalEntityManager( database: string, // Decorators are not possible type check // eslint-disable-next-line @typescript-eslint/ban-types entities: Function[], - callback: (entityManager: EntityManager) => Promise + callback: (entityManager: EntityManager) => Promise, + pragmaHook?: (connection: Connection) => Promise ): Promise { - return this.withConnection(database, entities, connection => connection.transaction(em => callback(em))) + return this.withConnection(database, entities, async connection => { + if (pragmaHook) { + await pragmaHook(connection) + } + + return await connection.transaction(callback) + }) } } /** - * A cache of deserialized `DocumentData` values indexed by their Identifer. + * A wrapper around a cache value that retains its encoded size. In order to keep + * the in-memory limit of these decoded items, we use this value as the cache entry + * size. This assumes that the size of the encoded text is a good proxy for the size + * of the in-memory representation. */ -export class DocumentCache extends GenericCache { +export interface EncodedJsonCacheValue { /** - * Create a new `DocumentCache` with the given maximum (soft) size for + * The size of the encoded value. + */ + size: number + + /** + * The decoded value. + */ + data: T +} + +/** + * A cache of decoded values encoded as JSON and gzipped in a SQLite database. + */ +class EncodedJsonCache extends GenericCache> { + /** + * Create a new `EncodedJsonCache` with the given maximum (soft) size for * all items in the cache. */ constructor(max: number) { super( max, - // TODO - determine memory size - () => 1, + v => v.size, // Let GC handle the cleanup of the object on cache eviction. (): Promise => Promise.resolve() ) } +} +/** + * A cache of deserialized `DocumentData` values indexed by a string containing + * the database path and the path of the document. + */ +export class DocumentCache extends EncodedJsonCache { /** - * Invoke `callback` with document value obtained from the cache - * cache or created on cache miss. - * - * @param documentId The identifier of the document. - * @param factory The function used to create a document. - * @param callback The function invoked with the document. + * Create a new `DocumentCache` with the given maximum (soft) size for + * all items in the cache. */ - public withDocument( - documentId: Id, - factory: () => Promise, - callback: (document: DocumentData) => Promise - ): Promise { - return this.withValue(documentId, factory, callback) + constructor(max: number) { + super(max) + } +} + +/** + * A cache of deserialized `ResultChunkData` values indexed by a string containing + * the database path and the chunk index. + */ +export class ResultChunkCache extends EncodedJsonCache { + /** + * Create a new `ResultChunkCache` with the given maximum (soft) size for + * all items in the cache. 
+ */ + constructor(max: number) { + super(max) } } diff --git a/lsif/src/correlator.test.ts b/lsif/src/correlator.test.ts new file mode 100644 index 0000000000000..1dbdbf0cd653e --- /dev/null +++ b/lsif/src/correlator.test.ts @@ -0,0 +1,16 @@ +import { normalizeHover } from './correlator' + +describe('normalizeHover', () => { + it('should handle all lsp.Hover types', () => { + expect(normalizeHover({ contents: 'foo' })).toEqual('foo') + expect(normalizeHover({ contents: { language: 'typescript', value: 'bar' } })).toEqual( + '```typescript\nbar\n```' + ) + expect(normalizeHover({ contents: { kind: 'markdown', value: 'baz' } })).toEqual('baz') + expect( + normalizeHover({ + contents: ['foo', { language: 'typescript', value: 'bar' }], + }) + ).toEqual('foo\n\n---\n\n```typescript\nbar\n```') + }) +}) diff --git a/lsif/src/correlator.ts b/lsif/src/correlator.ts new file mode 100644 index 0000000000000..bd5da7a551191 --- /dev/null +++ b/lsif/src/correlator.ts @@ -0,0 +1,458 @@ +import RelateUrl from 'relateurl' +import { mustGet, mustGetFromEither } from './util' +import { DefaultMap } from './default-map' +import { Hover, MarkupContent } from 'vscode-languageserver-types' +import { + MonikerData, + PackageInformationData, + RangeData, + MonikerId, + HoverResultId, + ReferenceResultId, + DefinitionResultId, + DocumentId, + PackageInformationId, +} from './models.database' +import { + Id, + VertexLabels, + EdgeLabels, + Vertex, + Edge, + MonikerKind, + ItemEdgeProperties, + moniker, + next, + nextMoniker, + textDocument_definition, + textDocument_hover, + textDocument_references, + packageInformation, + item, + MetaData, + ElementTypes, + contains, + RangeId, +} from 'lsif-protocol' + +/** + * Identifiers of result set vertices. + */ +export type ResultSetId = Id + +/** + * An internal representation of a result set vertex. This is only used during + * correlation and import as we flatten this data into the range vertices for + * faster queries. + */ +export interface ResultSetData { + /** + * The identifier of the definition result attached to this result set. + */ + definitionResultId?: DefinitionResultId + + /** + * The identifier of the reference result attached to this result set. + */ + referenceResultId?: ReferenceResultId + + /** + * The identifier of the hover result attached to this result set. + */ + hoverResultId?: HoverResultId + + /** + * The set of moniker identifiers directly attached to this result set. + */ + monikerIds: Set +} + +/** + * Common state around the conversion of a single LSIF dump upload. This class + * receives the parsed vertex or edge, line by line and adds it into an in-memory + * adjacency-list graph structure that is later processed and converted into a + * SQLite database on disk. + */ +export class Correlator { + /** + * The LSIF version of the input. This is extracted from the metadata vertex at + * the beginning of processing. + */ + public lsifVersion?: string + + /** + * The root of all document URIs. This is extracted from the metadata vertex at + * the beginning of processing. 
+ */ + public projectRoot?: URL + + // Vertex data + public documentPaths = new Map() + public rangeData = new Map() + public resultSetData = new Map() + public hoverData = new Map() + public monikerData = new Map() + public packageInformationData = new Map() + + // Edge data + public nextData = new Map() + public containsData = new Map>() + public definitionData = new Map>() + public referenceData = new Map>() + + /** + * A mapping for the relation from moniker to the set of monikers that they are related + * to via nextMoniker edges. This relation is symmetric such that if `a` is in + * `MonikerSets[b]`, then `b` is in `monikerSets[a]`. + */ + public monikerSets = new DefaultMap>(() => new Set()) + + /** + * The set of exported moniker identifiers that have package information attached. + */ + public importedMonikers = new Set() + + /** + * The set of exported moniker identifiers that have package information attached. + */ + public exportedMonikers = new Set() + + /** + * Process a single vertex or edge. + * + * @param element A vertex or edge element from the LSIF dump. + */ + public insert(element: Vertex | Edge): void { + if (element.type === ElementTypes.vertex) { + switch (element.label) { + case VertexLabels.metaData: + this.handleMetaData(element) + break + + case VertexLabels.document: { + if (!this.projectRoot) { + throw new Error('No metadata defined.') + } + + const path = RelateUrl.relate(this.projectRoot.href + '/', new URL(element.uri).href, { + defaultPorts: {}, + output: RelateUrl.PATH_RELATIVE, + removeRootTrailingSlash: false, + }) + + this.documentPaths.set(element.id, path) + this.containsData.set(element.id, new Set()) + break + } + + // The remaining vertex handlers stash data into an appropriate map. This data + // may be retrieved when an edge that references it is seen, or when a document + // is finalized. 
+
+                case VertexLabels.range:
+                    this.rangeData.set(element.id, {
+                        startLine: element.start.line,
+                        startCharacter: element.start.character,
+                        endLine: element.end.line,
+                        endCharacter: element.end.character,
+                        monikerIds: new Set(),
+                    })
+                    break
+
+                case VertexLabels.resultSet:
+                    this.resultSetData.set(element.id, { monikerIds: new Set() })
+                    break
+
+                case VertexLabels.definitionResult:
+                    this.definitionData.set(element.id, new DefaultMap(() => []))
+                    break
+
+                case VertexLabels.referenceResult:
+                    this.referenceData.set(element.id, new DefaultMap(() => []))
+                    break
+
+                case VertexLabels.hoverResult:
+                    this.hoverData.set(element.id, normalizeHover(element.result))
+                    break
+
+                case VertexLabels.moniker:
+                    this.monikerData.set(element.id, {
+                        kind: element.kind || MonikerKind.local,
+                        scheme: element.scheme,
+                        identifier: element.identifier,
+                    })
+                    break
+
+                case VertexLabels.packageInformation:
+                    this.packageInformationData.set(element.id, {
+                        name: element.name,
+                        version: element.version || null,
+                    })
+                    break
+            }
+        }
+
+        if (element.type === ElementTypes.edge) {
+            switch (element.label) {
+                case EdgeLabels.contains:
+                    this.handleContains(element)
+                    break
+
+                case EdgeLabels.next:
+                    this.handleNextEdge(element)
+                    break
+
+                case EdgeLabels.item:
+                    this.handleItemEdge(element)
+                    break
+
+                case EdgeLabels.textDocument_definition:
+                    this.handleDefinitionEdge(element)
+                    break
+
+                case EdgeLabels.textDocument_references:
+                    this.handleReferenceEdge(element)
+                    break
+
+                case EdgeLabels.textDocument_hover:
+                    this.handleHoverEdge(element)
+                    break
+
+                case EdgeLabels.moniker:
+                    this.handleMonikerEdge(element)
+                    break
+
+                case EdgeLabels.nextMoniker:
+                    this.handleNextMonikerEdge(element)
+                    break
+
+                case EdgeLabels.packageInformation:
+                    this.handlePackageInformationEdge(element)
+                    break
+            }
+        }
+    }
+
+    //
+    // Vertex Handlers
+
+    /**
+     * This should be the first vertex seen. Extract the project root so we
+     * can create relative paths for documents and cache the LSIF protocol
+     * version that we will later insert into the metadata table.
+     *
+     * @param vertex The metadata vertex.
+     */
+    private handleMetaData(vertex: MetaData): void {
+        this.lsifVersion = vertex.version
+        this.projectRoot = new URL(vertex.projectRoot)
+    }
+
+    //
+    // Edge Handlers
+
+    /**
+     * Add range data ids into the document in which they are contained. Ensures
+     * all referenced vertices are defined.
+     *
+     * @param edge The contains edge.
+     */
+    private handleContains(edge: contains): void {
+        // Do not track project contains
+        if (!this.documentPaths.has(edge.outV)) {
+            return
+        }
+
+        const set = mustGet(this.containsData, edge.outV, 'contains')
+        for (const inV of edge.inVs) {
+            mustGet(this.rangeData, inV, 'range')
+            set.add(inV)
+        }
+    }
+
+    /**
+     * Update definition and reference fields from an item edge. Ensures all
+     * referenced vertices are defined.
+     *
+     * @param edge The item edge.
+ */ + private handleItemEdge(edge: item): void { + switch (edge.property) { + // `item` edges with a `property` refer to a referenceResult + case ItemEdgeProperties.definitions: + case ItemEdgeProperties.references: { + const documentMap = mustGet(this.referenceData, edge.outV, 'referenceResult') + const rangeIds = documentMap.getOrDefault(edge.document) + for (const inV of edge.inVs) { + mustGet(this.rangeData, inV, 'range') + rangeIds.push(inV) + } + + break + } + + // `item` edges without a `property` refer to a definitionResult + case undefined: { + const documentMap = mustGet(this.definitionData, edge.outV, 'definitionResult') + const rangeIds = documentMap.getOrDefault(edge.document) + for (const inV of edge.inVs) { + mustGet(this.rangeData, inV, 'range') + rangeIds.push(inV) + } + + break + } + } + } + + /** + * Attaches the specified moniker to the specified range or result set. Ensures all referenced + * vertices are defined. + * + * @param edge The moniker edge. + */ + private handleMonikerEdge(edge: moniker): void { + const source = mustGetFromEither( + this.rangeData, + this.resultSetData, + edge.outV, + 'range/resultSet' + ) + + mustGet(this.monikerData, edge.inV, 'moniker') + source.monikerIds = new Set([edge.inV]) + } + + /** + * Sets the next field of the specified range or result set. Ensures all referenced vertices + * are defined. + * + * @param edge The next edge. + */ + private handleNextEdge(edge: next): void { + mustGetFromEither( + this.rangeData, + this.resultSetData, + edge.outV, + 'range/resultSet' + ) + + mustGet(this.resultSetData, edge.inV, 'resultSet') + this.nextData.set(edge.outV, edge.inV) + } + + /** + * Correlates monikers together so that when one moniker is queried, each correlated moniker + * is also returned as a strongly connected set. Ensures all referenced vertices are defined. + * + * @param edge The nextMoniker edge. + */ + private handleNextMonikerEdge(edge: nextMoniker): void { + mustGet(this.monikerData, edge.inV, 'moniker') + mustGet(this.monikerData, edge.outV, 'moniker') + this.monikerSets.getOrDefault(edge.inV).add(edge.outV) // Forward direction + this.monikerSets.getOrDefault(edge.outV).add(edge.inV) // Backwards direction + } + + /** + * Sets the package information of the specified moniker. If the moniker is an export moniker, + * then the package information will also be returned as an exported package by the `finalize` + * method. Ensures all referenced vertices are defined. + * + * @param edge The packageInformation edge. + */ + private handlePackageInformationEdge(edge: packageInformation): void { + const source = mustGet(this.monikerData, edge.outV, 'moniker') + mustGet(this.packageInformationData, edge.inV, 'packageInformation') + source.packageInformationId = edge.inV + + if (source.kind === 'export') { + this.exportedMonikers.add(edge.outV) + } + + if (source.kind === 'import') { + this.importedMonikers.add(edge.outV) + } + } + + /** + * Sets the definition result of the specified range or result set. Ensures all referenced + * vertices are defined. + * + * @param edge The textDocument/definition edge. + */ + private handleDefinitionEdge(edge: textDocument_definition): void { + const outV = mustGetFromEither( + this.rangeData, + this.resultSetData, + edge.outV, + 'range/resultSet' + ) + + mustGet(this.definitionData, edge.inV, 'definitionResult') + outV.definitionResultId = edge.inV + } + + /** + * Sets the hover result of the specified range or result set. Ensures all referenced + * vertices are defined. 
+ * + * @param edge The textDocument/hover edge. + */ + private handleHoverEdge(edge: textDocument_hover): void { + const outV = mustGetFromEither( + this.rangeData, + this.resultSetData, + edge.outV, + 'range/resultSet' + ) + + mustGet(this.hoverData, edge.inV, 'hoverResult') + outV.hoverResultId = edge.inV + } + + /** + * Sets the reference result of the specified range or result set. Ensures all + * referenced vertices are defined. + * + * @param edge The textDocument/references edge. + */ + private handleReferenceEdge(edge: textDocument_references): void { + const outV = mustGetFromEither( + this.rangeData, + this.resultSetData, + edge.outV, + 'range/resultSet' + ) + + mustGet(this.referenceData, edge.inV, 'referenceResult') + outV.referenceResultId = edge.inV + } +} + +/** + * Normalize an LSP hover object into a string. + * + * @param hover The hover object. + */ +export function normalizeHover(hover: Hover): string { + const normalizeContent = (content: string | MarkupContent | { language: string; value: string }): string => { + if (typeof content === 'string') { + return content + } + + if (MarkupContent.is(content)) { + return content.value + } + + const tick = '```' + return `${tick}${content.language}\n${content.value}\n${tick}` + } + + const separator = '\n\n---\n\n' + const contents = Array.isArray(hover.contents) ? hover.contents : [hover.contents] + return contents + .map(c => normalizeContent(c).trim()) + .filter(s => s) + .join(separator) +} diff --git a/lsif/src/database.test.ts b/lsif/src/database.test.ts index 380168ae2b77d..e9dc22c65e9d9 100644 --- a/lsif/src/database.test.ts +++ b/lsif/src/database.test.ts @@ -1,12 +1,11 @@ import * as lsp from 'vscode-languageserver-protocol' -import { asLocations, comparePosition, findMonikers, findRange, findResult, makeRemoteUri, walkChain } from './database' -import { Id, MonikerKind } from 'lsif-protocol' -import { MonikerData, RangeData, ResultSetData } from './entities' +import { comparePosition, getRangeByPosition, createRemoteUri, mapRangesToLocations } from './database' +import { RangeData, RangeId, MonikerId } from './models.database' import { range } from 'lodash' -describe('findRange', () => { +describe('getRangeByPosition', () => { it('should find all ranges in list', () => { - // Generate starting characters for each range. Thse neds to be + // Generate starting characters for each range. These need to be // spread wide enough so that the ranges on each line don't touch. 
const characters = range(0, 10000, 5) @@ -16,123 +15,86 @@ describe('findRange', () => { const c2 = characters[(i - 1) * 2 + 1] // Generate two ranges on each line - ranges.push({ startLine: i, startCharacter: c1, endLine: i, endCharacter: c1 + 3, monikers: [] }) - ranges.push({ startLine: i, startCharacter: c2, endLine: i, endCharacter: c2 + 3, monikers: [] }) + ranges.push({ + startLine: i, + startCharacter: c1, + endLine: i, + endCharacter: c1 + 3, + monikerIds: new Set(), + }) + ranges.push({ + startLine: i, + startCharacter: c2, + endLine: i, + endCharacter: c2 + 3, + monikerIds: new Set(), + }) } for (const range of ranges) { // search for midpoint of each range const c = (range.startCharacter + range.endCharacter) / 2 - expect(findRange(ranges, { line: range.startLine, character: c })).toEqual(range) + expect(getRangeByPosition(ranges, { line: range.startLine, character: c })).toEqual(range) } for (let i = 1; i <= 1000; i++) { // search for the empty space between ranges on a line const c = characters[(i - 1) * 2 + 1] - 1 - expect(findRange(ranges, { line: i, character: c })).toBeUndefined() + expect(getRangeByPosition(ranges, { line: i, character: c })).toBeUndefined() } }) }) -describe('findResult', () => { - it('should should find results via next chain', () => { - const resultSets = new Map() - resultSets.set(1, { monikers: [42], next: 3 }) - resultSets.set(2, { monikers: [43], definitionResult: 25 }) - resultSets.set(3, { monikers: [44], next: 2, definitionResult: 50 }) - resultSets.set(4, { monikers: [44] }) - - const range = { - startLine: 5, - startCharacter: 11, - endLine: 11, - endCharacter: 13, - monikers: [41], - next: 1, - } - - const map = new Map() - map.set(50, 'foo') - map.set(25, 'bar') - - expect(findResult(resultSets, map, range, 'definitionResult')).toEqual('foo') - expect(findResult(resultSets, map, resultSets.get(2)!, 'definitionResult')).toEqual('bar') - expect(findResult(resultSets, map, resultSets.get(4)!, 'definitionResult')).toBeUndefined() - }) -}) - -describe('findMonikers', () => { - it('should should find monikers via next chain', () => { - const resultSets = new Map() - resultSets.set(1, { monikers: [42], next: 3 }) - resultSets.set(2, { monikers: [43, 50] }) - resultSets.set(3, { monikers: [44], next: 2 }) - +describe('comparePosition', () => { + it('should return the relative order to a range', () => { const range = { startLine: 5, startCharacter: 11, - endLine: 11, + endLine: 5, endCharacter: 13, - monikers: [41], - next: 1, + monikerIds: new Set(), } - const map = new Map() - map.set(41, { kind: MonikerKind.local, scheme: '', identifier: 'foo' }) - map.set(42, { kind: MonikerKind.local, scheme: '', identifier: 'foo' }) - map.set(44, { kind: MonikerKind.local, scheme: '', identifier: 'bar' }) - map.set(43, { kind: MonikerKind.local, scheme: '', identifier: 'bonk' }) - map.set(50, { kind: MonikerKind.local, scheme: '', identifier: 'quux' }) - - expect(findMonikers(resultSets, map, range)).toEqual([ - map.get(41), - map.get(42), - map.get(44), - map.get(43), - map.get(50), - ]) + expect(comparePosition(range, { line: 5, character: 11 })).toEqual(0) + expect(comparePosition(range, { line: 5, character: 12 })).toEqual(0) + expect(comparePosition(range, { line: 5, character: 13 })).toEqual(0) + expect(comparePosition(range, { line: 4, character: 12 })).toEqual(+1) + expect(comparePosition(range, { line: 5, character: 10 })).toEqual(+1) + expect(comparePosition(range, { line: 5, character: 14 })).toEqual(-1) + expect(comparePosition(range, { line: 6, 
character: 12 })).toEqual(-1) }) }) -describe('walkChain', () => { - it('should yield result sets in order', () => { - const resultSets = new Map() - resultSets.set(1, { monikers: [42], next: 3 }) - resultSets.set(2, { monikers: [43, 50] }) - resultSets.set(3, { monikers: [44], next: 2 }) - - const range = { - startLine: 5, - startCharacter: 11, - endLine: 11, - endCharacter: 13, - monikers: [41], - next: 1, +describe('createRemoteUri', () => { + it('should generate a URI to another project', () => { + const pkg = { + id: 0, + scheme: '', + name: '', + version: '', + repository: 'github.com/sourcegraph/codeintellify', + commit: 'deadbeef', } - expect(Array.from(walkChain(resultSets, range))).toEqual([ - range, - resultSets.get(1), - resultSets.get(3), - resultSets.get(2), - ]) + const uri = createRemoteUri(pkg, 'src/position.ts') + expect(uri).toEqual('git://github.com/sourcegraph/codeintellify?deadbeef#src/position.ts') }) }) -describe('asLocations', () => { - it('should convert valid locations', () => { - const ranges = new Map() +describe('mapRangesToLocations', () => { + it('should map ranges to locations', () => { + const ranges = new Map() ranges.set(1, 0) ranges.set(2, 2) ranges.set(4, 1) const orderedRanges = [ - { startLine: 1, startCharacter: 1, endLine: 1, endCharacter: 2, monikers: [] }, - { startLine: 2, startCharacter: 1, endLine: 2, endCharacter: 2, monikers: [] }, - { startLine: 3, startCharacter: 1, endLine: 3, endCharacter: 2, monikers: [] }, + { startLine: 1, startCharacter: 1, endLine: 1, endCharacter: 2, monikerIds: new Set() }, + { startLine: 2, startCharacter: 1, endLine: 2, endCharacter: 2, monikerIds: new Set() }, + { startLine: 3, startCharacter: 1, endLine: 3, endCharacter: 2, monikerIds: new Set() }, ] - expect(asLocations(ranges, orderedRanges, 'src/position.ts', [1, 2, 3, 4])).toEqual([ + expect(mapRangesToLocations(ranges, orderedRanges, 'src/position.ts', [1, 2, 4])).toEqual([ lsp.Location.create('src/position.ts', { start: { line: 1, character: 1 }, end: { line: 1, character: 2 }, @@ -148,38 +110,3 @@ describe('asLocations', () => { ]) }) }) - -describe('makeRemoteUri', () => { - it('should generate a URI to another project', () => { - const pkg = { - id: 0, - scheme: '', - name: '', - version: '', - repository: 'github.com/sourcegraph/codeintellify', - commit: 'deadbeef', - } - - const uri = makeRemoteUri(pkg, 'src/position.ts') - expect(uri).toEqual('git://github.com/sourcegraph/codeintellify?deadbeef#src/position.ts') - }) -}) - -describe('comparePosition', () => { - it('should return the relative order to a range', () => { - const range = { - startLine: 5, - startCharacter: 11, - endLine: 5, - endCharacter: 13, - } - - expect(comparePosition(range, { line: 5, character: 11 })).toEqual(0) - expect(comparePosition(range, { line: 5, character: 12 })).toEqual(0) - expect(comparePosition(range, { line: 5, character: 13 })).toEqual(0) - expect(comparePosition(range, { line: 4, character: 12 })).toEqual(+1) - expect(comparePosition(range, { line: 5, character: 10 })).toEqual(+1) - expect(comparePosition(range, { line: 5, character: 14 })).toEqual(-1) - expect(comparePosition(range, { line: 6, character: 12 })).toEqual(-1) - }) -}) diff --git a/lsif/src/database.ts b/lsif/src/database.ts index bcce643098ba5..e176a7306af0f 100644 --- a/lsif/src/database.ts +++ b/lsif/src/database.ts @@ -1,18 +1,40 @@ import * as lsp from 'vscode-languageserver-protocol' -import { groupBy, isEqual, uniqWith } from 'lodash' -import { DocumentModel, DefModel, MetaModel, RefModel, 
PackageModel } from './models' +import { mustGet, hashKey } from './util' import { Connection } from 'typeorm' +import { ConnectionCache, DocumentCache, EncodedJsonCacheValue, ResultChunkCache } from './cache' import { decodeJSON } from './encoding' -import { MonikerData, RangeData, ResultSetData, DocumentData, FlattenedRange } from './entities' -import { Id } from 'lsif-protocol' +import { DefaultMap } from './default-map' +import { + DefinitionModel, + DocumentData, + DocumentModel, + MetaModel, + MonikerData, + RangeData, + ReferenceModel, + ResultChunkData, + ResultChunkModel, + DocumentPathRangeId, + OrderedRanges, + Ix, + DefinitionReferenceResultId, + RangeId, +} from './models.database' +import { isEqual, uniqWith } from 'lodash' import { makeFilename } from './backend' +import { PackageModel } from './models.xrepo' import { XrepoDatabase } from './xrepo' -import { ConnectionCache, DocumentCache } from './cache' /** * A wrapper around operations for single repository/commit pair. */ export class Database { + /** + * A static map of database paths to the `numResultChunks` value of their + * metadata row. This map is populated lazily as the values are needed. + */ + private static numResultChunks = new Map() + /** * Create a new `Database` with the given cross-repo database instance and the * filename of the database that contains data for a particular repository/commit. @@ -20,7 +42,8 @@ export class Database { * @param storageRoot The path where SQLite databases are stored. * @param xrepoDatabase The cross-repo database. * @param connectionCache The cache of SQLite connections. - * @param documentCache The cache of loaded document. + * @param documentCache The cache of loaded documents. + * @param resultChunkCache The cache of loaded result chunks. * @param repository The repository for which this database answers queries. * @param commit The commit for which this database answers queries. * @param databasePath The path to the database file. @@ -30,6 +53,7 @@ export class Database { private xrepoDatabase: XrepoDatabase, private connectionCache: ConnectionCache, private documentCache: DocumentCache, + private resultChunkCache: ResultChunkCache, private repository: string, private commit: string, private databasePath: string @@ -41,7 +65,7 @@ export class Database { * @param path The path of the document. */ public async exists(path: string): Promise { - return (await this.findDocument(path)) !== undefined + return (await this.getDocumentByPath(path)) !== undefined } /** @@ -51,7 +75,7 @@ export class Database { * @param position The current hover position. */ public async definitions(path: string, position: lsp.Position): Promise { - const { document, range } = await this.findRange(path, position) + const { document, range } = await this.getRangeByPosition(path, position) if (!document || !range) { return null } @@ -59,10 +83,17 @@ export class Database { // First, we try to find the definition result attached to the range or one // of the result sets to which the range is attached. - const resultData = findResult(document.resultSets, document.definitionResults, range, 'definitionResult') - if (resultData) { + if (range.definitionResultId) { // We have a definition result in this database. - return await this.getLocations(path, document, resultData) + const definitionResults = await this.getResultById(range.definitionResultId) + + // TODO - due to some bugs in tsc... 
this fixes the tests and some typescript examples + // Not sure of a better way to do this right now until we work through how to patch + // lsif-tsc to handle node_modules inclusion (or somehow blacklist it on import). + + if (!definitionResults.some(v => v.documentPath.includes('node_modules'))) { + return await this.convertRangesToLspLocations(path, document, definitionResults) + } } // Otherwise, we fall back to a moniker search. We get all the monikers attached @@ -70,26 +101,26 @@ export class Database { // moniker sequentially in order of priority, where import monikers, if any exist, // will be processed first. - for (const moniker of findMonikers(document.resultSets, document.monikers, range)) { + for (const moniker of sortMonikers( + Array.from(range.monikerIds).map(id => mustGet(document.monikers, id, 'moniker')) + )) { if (moniker.kind === 'import') { // This symbol was imported from another database. See if we have xrepo // definition for it. - const defs = await this.remoteDefinitions(document, moniker) - if (defs) { - return defs + const remoteDefinitions = await this.remoteDefinitions(document, moniker) + if (remoteDefinitions) { + return remoteDefinitions + } + } else { + // This symbol was not imported from another database. We search the definitions + // table of our own database in case there was a definition that wasn't properly + // attached to a result set but did have the correct monikers attached. + + const localDefinitions = await Database.monikerResults(this, DefinitionModel, moniker, path => path) + if (localDefinitions) { + return localDefinitions } - - continue - } - - // This symbol was not imported from another database. We search the Defs table - // of our own database in case there was a definition that wasn't properly - // attached to a result set but did have the correct monikers attached. - - const defs = await Database.monikerResults(this, DefModel, moniker, path => path) - if (defs) { - return defs } } @@ -103,86 +134,63 @@ export class Database { * @param position The current hover position. */ public async references(path: string, position: lsp.Position): Promise { - const { document, range } = await this.findRange(path, position) + const { document, range } = await this.getRangeByPosition(path, position) if (!document || !range) { return undefined } - let result: lsp.Location[] = [] + let locations: lsp.Location[] = [] // First, we try to find the reference result attached to the range or one // of the result sets to which the range is attached. - const resultData = findResult(document.resultSets, document.referenceResults, range, 'referenceResult') - if (resultData) { + if (range.referenceResultId) { // We have references in this database. - result = result.concat(await this.getLocations(path, document, resultData)) + locations = locations.concat( + await this.convertRangesToLspLocations( + path, + document, + await this.getResultById(range.referenceResultId) + ) + ) } // Next, we do a moniker search in two stages, described below. We process each // moniker sequentially in order of priority for each stage, where import monikers, // if any exist, will be processed first. 
- const monikers = findMonikers(document.resultSets, document.monikers, range) + const monikers = sortMonikers( + Array.from(range.monikerIds).map(id => mustGet(document.monikers, id, 'monikers')) + ) - // First, we search the Refs table of our own database - this search is necessary, - // but may be unintuitive, but remember that a 'Find References' operation on a - // reference should also return references to the definition - these are not - // necessarily fully linked in the LSIF data. + // Next, we search the references table of our own database - this search is necessary, + // but may be un-intuitive, but remember that a 'Find References' operation on a reference + // should also return references to the definition. These are not necessarily fully linked + // in the LSIF data. for (const moniker of monikers) { - result = result.concat(await Database.monikerResults(this, RefModel, moniker, path => path)) + locations = locations.concat(await Database.monikerResults(this, ReferenceModel, moniker, path => path)) } - // Second, we perform an xrepo search for uses of each nonlocal moniker. We stop - // processing after the first moniker for which we received results. As we process - // monikers in an order that considers moniker schemes, the first one to get results - // should be the most desirable. + // Next, we perform an xrepo search for uses of each nonlocal moniker. We stop processing after + // the first moniker for which we received results. As we process monikers in an order that + // considers moniker schemes, the first one to get results should be the most desirable. for (const moniker of monikers) { if (moniker.kind === 'import') { - if (moniker.packageInformation) { - const packageInformation = document.packageInformation.get(moniker.packageInformation) - if (packageInformation) { - const packageEntity = await this.xrepoDatabase.getPackage( - moniker.scheme, - packageInformation.name, - packageInformation.version - ) - - if (packageEntity) { - const db = new Database( - this.storageRoot, - this.xrepoDatabase, - this.connectionCache, - this.documentCache, - packageEntity.repository, - packageEntity.commit, - makeFilename(this.storageRoot, packageEntity.repository, packageEntity.commit) - ) - - const pathTransformer = (path: string): string => makeRemoteUri(packageEntity, path) - result = result.concat( - await Database.monikerResults(db, RefModel, moniker, pathTransformer) - ) - } - } - } - } - - if (moniker.kind === 'local') { - continue + // Get locations in the defining package + locations = locations.concat(await this.remoteMoniker(document, moniker)) } + // Get locations in all packages const remoteResults = await this.remoteReferences(document, moniker) if (remoteResults) { - // TODO - see why we need to deduplicate - return uniqWith(result.concat(remoteResults), isEqual) + // TODO - determine source of duplication (and below) + return uniqWith(locations.concat(remoteResults), isEqual) } } - // TODO - see why we need to deduplicate - return uniqWith(result, isEqual) + return uniqWith(locations, isEqual) } /** @@ -192,7 +200,7 @@ export class Database { * @param position The current hover position. */ public async hover(path: string, position: lsp.Position): Promise { - const { document, range } = await this.findRange(path, position) + const { document, range } = await this.getRangeByPosition(path, position) if (!document || !range) { return null } @@ -201,91 +209,96 @@ export class Database { // which the range is attached. 
There is no fall-back search via monikers for this // operation. - const contents = findResult(document.resultSets, document.hovers, range, 'hoverResult') - if (!contents) { - return null + if (range.hoverResultId) { + return { contents: mustGet(document.hoverResults, range.hoverResultId, 'hoverResult') } } - return { contents } + return null } // // Helper Functions /** - * Query the defs or refs table of `db` for items that match the given moniker. Convert - * each result into an LSP location. The `pathTransformer` function is invoked on each - * result item to modify the resulting locations. - * - * @param db The target database. - * @param model The constructor for the model type. - * @param moniker The target moniker. - * @param pathTransformer The function used to alter location paths. - */ - private static async monikerResults( - db: Database, - model: typeof DefModel | typeof RefModel, - moniker: MonikerData, - pathTransformer: (path: string) => string - ): Promise { - const results = await db.withConnection(connection => - connection.getRepository(model).find({ - where: { - scheme: moniker.scheme, - identifier: moniker.identifier, - }, - }) - ) - - return results.map(result => lsp.Location.create(pathTransformer(result.documentPath), makeRange(result))) - } - - /** - * Convert a set of range results (from a definition or reference query) into a set - * of LSP ranges. Each range result holds the range Id as well as the document path. - * For document paths matching the loaded document, find the range data locally. For - * all other paths, find the document in this database and find the range in that + * Convert a set of range-document pairs (from a definition or reference query) into + * a set of LSP ranges. Each pair holds the range identifier as well as the document + * path. For document paths matching the loaded document, find the range data locally. + * For all other paths, find the document in this database and find the range in that * document. * * @param path The path of the document for this query. * @param document The document object for this query. - * @param resultData A lsit of range ids and the document they belong to. + * @param resultData A list of range ids and the document they belong to. 
*/ - private async getLocations( + private async convertRangesToLspLocations( path: string, document: DocumentData, - resultData: { documentPath: string; id: Id }[] + resultData: DocumentPathRangeId[] ): Promise { // Group by document path so we only have to load each document once - const groups = groupBy(resultData, v => v.documentPath) + const groupedResults = new DefaultMap>(() => new Set()) + + for (const { documentPath, rangeId } of resultData) { + groupedResults.getOrDefault(documentPath).add(rangeId) + } let results: lsp.Location[] = [] - for (const documentPath of Object.keys(groups)) { - // Get all ids for the document path - const ids = groups[documentPath].map(v => v.id) + for (const [documentPath, rangeIdSet] of groupedResults) { + // Sets are not mappable, use array + const rangeIds = Array.from(rangeIdSet) if (documentPath === path) { // If the document path is this document, convert the locations directly - results = results.concat(asLocations(document.ranges, document.orderedRanges, path, ids)) + results = results.concat(mapRangesToLocations(document.ranges, document.orderedRanges, path, rangeIds)) continue } // Otherwise, we need to get the correct document - const sibling = await this.findDocument(documentPath) + const sibling = await this.getDocumentByPath(documentPath) if (!sibling) { continue } // Then finally convert the locations in the sibling document - results = results.concat(asLocations(sibling.ranges, sibling.orderedRanges, documentPath, ids)) + results = results.concat( + mapRangesToLocations(sibling.ranges, sibling.orderedRanges, documentPath, rangeIds) + ) } return results } + /** + * Query the definitions or references table of `db` for items that match the given moniker. + * Convert each result into an LSP location. The `pathTransformer` function is invoked on each + * result item to modify the resulting locations. + * + * @param db The target database. + * @param model The constructor for the model type. + * @param moniker The target moniker. + * @param pathTransformer The function used to alter location paths. + */ + private static async monikerResults( + db: Database, + model: typeof DefinitionModel | typeof ReferenceModel, + moniker: MonikerData, + pathTransformer: (path: string) => string + ): Promise { + const results = await db.withConnection(connection => + connection.getRepository(model).find({ + where: { + scheme: moniker.scheme, + identifier: moniker.identifier, + }, + }) + ) + + return results.map(result => lsp.Location.create(pathTransformer(result.documentPath), createRange(result))) + } + /** * Find the definition of the target moniker outside of the current database. If the - * moniker has attached package information, then the xrepo database is queried for + * moniker has attached package information, then the correlation database is queried for * the target package. That database is opened, and its def table is queried for the * target moniker. * @@ -293,11 +306,11 @@ export class Database { * @param moniker The target moniker. 
*/ private async remoteDefinitions(document: DocumentData, moniker: MonikerData): Promise { - if (!moniker.packageInformation) { + if (!moniker.packageInformationId) { return null } - const packageInformation = document.packageInformation.get(moniker.packageInformation) + const packageInformation = document.packageInformation.get(moniker.packageInformationId) if (!packageInformation) { return null } @@ -312,35 +325,67 @@ export class Database { return null } - const db = new Database( - this.storageRoot, - this.xrepoDatabase, - this.connectionCache, - this.documentCache, + const db = this.createNewDatabase( packageEntity.repository, packageEntity.commit, makeFilename(this.storageRoot, packageEntity.repository, packageEntity.commit) ) - const pathTransformer = (path: string): string => makeRemoteUri(packageEntity, path) - return await Database.monikerResults(db, DefModel, moniker, pathTransformer) + const pathTransformer = (path: string): string => createRemoteUri(packageEntity, path) + return await Database.monikerResults(db, DefinitionModel, moniker, pathTransformer) + } + + /** + * Find the references of the target moniker inside the database where that moniker is defined. + * + * @param document The document containing the definition. + * @param moniker The target moniker. + */ + private async remoteMoniker(document: DocumentData, moniker: MonikerData): Promise { + if (!moniker.packageInformationId) { + return [] + } + + const packageInformation = document.packageInformation.get(moniker.packageInformationId) + if (!packageInformation) { + return [] + } + + const packageEntity = await this.xrepoDatabase.getPackage( + moniker.scheme, + packageInformation.name, + packageInformation.version + ) + + if (!packageEntity) { + return [] + } + + const db = this.createNewDatabase( + packageEntity.repository, + packageEntity.commit, + makeFilename(this.storageRoot, packageEntity.repository, packageEntity.commit) + ) + + const pathTransformer = (path: string): string => createRemoteUri(packageEntity, path) + return await Database.monikerResults(db, ReferenceModel, moniker, pathTransformer) } /** * Find the references of the target moniker outside of the current database. If the moniker - * has attached package information, then the xrepo database is queried for the packages that + * has attached package information, then the correlation database is queried for the packages that * require this particular moniker identifier. These databases are opened, and their ref tables * are queried for the target moniker. * * @param document The document containing the definition. - * @param moniker THe target moniker. + * @param moniker The target moniker. */ private async remoteReferences(document: DocumentData, moniker: MonikerData): Promise { - if (!moniker.packageInformation) { + if (!moniker.packageInformationId) { return [] } - const packageInformation = document.packageInformation.get(moniker.packageInformation) + const packageInformation = document.packageInformation.get(moniker.packageInformationId) if (!packageInformation) { return [] } @@ -354,22 +399,20 @@ export class Database { let allReferences: lsp.Location[] = [] for (const reference of references) { + // Skip the remote references that show up for ourselves - we've already gathered + // these in the previous step of the references query.
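The remote lookup helpers above (`remoteDefinitions`, `remoteMoniker`, `remoteReferences`) all follow the same shape: follow the moniker to its package information, resolve the package to the dump that provides it, open that dump's database, and query its definitions or references table. A minimal sketch of the resolution step, with simplified stand-ins for the diff's `MonikerData` and `PackageInformationData` shapes and a `getPackage`-style resolver (the real one lives on the xrepo database):

```typescript
// Illustrative only: simplified shapes, not the actual class wiring.
interface MonikerData {
    scheme: string // e.g. 'npm'
    identifier: string // e.g. 'sample:foo:foo'
    packageInformationId?: string
}

interface PackageInformationData {
    name: string
    version: string
}

interface PackageEntity {
    repository: string
    commit: string
}

type PackageResolver = (scheme: string, name: string, version: string) => Promise<PackageEntity | undefined>

// Walk moniker -> package information -> defining dump. Each early return
// mirrors a guard clause in the methods above.
async function resolveDefiningDump(
    moniker: MonikerData,
    packageInformation: Map<string, PackageInformationData>,
    getPackage: PackageResolver
): Promise<PackageEntity | undefined> {
    if (!moniker.packageInformationId) {
        return undefined // local moniker; nothing to chase across repositories
    }

    const info = packageInformation.get(moniker.packageInformationId)
    if (!info) {
        return undefined
    }

    return getPackage(moniker.scheme, info.name, info.version)
}
```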
if (reference.repository === this.repository && reference.commit === this.commit) { continue } - const db = new Database( - this.storageRoot, - this.xrepoDatabase, - this.connectionCache, - this.documentCache, + const db = this.createNewDatabase( reference.repository, reference.commit, makeFilename(this.storageRoot, reference.repository, reference.commit) ) - const pathTransformer = (path: string): string => makeRemoteUri(reference, path) - const references = await Database.monikerResults(db, RefModel, moniker, pathTransformer) + const pathTransformer = (path: string): string => createRemoteUri(reference, path) + const references = await Database.monikerResults(db, ReferenceModel, moniker, pathTransformer) allReferences = allReferences.concat(references) } @@ -382,17 +425,20 @@ export class Database { * * @param path The path of the document. */ - private async findDocument(path: string): Promise { - const factory = async (): Promise => { + private async getDocumentByPath(path: string): Promise { + const factory = async (): Promise> => { const document = await this.withConnection(connection => connection.getRepository(DocumentModel).findOneOrFail(path) ) - return await decodeJSON(document.value) + return { + size: document.data.length, + data: await decodeJSON(document.data), + } } - return await this.documentCache.withDocument(`${this.databasePath}::${path}`, factory, document => - Promise.resolve(document) + return await this.documentCache.withValue(`${this.databasePath}::${path}`, factory, document => + Promise.resolve(document.data) ) } @@ -404,16 +450,16 @@ export class Database { * @param path The path of the document. * @param position The user's hover position. */ - private async findRange( + private async getRangeByPosition( path: string, position: lsp.Position ): Promise<{ document: DocumentData | undefined; range: RangeData | undefined }> { - const document = await this.findDocument(path) + const document = await this.getDocumentByPath(path) if (!document) { return { document: undefined, range: undefined } } - const range = findRange(document.orderedRanges, position) + const range = getRangeByPosition(document.orderedRanges, position) if (!range) { return { document: undefined, range: undefined } } @@ -421,6 +467,85 @@ export class Database { return { document, range } } + /** + * Convert a list of ranges with document ids into a list of ranges with + * document paths by looking into the result chunks table and parsing the + * data associated with the given identifier. + * + * @param id The identifier of the definition or reference result. + */ + private async getResultById(id: DefinitionReferenceResultId): Promise { + const { documentPaths, documentIdRangeIds } = await this.getResultChunkByResultId(id) + const ranges = mustGet(documentIdRangeIds, id, 'documentIdRangeId') + + return ranges.map(range => ({ + documentPath: mustGet(documentPaths, range.documentId, 'documentPath'), + rangeId: range.rangeId, + })) + } + + /** + * Return a parsed result chunk that contains the given identifier. + * + * @param id An identifier contained in the result chunk. 
+ */ + private async getResultChunkByResultId(id: DefinitionReferenceResultId): Promise { + // Find the result chunk index this id belongs to + const index = hashKey(id, await this.getNumResultChunks()) + + const factory = async (): Promise> => { + const resultChunk = await this.withConnection(connection => + connection.getRepository(ResultChunkModel).findOneOrFail(index) + ) + + return { + size: resultChunk.data.length, + data: await decodeJSON(resultChunk.data), + } + } + + return await this.resultChunkCache.withValue(`${this.databasePath}::${index}`, factory, resultChunk => + Promise.resolve(resultChunk.data) + ) + } + + /** + * Get the `numResultChunks` value from this database's metadata row. + */ + private async getNumResultChunks(): Promise { + const numResultChunks = Database.numResultChunks.get(this.databasePath) + if (numResultChunks !== undefined) { + return numResultChunks + } + + // Not in the shared map, need to query it + const meta = await this.withConnection(connection => connection.getRepository(MetaModel).findOneOrFail(1)) + Database.numResultChunks.set(this.databasePath, meta.numResultChunks) + return meta.numResultChunks + } + + /** + * Create a new database with the same configuration but a different repository, + * commit, and databasePath. + * + * + * @param repository The repository for which this database answers queries. + * @param commit The commit for which this database answers queries. + * @param databasePath The path to the database file. + */ + private createNewDatabase(repository: string, commit: string, databasePath: string): Database { + return new Database( + this.storageRoot, + this.xrepoDatabase, + this.connectionCache, + this.documentCache, + this.resultChunkCache, + repository, + commit, + databasePath + ) + } + /** * Invoke `callback` with a SQLite connection object obtained from the * cache or created on cache miss. @@ -430,7 +555,7 @@ export class Database { private async withConnection(callback: (connection: Connection) => Promise): Promise { return await this.connectionCache.withConnection( this.databasePath, - [DefModel, DocumentModel, MetaModel, RefModel], + [DefinitionModel, DocumentModel, MetaModel, ReferenceModel, ResultChunkModel], callback ) } @@ -447,7 +572,7 @@ export class Database { * @param orderedRanges The ranges of the document, ordered by startLine/startCharacter. * @param position The user's hover position. */ -export function findRange(orderedRanges: RangeData[], position: lsp.Position): RangeData | undefined { +export function getRangeByPosition(orderedRanges: OrderedRanges, position: lsp.Position): RangeData | undefined { let lo = 0 let hi = orderedRanges.length - 1 @@ -471,80 +596,31 @@ export function findRange(orderedRanges: RangeData[], position: lsp.Position): R } /** - * Return the closest defined `property` related to the given range - * or result set. This method will walk the `next` chains of the item - * to find the property on an attached result set if it's not set - * on the range itself. Note that the `property` on the range and - * result set objects are simply identifiers, so the real value must - * be looked up in a secondary data structure `map`. + * Compare a position against a range. Returns 0 if the position occurs + * within the range (inclusive bounds), -1 if the position occurs after + * it, and +1 if the position occurs before it. * - * @param resultSets The map of results sets of the document. - * @param map The map from which to return the property value. 
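The result chunk lookup above only works because the same hash is computed at conversion time and at query time: an identifier is hashed modulo `numResultChunks`, and that count is frozen in the `meta` row, so the chunk a result was written to during import is the chunk consulted later. The real `hashKey` lives in `./util` and is not shown in this diff; the stand-in below only illustrates the contract (a deterministic map from an identifier to a bucket in `[0, maxIndex)`):

```typescript
// Hypothetical stand-in for hashKey from ./util; any stable string hash
// works as long as conversion and querying agree on it and on maxIndex.
function hashKey(id: string | number, maxIndex: number): number {
    const s = String(id)
    let hash = 0
    for (let i = 0; i < s.length; i++) {
        hash = (Math.imul(31, hash) + s.charCodeAt(i)) | 0
    }

    return Math.abs(hash) % maxIndex
}

// numResultChunks is read back from the meta table at query time, so the
// bucket for a given result identifier never changes after conversion.
const numResultChunks = 4
console.log(hashKey('52', numResultChunks)) // always the same bucket for id '52'
```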
- * @param data The range or result set object. - * @param property The target property. + * @param range The range. + * @param position The position. */ -export function findResult( - resultSets: Map, - map: Map, - data: RangeData | ResultSetData, - property: 'definitionResult' | 'referenceResult' | 'hoverResult' -): T | undefined { - for (const current of walkChain(resultSets, data)) { - const value = current[property] - if (value) { - return map.get(value) - } +export function comparePosition(range: RangeData, position: lsp.Position): number { + if (position.line < range.startLine) { + return +1 } - return undefined -} - -/** - * Retrieve all monikers attached to a range or result set. - * - * @param resultSets The map of results sets of the document. - * @param monikers The map of monikers of the document. - * @param data The range or result set object. - */ -export function findMonikers( - resultSets: Map, - monikers: Map, - data: RangeData | ResultSetData -): MonikerData[] { - const monikerSet: MonikerData[] = [] - for (const current of walkChain(resultSets, data)) { - for (const id of current.monikers) { - const moniker = monikers.get(id) - if (moniker) { - monikerSet.push(moniker) - } - } + if (position.line > range.endLine) { + return -1 } - return sortMonikers(monikerSet) -} - -/** - * Return an iterable of the range and result set items that are attached - * to the given initial data. The initial data is yielded immediately. - * - * @param resultSets The map of results sets of the document. - * @param data The range or result set object. - */ -export function* walkChain( - resultSets: Map, - data: RangeData | ResultSetData -): Iterable { - let current: RangeData | ResultSetData | undefined = data - - while (current) { - yield current - if (!current.next) { - return - } + if (position.line === range.startLine && position.character < range.startCharacter) { + return +1 + } - current = resultSets.get(current.next) + if (position.line === range.endLine && position.character > range.endCharacter) { + return -1 } + + return 0 } /** @@ -572,39 +648,6 @@ export function sortMonikers(monikers: MonikerData[]): MonikerData[] { return monikers } -/** - * Convert the given range identifiers into LSP location objects. - * - * @param ranges The map of ranges of the document (from identifier to the range's index in `orderedRanges`). - * @param orderedRanges The ordered ranges of the document. - * @param uri The location URI. - * @param ids The set of range identifiers for each resulting location. - */ -export function asLocations( - ranges: Map, - orderedRanges: RangeData[], - uri: string, - ids: Id[] -): lsp.Location[] { - const locations = [] - for (const id of ids) { - const rangeIndex = ranges.get(id) - if (rangeIndex === undefined) { - continue - } - - const range = orderedRanges[rangeIndex] - locations.push( - lsp.Location.create(uri, { - start: { line: range.startLine, character: range.startCharacter }, - end: { line: range.endLine, character: range.endCharacter }, - }) - ) - } - - return locations -} - /** * Construct a URI that can be used by the frontend to switch to another * directory. @@ -612,7 +655,7 @@ export function asLocations( * @param pkg The target package. * @param path The path relative to the project root. 
*/ -export function makeRemoteUri(pkg: PackageModel, path: string): string { +export function createRemoteUri(pkg: PackageModel, path: string): string { const url = new URL(`git://${pkg.repository}`) url.search = pkg.commit url.hash = path @@ -624,7 +667,7 @@ export function makeRemoteUri(pkg: PackageModel, path: string): string { * * @param result The start/end line/character of the range. */ -function makeRange(result: { +function createRange(result: { startLine: number startCharacter: number endLine: number @@ -634,29 +677,18 @@ function makeRange(result: { } /** - * Compare a position against a range. Returns 0 if the position occurs - * within the range (inclusive bounds), -1 if the position occurs after - * it, and +1 if the position occurs before it. + * Convert the given range identifiers into LSP location objects. * - * @param range The range. - * @param position The position. + * @param ranges The map of ranges of the document (from identifier to the range's index in `orderedRanges`). + * @param orderedRanges The ordered ranges of the document. + * @param uri The location URI. + * @param ids The set of range identifiers for each resulting location. */ -export function comparePosition(range: FlattenedRange, position: lsp.Position): number { - if (position.line < range.startLine) { - return +1 - } - - if (position.line > range.endLine) { - return -1 - } - - if (position.line === range.startLine && position.character < range.startCharacter) { - return +1 - } - - if (position.line === range.endLine && position.character > range.endCharacter) { - return -1 - } - - return 0 +export function mapRangesToLocations( + ranges: Map>, + orderedRanges: OrderedRanges, + uri: string, + ids: RangeId[] +): lsp.Location[] { + return ids.map(id => lsp.Location.create(uri, createRange(orderedRanges[mustGet(ranges, id, 'range')]))) } diff --git a/lsif/src/default-map.test.ts b/lsif/src/default-map.test.ts new file mode 100644 index 0000000000000..9888097effb87 --- /dev/null +++ b/lsif/src/default-map.test.ts @@ -0,0 +1,35 @@ +import { DefaultMap } from './default-map' + +describe('DefaultMap', () => { + it('should leave get unchanged', () => { + const map = new DefaultMap(() => 'bar') + expect(map.get('foo')).toBeUndefined() + }) + + it('should create values on access', () => { + const map = new DefaultMap(() => 'bar') + expect(map.getOrDefault('foo')).toEqual('bar') + }) + + it('should respect explicit set', () => { + const map = new DefaultMap(() => 'bar') + map.set('foo', 'baz') + expect(map.getOrDefault('foo')).toEqual('baz') + }) + + it('should support nested gets', () => { + const map = new DefaultMap>( + () => new DefaultMap(() => []) + ) + + map.getOrDefault('foo') + .getOrDefault('bar') + .push('baz') + + map.getOrDefault('foo') + .getOrDefault('bar') + .push('bonk') + + expect(map.get('foo')!.get('bar')).toEqual(['baz', 'bonk']) + }) +}) diff --git a/lsif/src/default-map.ts b/lsif/src/default-map.ts new file mode 100644 index 0000000000000..b9811852b1ac6 --- /dev/null +++ b/lsif/src/default-map.ts @@ -0,0 +1,32 @@ +/** + * An extension of `Map` that defines `getOrDefault` for a type of stunted + * autovivification. This saves a bunch of code that needs to check if a + * nested type within a map is undefined on first access. + */ +export class DefaultMap extends Map { + /** + * Returns a new `DefaultMap`. + * + * @param defaultFactory The factory invoked when an undefined value is accessed. + */ + constructor(private defaultFactory: () => V) { + super() + } + + /** + * Get a key from the map. 
If the key does not exist, the default factory produces + * a value, which is inserted into the map before being returned. + * + * @param key The key to retrieve. + */ + public getOrDefault(key: K): V { + let value = super.get(key) + if (value !== undefined) { + return value + } + + value = this.defaultFactory() + this.set(key, value) + return value + } +} diff --git a/lsif/src/encoding.test.ts b/lsif/src/encoding.test.ts index 2f2f6f126fd9f..cec65c267fdc0 100644 --- a/lsif/src/encoding.test.ts +++ b/lsif/src/encoding.test.ts @@ -25,4 +25,18 @@ describe('encodeJSON', () => { const decoded = await decodeJSON(encoded) expect(decoded).toEqual(value) }) + + it('should preserve sets', async () => { + const s = new Set([1, 2, 3, 4, 5]) + + const value = { + foo: [1, 2, 3], + bar: ['abc', 'xyz'], + baz: s, + } + + const encoded = await encodeJSON(value) + const decoded = await decodeJSON(encoded) + expect(decoded).toEqual(value) + }) }) diff --git a/lsif/src/encoding.ts b/lsif/src/encoding.ts index 3732bfc3f8f5a..7aa45ba4a7b1a 100644 --- a/lsif/src/encoding.ts +++ b/lsif/src/encoding.ts @@ -67,7 +67,7 @@ export function encodeJSON(value: T): Promise { * @param value The value to decode. */ export async function decodeJSON(value: string): Promise { - return parseJSON(await unb64gzip(value)) + return await parseJSON(await unb64gzip(value)) } /** @@ -90,9 +90,9 @@ async function unb64gzip(value: string): Promise { /** * Return the JSON representation of `value`. This has special logic to - * convert an ES6 map structure into a JSON-representable value. This - * method, along with `parseJSON` should be used over the raw methods if - * the payload may contain maps. + * convert ES6 map and set structures into a JSON-representable value. + * This method, along with `parseJSON` should be used over the raw methods + * if the payload may contain maps or sets. * * @param value The value to jsonify. */ function dumpJSON(value: T): string { @@ -105,20 +105,33 @@ function dumpJSON(value: T): string { } } + if (value instanceof Set) { + return { + type: 'set', + value: [...value], + } + } + return value }) } /** * Parse the JSON representation of `value`. This has special logic to - * unmarshal map objects as encoded by `dumpJSON`. + * unmarshal map and set objects as encoded by `dumpJSON`. * * @param value The value to unmarshal. */ function parseJSON(value: string): T { return JSON.parse(value, (_, value) => { - if (typeof value === 'object' && value !== null && value.type === 'map') { - return new Map(value.value) + if (typeof value === 'object' && value !== null) { + if (value.type === 'map') { + return new Map(value.value) + } + + if (value.type === 'set') { + return new Set(value.value) + } } return value diff --git a/lsif/src/entities.ts b/lsif/src/entities.ts deleted file mode 100644 index 28be550040b68..0000000000000 --- a/lsif/src/entities.ts +++ /dev/null @@ -1,178 +0,0 @@ -import { Id, MonikerKind } from 'lsif-protocol' - -/** - * Data for a single document within an LSIF dump. The data here can answer definitions, - * references, and hover queries if the results are all contained within the same document. - */ -export interface DocumentData { - /** - * A mapping from range identifiers to the index of the range in the - * `orderedRanges` array. We keep a mapping so we can look range data by - * identifier quickly, and keep them sorted so we can find the range that - * encloses a position quickly. - */ - ranges: Map - - /** - * An array of range data sorted by startLine, then by startCharacter.
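The `dumpJSON`/`parseJSON` changes above tag maps and sets during serialization so that they survive a JSON round trip. Distilled to its core, without the gzip and base64 layers that `encodeJSON`/`decodeJSON` add on top, the scheme looks roughly like this:

```typescript
// Distilled sketch of the tagged encoding; the production code wraps this
// in gzip + base64. Tagged objects could in principle collide with user data
// of the same shape, but these payloads are all internally produced.
const dump = (value: unknown): string =>
    JSON.stringify(value, (_, v: any) => {
        if (v instanceof Map) {
            return { type: 'map', value: [...v] }
        }
        if (v instanceof Set) {
            return { type: 'set', value: [...v] }
        }
        return v
    })

const parse = <T>(text: string): T =>
    JSON.parse(text, (_, v: any) => {
        if (typeof v === 'object' && v !== null) {
            if (v.type === 'map') {
                return new Map(v.value)
            }
            if (v.type === 'set') {
                return new Set(v.value)
            }
        }
        return v
    })

const roundTripped = parse<Map<string, Set<number>>>(dump(new Map([['a', new Set([1, 2])]])))
console.log(roundTripped.get('a')) // Set { 1, 2 }
```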
This - * allows us to perform binary search to find a particular location subsumed - * by a range in the document. - */ - orderedRanges: RangeData[] - - /** - * A map of result set identifiers to result set data. Result sets are like - * ranges, but do not have extents in the document. - */ - resultSets: Map - - /** - * A map of definition result identifiers to a list of ids that compose the - * definition result. Each id is paired with a document path, as result sets - * can be shared between documents (necessitating cross-document queries). - */ - definitionResults: Map - - /** - ** A map of reference result identifiers to a list of ids that compose the - * reference result. Each id is paired with a document path, as result sets - * can be shared between documents (necessitating cross-document queries). - */ - referenceResults: Map - - /** - * A map of hover identifiers to hover results normalized as a single string. - */ - hovers: Map - - /** - * A map of moniker identifiers to moniker data. - */ - monikers: Map - - /** - * A map of package information identifiers to package information data. - */ - packageInformation: Map -} - -/** - * The set of fields shared by a range or a result set vertex. It contains - * the same relevant edge data, which can be subsequently queried in the - * containing document. - */ -interface ResultObjectData { - /** - * The set of moniker identifiers directly attached to this range or result - * set. The moniker object can be queried by its identifier within the - * containing document. - */ - monikers: Id[] - - /** - * The identifier of the hover result attached to this range or result set, - * if one exists. The hover result object can be queried by its identifier - * within the containing document. - */ - hoverResult?: Id - - /** - * The identifier of the definition result attached to this range or result - * set, if one exists. The definition result object can be queried by its - * identifier within the containing document. - */ - definitionResult?: Id - - /** - * The identifier of the reference result attached to this range or result - * set, if one exists. The reference result object can be queried by its - * identifier within the containing document. - */ - referenceResult?: Id - - /** - * The identifier of a result set attached to this range or result set, if one - * exists. The result set object can be queried by its identifier within the - * containing document. - */ - next?: Id -} - -/** - * An internal representation of a range vertex from an LSIF dump. It contains the same - * relevant edge data, which can be subsequently queried in the containing document. - */ -export interface RangeData extends ResultObjectData, FlattenedRange {} - -/** - * An internal representation of a result set vertex from an LSIF dump. It contains the - * same relevant edge data, which can be subsequently queried in the containing document. - */ -export interface ResultSetData extends ResultObjectData {} - -/** - * Data about a moniker attached to a range or a result set. - */ -export interface MonikerData { - /** - * The kind of moniker (e.g. local, import, export). - */ - kind: MonikerKind - - /** - * The name of the package type (e.g. npm, pip). - */ - scheme: string - - /** - * The unique identifier of the moniker. - */ - identifier: string - - /** - * The identifier of the package information to this moniker, if one exists. - * The package information object can be queried by its identifier within the - * containing document. 
- */ - packageInformation?: Id -} - -/** - * Additional data about a non-local moniker. - */ -export interface PackageInformationData { - /** - * The name of the package the moniker describes. - */ - name: string - - /** - * The version of the package the moniker describes. - */ - version: string -} - -/** - * An LSP range that has been squashed into a single layer. - */ -export interface FlattenedRange { - /** - * The line on which the range starts (0-indexed, inclusive). - */ - startLine: number - - /** - * The line on which the range ends (0-indexed, inclusive). - */ - startCharacter: number - - /** - * The character on which the range starts (0-indexed, inclusive). - */ - endLine: number - - /** - * The character on which the range ends (0-indexed, inclusive). - */ - endCharacter: number -} diff --git a/lsif/src/importer.test.ts b/lsif/src/importer.test.ts index a4efd20e9b4ad..a5ff99899a08d 100644 --- a/lsif/src/importer.test.ts +++ b/lsif/src/importer.test.ts @@ -1,64 +1,16 @@ -import { reachableMonikers, normalizeHover, lookupRanges } from './importer' -import { Id } from 'lsif-protocol' - -describe('lookupRanges', () => { - it('should correlate ids with correct index', () => { - const ranges = new Map() - ranges.set(1, 0) - ranges.set(2, 2) - ranges.set(3, 1) - - const orderedRanges = [ - { startLine: 1, startCharacter: 1, endLine: 1, endCharacter: 2, monikers: [] }, - { startLine: 2, startCharacter: 1, endLine: 2, endCharacter: 2, monikers: [] }, - { startLine: 3, startCharacter: 1, endLine: 3, endCharacter: 2, monikers: [] }, - ] - - const document = { - id: '', - path: '', - contains: [], - definitions: [], - references: [], - ranges, - orderedRanges, - resultSets: new Map(), - definitionResults: new Map(), - referenceResults: new Map(), - hovers: new Map(), - monikers: new Map(), - packageInformation: new Map(), - } - - expect(lookupRanges(document, [1, 2, 3, 4])).toEqual([orderedRanges[0], orderedRanges[2], orderedRanges[1]]) - }) -}) - -describe('normalizeHover', () => { - it('should handle all lsp.Hover types', () => { - expect(normalizeHover({ contents: 'foo' })).toEqual('foo') - expect(normalizeHover({ contents: { language: 'typescript', value: 'bar' } })).toEqual( - '```typescript\nbar\n```' - ) - expect(normalizeHover({ contents: { kind: 'markdown', value: 'baz' } })).toEqual('baz') - expect( - normalizeHover({ - contents: ['foo', { language: 'typescript', value: 'bar' }], - }) - ).toEqual('foo\n\n---\n\n```typescript\nbar\n```') - }) -}) +import { reachableMonikers } from './importer' +import { MonikerId } from './models.database' describe('reachableMonikers', () => { it('should traverse moniker relation graph', () => { - const monikerSets = new Map>() - monikerSets.set(1, new Set([2])) - monikerSets.set(2, new Set([1, 4])) - monikerSets.set(3, new Set([4])) - monikerSets.set(4, new Set([2, 3])) - monikerSets.set(5, new Set([6])) - monikerSets.set(6, new Set([5])) + const monikerSets = new Map>() + monikerSets.set(1, new Set([2])) + monikerSets.set(2, new Set([1, 4])) + monikerSets.set(3, new Set([4])) + monikerSets.set(4, new Set([2, 3])) + monikerSets.set(5, new Set([6])) + monikerSets.set(6, new Set([5])) - expect(reachableMonikers(monikerSets, 1)).toEqual(new Set([1, 2, 3, 4])) + expect(reachableMonikers(monikerSets, 1)).toEqual(new Set([1, 2, 3, 4])) }) }) diff --git a/lsif/src/importer.ts b/lsif/src/importer.ts index 2582fe5a8b3d1..b74ed4a30a15a 100644 --- a/lsif/src/importer.ts +++ b/lsif/src/importer.ts @@ -1,39 +1,33 @@ +import { mustGet, assertId, hashKey, 
readEnvInt } from './util' +import { Correlator, ResultSetData, ResultSetId } from './correlator' +import { DefaultMap } from './default-map' +import { + DefinitionModel, + DocumentData, + DocumentModel, + MetaModel, + MonikerData, + PackageInformationData, + RangeData, + ReferenceModel, + ResultChunkModel, + DocumentIdRangeId, + DefinitionResultId, + MonikerId, + DefinitionReferenceResultId, + DocumentId, + ReferenceResultId, + PackageInformationId, + HoverResultId, + Ix, + OrderedRanges, +} from './models.database' +import { Edge, MonikerKind, Vertex, RangeId } from 'lsif-protocol' +import { encodeJSON } from './encoding' import { EntityManager } from 'typeorm' import { isEqual, uniqWith } from 'lodash' -import { DefModel, DocumentModel, MetaModel, RefModel } from './models' -import RelateUrl from 'relateurl' -import { encodeJSON } from './encoding' -import { TableInserter } from './inserter' -import { MonikerData, RangeData, ResultSetData, PackageInformationData, DocumentData, FlattenedRange } from './entities' -import { - Id, - VertexLabels, - EdgeLabels, - Vertex, - Edge, - MonikerKind, - ItemEdgeProperties, - DocumentEvent, - Event, - moniker, - next, - nextMoniker, - textDocument_definition, - textDocument_hover, - Range, - textDocument_references, - packageInformation, - PackageInformation, - item, - contains, - EventKind, - EventScope, - MetaData, - ElementTypes, - Moniker, -} from 'lsif-protocol' import { Package, SymbolReferences } from './xrepo' -import { Hover, MarkupContent } from 'vscode-languageserver-types' +import { TableInserter } from './inserter' /** * The internal version of our SQLite databases. We need to keep this in case @@ -44,946 +38,504 @@ import { Hover, MarkupContent } from 'vscode-languageserver-types' const INTERNAL_LSIF_VERSION = '0.1.0' /** - * A wrapper around `DocumentData` with additional context required during the - * importing of an LSIF dump. + * The target results per result chunk. This is used to determine the number of chunks + * created during conversion, but does not guarantee that the distribution of hash keys + * will be even. In practice, chunks are fairly evenly filled. */ -export interface DecoratedDocumentData extends DocumentData { - /** - * The identifier of the document. - */ - id: Id - - /** - * The root-relative path of the document. - */ - path: string - - /** - * The running set of identifiers that have a contains edge to this document - * in the LSIF dump. - */ - contains: Id[] - - /** - * A field that carries the data of definitionResult edges attached within - * the document if there is a non-local moniker attached to it; otherwise, - * the definition result data would be stored in field `definitionResults` - * of `DocumentData`. - */ - definitions: { ids: Id[]; moniker: MonikerData }[] - - /** - * A field that carries the data of referenceResult edges attached within - * the document if there is a non-local moniker attached to it; otherwise, - * the reference result data would be stored infield `referenceResults` - * of `DocumentData`. - */ - references: { ids: Id[]; moniker: MonikerData }[] -} +const RESULTS_PER_RESULT_CHUNK = readEnvInt('RESULTS_PER_RESULT_CHUNK', 500) /** - * Common state around the conversion of a single LSIF dump upload. This class - * receives the parsed vertex or edge, line by line, from the caller, and adds it - * into a new database file on disk.
Once finalized, the database is ready for use - * and relevant cross-repository metadata is returned to the caller, which - * is used to populate the xrepo database. - * - * This class should not be used directly - use the `importLsif` function instead. + * The maximum number of result chunks that will be created during conversion. */ -class LsifImporter { - // Bulk database inserters - private metaInserter: TableInserter MetaModel> - private documentInserter: TableInserter DocumentModel> - private defInserter: TableInserter DefModel> - private refInserter: TableInserter RefModel> - - // Vertex data - private definitionData: Map> = new Map() - private documentUris: Map = new Map() - private hoverData: Map = new Map() - private monikerData: Map = new Map() - private packageInformationData: Map = new Map() - private rangeData: Map = new Map() - private referenceData: Map> = new Map() - private resultSetData: Map = new Map() - - /** - * The root of all document URIs. This is extracted from the metadata vertex at - * the beginning of processing. - */ - private projectRoot?: URL - - /** - * A map of decorated `DocumentData` objects that are created on document begin events - * and are inserted into the databse on document end events. - */ - private documentData = new Map() - - /** - * A mapping for the relation from moniker to the set of monikers that they are related - * to via nextMoniker edges. This relation is symmetric (if `a` is in `MonikerSets[b]`, - * then `b` is in `monikerSets[a]`). - */ - private monikerSets = new Map>() - - /** - * The set of exported moniker identifiers that have package information attached. - */ - private importedMonikers = new Set() - - /** - * The set of exported moniker identifiers that have package information attached. - */ - private exportedMonikers = new Set() - - /** - * Create a new `LsifImporter` with the given entity manager. - * - * @param entityManager A transactional SQLite entity manager. - */ - constructor(private entityManager: EntityManager) { - // Determine the max batch size of each model type. We cannot perform an - // insert operation with more than 999 placeholder variables, so we need - // to flush our batch before we reach that amount. The batch size for each - // model is calculated based on the number of fields inserted. If fields - // are added to the models, these numbers will also need to change. - - this.metaInserter = new TableInserter(this.entityManager, MetaModel, Math.floor(999 / 3)) - this.documentInserter = new TableInserter(this.entityManager, DocumentModel, Math.floor(999 / 2)) - this.defInserter = new TableInserter(this.entityManager, DefModel, Math.floor(999 / 8)) - this.refInserter = new TableInserter(this.entityManager, RefModel, Math.floor(999 / 8)) - } - - /** - * Process a single vertex or edge. - * - * @param element A vertex or edge element from the LSIF dump. - */ - public async insert(element: Vertex | Edge): Promise { - if (element.type === ElementTypes.vertex) { - switch (element.label) { - case VertexLabels.metaData: - await this.handleMetaData(element) - break - case VertexLabels.event: - await this.handleEvent(element) - break - - // The remaining vertex handlers stash data into an appropriate map. This data - // may be retrieved when an edge that references it is seen, or when a document - // is finalized. 
- - case VertexLabels.definitionResult: - this.definitionData.set(element.id, new Map()) - break - case VertexLabels.document: - this.documentUris.set(element.id, element.uri) - break - case VertexLabels.hoverResult: - this.hoverData.set(element.id, normalizeHover(element.result)) - break - case VertexLabels.moniker: - this.monikerData.set(element.id, convertMoniker(element)) - break - case VertexLabels.packageInformation: - this.packageInformationData.set(element.id, convertPackageInformation(element)) - break - case VertexLabels.range: - this.rangeData.set(element.id, convertRange(element)) - break - case VertexLabels.referenceResult: - this.referenceData.set(element.id, new Map()) - break - case VertexLabels.resultSet: - this.resultSetData.set(element.id, { monikers: [] }) - break - } - } - - if (element.type === ElementTypes.edge) { - switch (element.label) { - case EdgeLabels.contains: - this.handleContains(element) - break - case EdgeLabels.item: - this.handleItemEdge(element) - break - case EdgeLabels.moniker: - this.handleMonikerEdge(element) - break - case EdgeLabels.next: - this.handleNextEdge(element) - break - case EdgeLabels.nextMoniker: - this.handleNextMonikerEdge(element) - break - case EdgeLabels.packageInformation: - this.handlePackageInformationEdge(element) - break - case EdgeLabels.textDocument_definition: - this.handleDefinitionEdge(element) - break - case EdgeLabels.textDocument_hover: - this.handleHoverEdge(element) - break - case EdgeLabels.textDocument_references: - this.handleReferenceEdge(element) - break - } - } - } - - /** - * Ensure that any outstanding records are flushed to the database. Also - * returns the set of packages provided by the project analyzed by this - * LSIF dump as well as the symbols imported into the LSIF dump from - * external packages. - */ - public async finalize(): Promise<{ packages: Package[]; references: SymbolReferences[] }> { - await this.metaInserter.finalize() - await this.documentInserter.finalize() - await this.defInserter.finalize() - await this.refInserter.finalize() - - return { packages: this.getPackages(), references: this.getReferences() } - } - - /** - * Return the set of packages provided by the project analyzed by this LSIF dump. - */ - private getPackages(): Package[] { - const packages: Package[] = [] - for (const id of this.exportedMonikers) { - const source = assertDefined(id, 'moniker', this.monikerData) - const packageInformationId = assertId(source.packageInformation) - const packageInfo = assertDefined(packageInformationId, 'packageInformation', this.packageInformationData) - packages.push({ - scheme: source.scheme, - name: packageInfo.name, - version: packageInfo.version, - }) - } - - return uniqWith(packages, isEqual) - } - - /** - * Return the symbols imported into the LSIF dump from external packages. 
- */ - private getReferences(): SymbolReferences[] { - const packageIdentifiers: Map = new Map() - for (const id of this.importedMonikers) { - const source = assertDefined(id, 'moniker', this.monikerData) - const packageInformationId = assertId(source.packageInformation) - const packageInfo = assertDefined(packageInformationId, 'packageInformation', this.packageInformationData) - const pkg = JSON.stringify({ - scheme: source.scheme, - name: packageInfo.name, - version: packageInfo.version, - }) - - const list = packageIdentifiers.get(pkg) - if (list) { - list.push(source.identifier) - } else { - packageIdentifiers.set(pkg, [source.identifier]) - } - } - - return Array.from(packageIdentifiers).map(([key, identifiers]) => ({ - package: JSON.parse(key) as Package, - identifiers, - })) - } +const MAX_NUM_RESULT_CHUNKS = readEnvInt('MAX_NUM_RESULT_CHUNKS', 1000) - // - // Vertex Handlers - - /** - * This should be the first vertex seen. Extract the project root so we - * can create relative paths for documents. Insert a row in the meta - * table with the LSIF protocol version. - * - * @param vertex The metadata vertex. - */ - private async handleMetaData(vertex: MetaData): Promise { - this.projectRoot = new URL(vertex.projectRoot) - await this.metaInserter.insert(convertMetadata(vertex)) - } - - /** - * Delegate document-scoped begin and end events. - * - * @param vertex The event vertex. - */ - private async handleEvent(vertex: Event): Promise { - if (vertex.scope === EventScope.document && vertex.kind === EventKind.begin) { - this.handleDocumentBegin(vertex as DocumentEvent) - } - - if (vertex.scope === EventScope.document && vertex.kind === EventKind.end) { - await this.handleDocumentEnd(vertex as DocumentEvent) - } - } - - // - // Edge Handlers - - /** - * Add range data ids into the document in which they are contained. Ensures - * all referenced vertices are defined. - * - * @param edge The contains edge. - */ - private handleContains(edge: contains): void { - if (this.documentData.has(edge.outV)) { - const source = assertDefined(edge.outV, 'document', this.documentData) - mapAssertDefined(edge.inVs, 'range', this.rangeData) - source.contains = source.contains.concat(edge.inVs) - } - } - - /** - * Update definition and reference fields from an item edge. Ensures all - * referenced vertices are defined. - * - * @param edge The item edge. - */ - private handleItemEdge(edge: item): void { - switch (edge.property) { - // `item` edges with a `property` refer to a referenceResult - case ItemEdgeProperties.definitions: - case ItemEdgeProperties.references: - this.handleGenericItemEdge(edge, 'referenceResult', this.referenceData) - break - - // `item` edges without a `property` refer to a definitionResult - case undefined: - this.handleGenericItemEdge(edge, 'definitionResult', this.definitionData) - break - } - } - - /** - * Attaches the specified moniker to the specified range or result set. Ensures all referenced - * vertices are defined. - * - * @param edge The moniker edge. - */ - private handleMonikerEdge(edge: moniker): void { - const source = assertDefined( - edge.outV, - 'range/resultSet', - this.rangeData, - this.resultSetData - ) - assertDefined(edge.inV, 'moniker', this.monikerData) - source.monikers = [edge.inV] - } - - /** - * Sets the next field fo the specified range or result set. Ensures all referenced vertices - * are defined. - * - * @param edge The next edge. 
- */ - private handleNextEdge(edge: next): void { - const outV = assertDefined( - edge.outV, - 'range/resultSet', - this.rangeData, - this.resultSetData - ) - assertDefined(edge.inV, 'resultSet', this.resultSetData) - outV.next = edge.inV - } - - /** - * Correlates monikers together so that when one moniker is queried, each correlated moniker - * is also returned as a strongly connected set. Ensures all referenced vertices are defined. - * - * @param edge The nextMoniker edge. - */ - private handleNextMonikerEdge(edge: nextMoniker): void { - assertDefined(edge.inV, 'moniker', this.monikerData) - assertDefined(edge.outV, 'moniker', this.monikerData) - this.correlateMonikers(edge.inV, edge.outV) // Forward direction - this.correlateMonikers(edge.outV, edge.inV) // Backwards direction - } - - /** - * Sets the package information of the specified moniker. If the moniker is an export moniker, - * then the package information will also be returned as an exported package by the `finalize` - * method. Ensures all referenced vertices are defined. - * - * @param edge The packageInformation edge. - */ - private handlePackageInformationEdge(edge: packageInformation): void { - const source = assertDefined(edge.outV, 'moniker', this.monikerData) - assertDefined(edge.inV, 'packageInformation', this.packageInformationData) - source.packageInformation = edge.inV - - if (source.kind === 'export') { - this.exportedMonikers.add(edge.outV) - } +/** + * Correlate each vertex and edge together, then populate the provided entity manager + * with the document, definition, and reference information. Returns the package and + * external reference data needed to populate the correlation database. + * + * @param entityManager A transactional SQLite entity manager. + * @param elements The stream of vertex and edge objects composing the LSIF dump. + */ +export async function importLsif( + entityManager: EntityManager, + elements: AsyncIterable +): Promise<{ packages: Package[]; references: SymbolReferences[] }> { + const correlator = new Correlator() - if (source.kind === 'import') { - this.importedMonikers.add(edge.outV) + let line = 0 + for await (const element of elements) { + try { + correlator.insert(element) + } catch (e) { + throw Object.assign( + new Error(`Failed to process line #${line + 1} (${JSON.stringify(element)}): ${e && e.message}`), + { status: 422 } + ) } - } - /** - * Sets the definition result of the specified range or result set. Ensures all referenced - * vertices are defined. - * - * @param edge The textDocument/definition edge. - */ - private handleDefinitionEdge(edge: textDocument_definition): void { - const outV = assertDefined( - edge.outV, - 'range/resultSet', - this.rangeData, - this.resultSetData - ) - assertDefined(edge.inV, 'definitionResult', this.definitionData) - outV.definitionResult = edge.inV + line++ } - /** - * Sets the hover result of the specified range or result set. Ensures all referenced - * vertices are defined. - * - * @param edge The textDocument/hover edge. - */ - private handleHoverEdge(edge: textDocument_hover): void { - const outV = assertDefined( - edge.outV, - 'range/resultSet', - this.rangeData, - this.resultSetData - ) - assertDefined(edge.inV, 'hoverResult', this.hoverData) - outV.hoverResult = edge.inV + if (correlator.lsifVersion === undefined) { + throw new Error('No metadata defined.') } - /** - * Sets the reference result of the specified range or result set. Ensures all - * referenced vertices are defined. 
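`importLsif` consumes an `AsyncIterable` of vertices and edges and reports parse failures by input line, which maps naturally onto a streaming line reader. One plausible way to drive it (the reader itself is not part of this diff, and the element type is left loose here):

```typescript
// Hypothetical driver: parse a newline-delimited JSON dump into the element
// stream that importLsif consumes. Validation and transactions are elided.
import * as fs from 'fs'
import * as readline from 'readline'

async function* parseElements(path: string): AsyncIterableIterator<object> {
    const reader = readline.createInterface({ input: fs.createReadStream(path) })
    for await (const line of reader) {
        if (line.trim() !== '') {
            yield JSON.parse(line) // one vertex or edge per line
        }
    }
}

// Sketch of the call site, assuming a TypeORM connection is in scope:
// await connection.transaction(entityManager =>
//     importLsif(entityManager, parseElements('dump.lsif')))
```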
- * - * @param edge The textDocument/references edge. - */ - private handleReferenceEdge(edge: textDocument_references): void { - const outV = assertDefined( - edge.outV, - 'range/resultSet', - this.rangeData, - this.resultSetData - ) - assertDefined(edge.inV, 'referenceResult', this.referenceData) - outV.referenceResult = edge.inV - } - - // - // Event Handlers - - /** - * Initialize a blank document which will be fully populated on the invocation of - * `handleDocumentEnd`. This document is created now so that we can stash the ids - * of ranges referred to by `contains` edges we see before the document end event - * occurs. - * - * @param event The document begin event. - */ - private handleDocumentBegin(event: DocumentEvent): void { - if (!this.projectRoot) { - throw new Error('No project root has been defined.') - } - - const uri = assertDefined(event.data, 'document', this.documentUris) + const numResults = correlator.definitionData.size + correlator.referenceData.size + const numResultChunks = Math.min(MAX_NUM_RESULT_CHUNKS, Math.floor(numResults / RESULTS_PER_RESULT_CHUNK) || 1) + + // Insert metadata + const metaInserter = new TableInserter(entityManager, MetaModel, getBatchSize(3)) + await populateMetadataTable(correlator, metaInserter, numResultChunks) + await metaInserter.flush() + + // Insert documents + const documentInserter = new TableInserter(entityManager, DocumentModel, getBatchSize(2)) + await populateDocumentsTable(correlator, documentInserter) + await documentInserter.flush() + + // Insert result chunks + const resultChunkInserter = new TableInserter(entityManager, ResultChunkModel, getBatchSize(2)) + await populateResultChunksTable(correlator, resultChunkInserter, numResultChunks) + await resultChunkInserter.flush() + + // Insert definitions and references + const definitionInserter = new TableInserter(entityManager, DefinitionModel, getBatchSize(8)) + const referenceInserter = new TableInserter(entityManager, ReferenceModel, getBatchSize(8)) + await populateDefinitionsAndReferencesTables(correlator, definitionInserter, referenceInserter) + await definitionInserter.flush() + await referenceInserter.flush() + + // Return correlation data to populate xrepo database + return { packages: getPackages(correlator), references: getReferences(correlator) } +} - const path = RelateUrl.relate(this.projectRoot.href + '/', new URL(uri).href, { - defaultPorts: {}, - output: RelateUrl.PATH_RELATIVE, - removeRootTrailingSlash: false, - }) +/** + * Determine the table inserter batch size for an entity given the number of + * fields inserted for that entity. We cannot perform an insert operation with + * more than 999 placeholder variables, so we need to flush our batch before + * we reach that amount. If fields are added to the models, the argument to + * this function also needs to change. + * + * @param numFields The number of fields for an entity. + */ +function getBatchSize(numFields: number): number { + return Math.floor(999 / numFields) +} - this.documentData.set(event.data, { - id: event.data, - path, - contains: [], - definitions: [], - references: [], - ranges: new Map(), - orderedRanges: [], - resultSets: new Map(), - definitionResults: new Map(), - referenceResults: new Map(), - hovers: new Map(), - monikers: new Map(), - packageInformation: new Map(), - }) +/** + * Correlate, encode, and insert all document entries for this dump. 
+ */ +async function populateDocumentsTable( + correlator: Correlator, + documentInserter: TableInserter DocumentModel> +): Promise { + // Collapse result sets data into the ranges that can reach them. The + // remainder of this function assumes that we can completely ignore + // the "next" edges coming from range data. + for (const [rangeId, range] of correlator.rangeData) { + canonicalizeItem(correlator, rangeId, range) } - /** - * Finalize the document by correlating and compressing any data reachable from a - * range that it contains. This document, as well as its definitions and references, - * will be submitted to the database for insertion. - * - * @param event The document end event. - */ - private async handleDocumentEnd(event: DocumentEvent): Promise { - const document = assertDefined(event.data, 'document', this.documentData) - - // Finalize document - await this.finalizeDocument(document) - - // Insert document record - await this.documentInserter.insert({ - path: document.path, - value: await encodeJSON({ + // Gather and insert document data that includes the ranges contained in the document, + // any associated hover data, and any associated moniker data/package information. + // Each range also has identifiers that correlate to a definition or reference result + // which can be found in a result chunk, created in the next step. + + for (const [documentId, documentPath] of correlator.documentPaths) { + // Create document record from the correlated information. This will also insert + // external definitions and references into the maps initialized above, which are + // inserted into the definitions and references table, respectively, below. + const document = gatherDocument(correlator, documentId, documentPath) + + // Encode and insert document record + await documentInserter.insert({ + path: documentPath, + data: await encodeJSON({ ranges: document.ranges, orderedRanges: document.orderedRanges, - resultSets: document.resultSets, - definitionResults: document.definitionResults, - referenceResults: document.referenceResults, - hovers: document.hovers, + hoverResults: document.hoverResults, monikers: document.monikers, packageInformation: document.packageInformation, }), }) + } +} - // Insert all related definitions - for (const { ids, moniker } of document.definitions) { - for (const range of lookupRanges(document, ids)) { - await this.defInserter.insert({ - scheme: moniker.scheme, - identifier: moniker.identifier, - documentPath: document.path, - ...range, - }) +/** + * Correlate and insert all result chunk entries for this dump. + */ +async function populateResultChunksTable( + correlator: Correlator, + resultChunkInserter: TableInserter ResultChunkModel>, + numResultChunks: number +): Promise { + // Create all the result chunks we'll be populating and inserting up-front. Data will + // be inserted into result chunks based on hash values (modulo the number of result chunks), + // and we don't want to create them lazily. 
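The `getBatchSize` helper introduced above encodes SQLite's limit of 999 bound parameters per statement. Worked through for the models in this diff:

```typescript
// floor(999 / numFields) rows per INSERT keeps the number of placeholder
// variables under SQLite's 999 cap.
const getBatchSize = (numFields: number): number => Math.floor(999 / numFields)

console.log(getBatchSize(2)) // 499 rows per batch (documents, result chunks)
console.log(getBatchSize(3)) // 333 rows per batch (meta)
console.log(getBatchSize(8)) // 124 rows per batch (definitions, references)
```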
+ + const resultChunks = new Array(numResultChunks).fill(null).map(() => ({ + paths: new Map(), + documentIdRangeIds: new Map(), + })) + + const chunkResults = (data: Map>): void => { + for (const [id, documentRanges] of data) { + // Flatten map into list of ranges + let documentIdRangeIds: DocumentIdRangeId[] = [] + for (const [documentId, rangeIds] of documentRanges) { + documentIdRangeIds = documentIdRangeIds.concat(rangeIds.map(rangeId => ({ documentId, rangeId }))) } - } - // Insert all related references - for (const { ids, moniker } of document.references) { - for (const range of lookupRanges(document, ids)) { - await this.refInserter.insert({ - scheme: moniker.scheme, - identifier: moniker.identifier, - documentPath: document.path, - ...range, - }) - } - } - } + // Insert ranges into target result chunk + const resultChunk = resultChunks[hashKey(id, resultChunks.length)] + resultChunk.documentIdRangeIds.set(id, documentIdRangeIds) - // - // Helper Functions - - /** - * Concatenate `edge.inVs` to the array at `map[edge.outV][edge.document]`. - * If any field is undefined, it is created on the fly. - * - * @param edge The edge. - * @param name The type of map (used for exception message). - * @param map The map to populate. - */ - private handleGenericItemEdge(edge: item, name: string, map: Map>): void { - const innerMap = assertDefined(edge.outV, name, map) - const data = innerMap.get(edge.document) - if (!data) { - innerMap.set(edge.document, edge.inVs) - } else { - for (const inV of edge.inVs) { - data.push(inV) + for (const documentId of documentRanges.keys()) { + // Add paths into the result chunk where they are used + resultChunk.paths.set(documentId, mustGet(correlator.documentPaths, documentId, 'documentPath')) } } } - /** - * Add `b` as a neighbor of `a` in `monikerSets`. - * - * @param a A moniker. - * @param b A second moniker. - */ - private correlateMonikers(a: Id, b: Id): void { - const neighbors = this.monikerSets.get(a) - if (neighbors) { - neighbors.add(b) - } else { - this.monikerSets.set(a, new Set([b])) - } - } + // Add definitions and references to result chunks + chunkResults(correlator.definitionData) + chunkResults(correlator.referenceData) - /** - * Populate a document object (whose only populated value should be its `contains` array). - * Each range that is contained in this document will be added to this object, as well as - * any item reachable from that range. This lazily populates the document with the minimal - * data, and keeps it self-contained within the document so that multiple queries are not - * needed when asking about (non-xrepo) LSIF data. - * - * @param document The document object. - */ - private async finalizeDocument(document: DecoratedDocumentData): Promise { - const orderedRanges: (RangeData & { id: Id })[] = [] - for (const id of document.contains) { - const range = assertDefined(id, 'range', this.rangeData) - orderedRanges.push({ id, ...range }) - await this.attachItemToDocument(document, id, range) + for (let id = 0; id < resultChunks.length; id++) { + // Empty chunk, no need to serialize as it will never be queried + if (resultChunks[id].paths.size === 0 && resultChunks[id].documentIdRangeIds.size === 0) { + continue } - // Sort ranges by their starting position - orderedRanges.sort((a, b) => a.startLine - b.startLine || a.startCharacter - b.startCharacter) - - // Populate a reverse lookup so ranges can be queried by id - // via `orderedRanges[range[id]]`. 
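The removed comment below describes an invariant the new document payload preserves: `ranges` maps a range identifier to its index in `orderedRanges`, so a range can be fetched by identifier in O(1) and by position in O(log n). A sketch of both lookups, assuming flattened range shapes and the `comparePosition` convention defined earlier in this diff (0 when the position is inside the range, -1 when it occurs after, +1 when it occurs before):

```typescript
// Illustrative sketch; the real code operates on RangeData/OrderedRanges.
interface FlatRange {
    startLine: number
    startCharacter: number
    endLine: number
    endCharacter: number
}

interface Position {
    line: number
    character: number
}

function comparePosition(range: FlatRange, position: Position): number {
    if (position.line < range.startLine) return +1
    if (position.line > range.endLine) return -1
    if (position.line === range.startLine && position.character < range.startCharacter) return +1
    if (position.line === range.endLine && position.character > range.endCharacter) return -1
    return 0
}

// By identifier: O(1) through the id -> index map.
function rangeById(ranges: Map<number, number>, orderedRanges: FlatRange[], id: number): FlatRange | undefined {
    const index = ranges.get(id)
    return index === undefined ? undefined : orderedRanges[index]
}

// By position: O(log n) binary search, in the spirit of getRangeByPosition.
function rangeByPosition(orderedRanges: FlatRange[], position: Position): FlatRange | undefined {
    let lo = 0
    let hi = orderedRanges.length - 1
    while (lo <= hi) {
        const mid = Math.floor((lo + hi) / 2)
        const cmp = comparePosition(orderedRanges[mid], position)
        if (cmp === 0) {
            return orderedRanges[mid]
        }
        if (cmp < 0) {
            lo = mid + 1 // position occurs after this range
        } else {
            hi = mid - 1 // position occurs before this range
        }
    }
    return undefined
}
```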
- for (const [index, range] of orderedRanges.entries()) { - document.ranges.set(range.id, index) - } + const data = await encodeJSON({ + documentPaths: resultChunks[id].paths, + documentIdRangeIds: resultChunks[id].documentIdRangeIds, + }) - // eslint-disable-next-line require-atomic-updates - document.orderedRanges = orderedRanges.map(({ id, ...range }) => range) + // Encode and insert result chunk record + await resultChunkInserter.insert({ id, data }) } +} /** - * Moves the data reachable from the given range or result set into the - * given document. This walks the edges of next/item edges as seen in - * one of the handler functions above. - * - * @param document The document object. - * @param id The identifier of the range or result set. - * @param item The range or result set. - */ - private async attachItemToDocument( - document: DecoratedDocumentData, - id: Id, - item: RangeData | ResultSetData - ): Promise { - // Find monikers for an item and add them to the item and document. - // This will also add any package information attached to a moniker - // to the document. - const monikers = this.attachItemMonikersToDocument(document, id, item) - - // Get non-local monitors, which we will add to the Defs and Refs table - // for lookup from another project. - const nonlocalMonikers = monikers.filter(m => m.kind !== MonikerKind.local) - - // Add result set to document, if it doesn't exist - if (item.next && !document.resultSets.has(item.next)) { - const resultSet = assertDefined(item.next, 'resultSet', this.resultSetData) - document.resultSets.set(item.next, resultSet) - await this.attachItemToDocument(document, item.next, resultSet) + /** + * Correlate and insert all definition and reference entries for this dump. + */ +async function populateDefinitionsAndReferencesTables( + correlator: Correlator, + definitionInserter: TableInserter DefinitionModel>, + referenceInserter: TableInserter ReferenceModel> +): Promise { + // Determine the set of monikers that are attached to a definition or a + // reference result. Correlating information in this way has two benefits: + // (1) it reduces duplicates in the definitions and references tables + // (2) it stops us from re-iterating over the range data of the entire + // LSIF dump, which is by far the largest proportion of data. + + const definitionMonikers = new DefaultMap>(() => new Set()) + const referenceMonikers = new DefaultMap>(() => new Set()) + + for (const range of correlator.rangeData.values()) { + if (range.monikerIds.size === 0) { + continue } - // Add hover to document, if it doesn't exist - if (item.hoverResult && !document.hovers.has(item.hoverResult)) { - const hoverResult = assertDefined(item.hoverResult, 'hoverResult', this.hoverData) - document.hovers.set(item.hoverResult, hoverResult) + if (range.definitionResultId !== undefined) { + const set = definitionMonikers.getOrDefault(range.definitionResultId) + for (const monikerId of range.monikerIds) { + set.add(monikerId) + } } - // Attach definition and reference results results to the document. This attaches - // some denormalized data on the `WrappedDocumentData` object which will also be - // used to populate the defs and refs tables once the document is finalized. - - if (item.definitionResult) { - const values = [] - for (const [key, ids] of assertDefined(item.definitionResult, 'definitionResult', this.definitionData)) { - // Resolve the "document" field from the "item" edge. This will correlate - // the referenced range identifier with the document in which it belongs.

-                if (key === document.id) {
-                    for (const moniker of nonlocalMonikers) {
-                        document.definitions.push({ ids, moniker })
-                    }
-                }
+    const insertMonikerRanges = async (
+        data: Map<DefinitionReferenceResultId, Map<DocumentId, RangeId[]>>,
+        monikers: Map<DefinitionReferenceResultId, Set<MonikerId>>,
+        inserter: TableInserter<DefinitionModel | ReferenceModel, new () => DefinitionModel | ReferenceModel>
+    ): Promise<void> => {
+        for (const [id, documentRanges] of data) {
+            // Get monikers. Nothing to insert if we don't have any.
+            const monikerIds = monikers.get(id)
+            if (monikerIds === undefined) {
+                continue
             }

-            // Store the definition results
-            document.definitionResults.set(item.definitionResult, values)
-        }
+            // Correlate each moniker with the document/range pairs stored in
+            // the result set provided by the data argument of this function.

-        if (item.referenceResult) {
-            const values = []
-            for (const [key, ids] of assertDefined(item.referenceResult, 'referenceResult', this.referenceData)) {
-                // Resolve the "document" field from the "item" edge. This will correlate
-                // the referenced range identifier with the document in which it belongs.
-                const documentPath = assertDefined(key, 'document', this.documentData).path
+            for (const monikerId of monikerIds) {
+                const moniker = mustGet(correlator.monikerData, monikerId, 'moniker')

-                for (const id of ids) {
-                    values.push({ documentPath, id })
-                }
+                for (const [documentId, rangeIds] of documentRanges) {
+                    const documentPath = mustGet(correlator.documentPaths, documentId, 'documentPath')

-                // If this is results for the current document, construct the data that
-                // will later be used to insert into the Refs table for this document.
+                    for (const rangeId of rangeIds) {
+                        const range = mustGet(correlator.rangeData, rangeId, 'range')

-                if (key === document.id) {
-                    for (const moniker of nonlocalMonikers) {
-                        document.references.push({ ids, moniker })
+                        await inserter.insert({
+                            scheme: moniker.scheme,
+                            identifier: moniker.identifier,
+                            documentPath,
+                            ...range,
+                        })
+                    }
                 }
             }
-
-            // Store the reference results
-            document.referenceResults.set(item.referenceResult, values)
         }
     }

-    /**
-     * Find all monikers reachable from the given range or result set, and
-     * add them to the item, and the document. If package information is
-     * also attached, it is also attached to the document.
-     *
-     * @param document The document object.
-     * @param id The identifier of the range or result set.
-     * @param item The range or result set.
-     */
-    private attachItemMonikersToDocument(
-        document: DecoratedDocumentData,
-        id: Id,
-        item: RangeData | ResultSetData
-    ): MonikerData[] {
-        if (item.monikers.length === 0) {
-            return []
-        }
-
-        const monikers = []
-        for (const id of reachableMonikers(this.monikerSets, item.monikers[0])) {
-            const moniker = assertDefined(id, 'moniker', this.monikerData)
-            monikers.push(moniker)
-            item.monikers.push(id)
-            document.monikers.set(id, moniker)
-
-            if (moniker.packageInformation) {
-                const packageInformation = assertDefined(
-                    moniker.packageInformation,
-                    'packageInformation',
-                    this.packageInformationData
-                )
-
-                document.packageInformation.set(moniker.packageInformation, packageInformation)
-            }
-        }
-
-        return monikers
-    }
+    // Insert definitions and references records
+    await insertMonikerRanges(correlator.definitionData, definitionMonikers, definitionInserter)
+    await insertMonikerRanges(correlator.referenceData, referenceMonikers, referenceInserter)
 }
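A usage sketch of the inserter contract relied on above. Only `insert` and `flush` are confirmed by this diff (see the `lsif/src/inserter.ts` hunk below); the declared shape and the moniker values here are assumptions for illustration.

```typescript
// Assumed surface of a TableInserter instance; construction is elided.
declare const definitionInserter: {
    insert(model: object): Promise<void>
    flush(): Promise<void>
}

async function insertExample(): Promise<void> {
    await definitionInserter.insert({
        scheme: 'npm',
        identifier: 'math-util:src/index:add', // hypothetical moniker identifier
        documentPath: 'src/index.ts',
        startLine: 0,
        startCharacter: 16,
        endLine: 0,
        endCharacter: 19,
    })
    await definitionInserter.flush() // ensure any buffered rows reach SQLite
}
```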

 /**
- * Return the value of `id`, or throw an exception if it is undefined.
- *
- * @param id The identifier.
+ * Insert metadata row. This gives us a place to store the version of the converter that
+ * created a database in case we have backwards-incompatible changes in the future that
+ * require historic version flagging. This also stores the number of result chunks
+ * determined above so that we can have stable hashes at query time.
  */
-function assertId(id: Id | undefined): Id {
-    if (id) {
-        return id
-    }
-
-    throw new Error('id is undefined')
+async function populateMetadataTable(
+    correlator: Correlator,
+    metaInserter: TableInserter<MetaModel, new () => MetaModel>,
+    numResultChunks: number
+): Promise<void> {
+    await metaInserter.insert({
+        id: 1,
+        lsifVersion: correlator.lsifVersion,
+        sourcegraphVersion: INTERNAL_LSIF_VERSION,
+        numResultChunks,
+    })
 }

 /**
- * Return the value of the key `id` in one of the given maps. The first value
- * to exist is returned. If the key does not exist in any map, an exception is
- * thrown.
- *
- * @param id The id to search for.
- * @param name The type of element (used for exception message).
- * @param maps The set of maps to query.
+ * Gather all package information that is referenced by an exported
+ * moniker. These will be the packages that are provided by the repository
+ * represented by this LSIF dump.
  */
-function assertDefined<T>(id: Id, name: string, ...maps: Map<Id, T>[]): T {
-    for (const map of maps) {
-        const value = map.get(id)
-        if (value !== undefined) {
-            return value
-        }
+function getPackages(correlator: Correlator): Package[] {
+    const packages: Package[] = []
+    for (const id of correlator.exportedMonikers) {
+        const source = mustGet(correlator.monikerData, id, 'moniker')
+        const packageInformationId = assertId(source.packageInformationId)
+        const packageInfo = mustGet(correlator.packageInformationData, packageInformationId, 'packageInformation')
+        packages.push({
+            scheme: source.scheme,
+            name: packageInfo.name,
+            version: packageInfo.version,
+        })
     }

-    throw new Error(`Unknown ${name} '${id}'.`)
+    return uniqWith(packages, isEqual)
 }
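A note on the `uniqWith(packages, isEqual)` call above: each row is a fresh object literal, so identity-based deduplication (a `Set`, for example) would keep duplicates; lodash's deep equality collapses them.

```typescript
import { isEqual, uniqWith } from 'lodash'

const packages = [
    { scheme: 'npm', name: 'math-util', version: '0.1.0' },
    { scheme: 'npm', name: 'math-util', version: '0.1.0' },
]

// Deep equality collapses the two structurally identical literals to one entry.
console.log(uniqWith(packages, isEqual)) // [{ scheme: 'npm', name: 'math-util', version: '0.1.0' }]
```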

 /**
- * Call `assertDefined` over the given ids.
- *
- * @param ids The ids to map over.
- * @param name The type of element (used for exception message).
- * @param maps The set of maps to query.
+ * Gather all imported moniker identifiers along with their package
+ * information. These will be the packages that are a dependency of the
+ * repository represented by this LSIF dump.
  */
-function mapAssertDefined<T>(ids: Id[], name: string, ...maps: Map<Id, T>[]): T[] {
-    return ids.map(id => assertDefined(id, name, ...maps))
-}
+function getReferences(correlator: Correlator): SymbolReferences[] {
+    const packageIdentifiers: Map<string, string[]> = new Map()
+    for (const id of correlator.importedMonikers) {
+        const source = mustGet(correlator.monikerData, id, 'moniker')
+        const packageInformationId = assertId(source.packageInformationId)
+        const packageInfo = mustGet(correlator.packageInformationData, packageInformationId, 'packageInformation')
+        const pkg = JSON.stringify({
+            scheme: source.scheme,
+            name: packageInfo.name,
+            version: packageInfo.version,
+        })

-/**
- * Extract the version from a protocol `MetaData` object.
- *
- * @param meta The protocol object.
- */
-function convertMetadata(meta: MetaData): { lsifVersion: string; sourcegraphVersion: string } {
-    return {
-        lsifVersion: meta.version,
-        sourcegraphVersion: INTERNAL_LSIF_VERSION,
+        const list = packageIdentifiers.get(pkg)
+        if (list) {
+            list.push(source.identifier)
+        } else {
+            packageIdentifiers.set(pkg, [source.identifier])
+        }
     }
-}

-/**
- * Convert a protocol `Moniker` object into a `MonikerData` object.
- *
- * @param moniker The moniker object.
- */
-function convertMoniker(moniker: Moniker): MonikerData {
-    return { kind: moniker.kind || MonikerKind.local, scheme: moniker.scheme, identifier: moniker.identifier }
-}
-
-/**
- * Convert a protocol `PackageInformation` object into a `PackgeInformationData` object.
- *
- * @param info The protocol object.
- */
-function convertPackageInformation(info: PackageInformation): PackageInformationData {
-    return { name: info.name, version: info.version || '$missing' }
-}
-
-/**
- * Convert a protocol `Range` object into a `RangeData` object.
- *
- * @param range The range object.
- */
-function convertRange(range: Range): RangeData {
-    return { ...flattenRange(range), monikers: [] }
+    return Array.from(packageIdentifiers).map(([key, identifiers]) => ({
+        package: JSON.parse(key) as Package,
+        identifiers,
+    }))
 }

 /**
- * Convert a set of range identifers into the flattened range objects stored by
- * identifier in the given document. This requires that the document's `ranges`
- * and `orderedRanges` fields to be completely populated.
+ * Flatten the definition result, reference result, hover results, and monikers of range
+ * and result set items by following next links in the graph. This needs to be run over
+ * each range before committing them to a document.
  *
- * @param document The document object.
- * @param ids The list of ids.
+ * @param correlator The correlator with all vertices and edges inserted.
+ * @param id The item identifier.
+ * @param item The range or result set item.
  */
-export function lookupRanges(document: DecoratedDocumentData, ids: Id[]): FlattenedRange[] {
-    const ranges = []
-    for (const id of ids) {
-        const rangeIndex = document.ranges.get(id)
-        if (rangeIndex === undefined) {
-            continue
+function canonicalizeItem(correlator: Correlator, id: RangeId | ResultSetId, item: RangeData | ResultSetData): void {
+    const monikers = new Set<MonikerId>()
+    if (item.monikerIds.size > 0) {
+        // If we have any monikers attached to this item, then we only need to look at the
+        // monikers reachable from any attached moniker. All other attached monikers are
+        // necessarily reachable, so we can choose any single value from the moniker set
+        // as the source of the graph traversal.
+
+        const candidateMoniker = item.monikerIds.keys().next().value
+
+        for (const monikerId of reachableMonikers(correlator.monikerSets, candidateMoniker)) {
+            if (mustGet(correlator.monikerData, monikerId, 'moniker').kind !== MonikerKind.local) {
+                monikers.add(monikerId)
+            }
         }
-
-        const range = document.orderedRanges[rangeIndex]
-        ranges.push(range)
     }

-    return ranges
-}
+    const nextId = correlator.nextData.get(id)
+    if (nextId !== undefined) {
+        // If we have a next edge to a result set, get it and canonicalize it first. This
+        // will recursively look at any result set that it can reach that hasn't yet been
+        // canonicalized.

-/**
- * Return the set of moniker identifiers which are reachable from the given value.
- * This relies on `monikerSets` being properly set up: each moniker edge `a -> b`
- * from the dump should ensure that `b` is a member of `monkerSets[a]`, and that
- * `a` is a member of `monikerSets[b]`.
- *
- * @param monikerSets A undirected graph of moniker ids.
- * @param id The initial moniker id.
- */
-export function reachableMonikers(monikerSets: Map<Id, Set<Id>>, id: Id): Set<Id> {
-    const combined = new Set<Id>()
-    let frontier = [id]
+        const nextItem = mustGet(correlator.resultSetData, nextId, 'resultSet')
+        canonicalizeItem(correlator, nextId, nextItem)

-    while (true) {
-        const val = frontier.pop()
-        if (val === undefined) {
-            break
+        // Add each moniker of the next set to this item
+        for (const monikerId of nextItem.monikerIds) {
+            monikers.add(monikerId)
         }

-        if (combined.has(val)) {
-            continue
+        // If we do not have a definition, reference, or hover result, take the result
+        // value from the next item.
+
+        if (item.definitionResultId === undefined) {
+            item.definitionResultId = nextItem.definitionResultId
         }

-        const nextValues = monikerSets.get(val)
-        if (nextValues) {
-            frontier = frontier.concat(Array.from(nextValues))
+        if (item.referenceResultId === undefined) {
+            item.referenceResultId = nextItem.referenceResultId
         }

-        combined.add(val)
+        if (item.hoverResultId === undefined) {
+            item.hoverResultId = nextItem.hoverResultId
+        }
     }

-    return combined
+    // Update our moniker sets (our normalized sets and any monikers of our next item)
+    item.monikerIds = monikers
+
+    // Remove the next edge so we don't traverse it a second time
+    correlator.nextData.delete(id)
 }
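A toy before/after illustration of the canonicalization above, with types trimmed to the relevant fields and invented identifiers.

```typescript
// Types reduced to the fields relevant here; identifiers are invented.
interface Item {
    definitionResultId?: number
    hoverResultId?: number
    monikerIds: Set<number>
}

const resultSet: Item = { definitionResultId: 49, hoverResultId: 16, monikerIds: new Set([7]) }
const range: Item = { hoverResultId: 99, monikerIds: new Set() }

// Conceptually, canonicalizing `range` with a next edge pointing at `resultSet`:
//   - range.definitionResultId is unset, so it inherits 49
//   - range.hoverResultId is already 99, so the next item's 16 is ignored
//   - range.monikerIds absorbs moniker 7 (assuming it is non-local)
```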

 /**
- * Handle the life-cycle of an importer. Creates an `LsifImporter`. This will create
- * a new importer, insert each vertex and edge in the given stream, then call the
- * importer's finalize method.
+ * Create a self-contained document object from the data in the given correlator. This
+ * includes hover and moniker results, as well as identifiers to definition and reference
+ * results (but not the actual ranges). See result chunk table for details.
  *
- * @param entityManager A transactional SQLite entity manager.
- * @param elements The stream of vertex and edge objects composing the LSIF dump.
+ * @param correlator The correlator with all vertices and edges inserted.
+ * @param currentDocumentId The identifier of the document.
+ * @param path The path of the document.
  */
-export async function importLsif(
-    entityManager: EntityManager,
-    elements: AsyncIterable<Vertex | Edge>
-): Promise<{ packages: Package[]; references: SymbolReferences[] }> {
-    const importer = new LsifImporter(entityManager)
+function gatherDocument(correlator: Correlator, currentDocumentId: DocumentId, path: string): DocumentData {
+    const document = {
+        path,
+        ranges: new Map<RangeId, Ix<OrderedRanges>>(),
+        orderedRanges: [] as RangeData[],
+        hoverResults: new Map<HoverResultId, string>(),
+        monikers: new Map<MonikerId, MonikerData>(),
+        packageInformation: new Map<PackageInformationId, PackageInformationData>(),
+    }

-    let i = 0
-    for await (const element of elements) {
-        try {
-            await importer.insert(element)
-        } catch (e) {
-            throw Object.assign(
-                new Error(`Failed to process line #${i + 1} (${JSON.stringify(element)}): ${e && e.message}`),
-                { status: 422 }
-            )
+    const addHover = (id: HoverResultId | undefined): void => {
+        if (id === undefined || document.hoverResults.has(id)) {
+            return
         }
-        i++
+        // Add hover result to the document, if defined and not a duplicate
+        const data = mustGet(correlator.hoverData, id, 'hoverResult')
+        document.hoverResults.set(id, data)
     }

-    return await importer.finalize()
-}
+    const addPackageInformation = (id: PackageInformationId | undefined): void => {
+        if (id === undefined || document.packageInformation.has(id)) {
+            return
+        }

-/**
- * Normalize an LSP hover object into a string.
- *
- * @param hover The hover object.
- */
-export function normalizeHover(hover: Hover): string {
-    const normalizeContent = (content: string | MarkupContent | { language: string; value: string }): string => {
-        if (typeof content === 'string') {
-            return content
+        // Add package information to the document, if defined and not a duplicate
+        const data = mustGet(correlator.packageInformationData, id, 'packageInformation')
+        document.packageInformation.set(id, data)
+    }
+
+    const addMoniker = (id: MonikerId | undefined): void => {
+        if (id === undefined || document.monikers.has(id)) {
+            return
         }

-        if (MarkupContent.is(content)) {
-            return content.value
+        // Add moniker to the document, if defined and not a duplicate
+        const moniker = mustGet(correlator.monikerData, id, 'moniker')
+        document.monikers.set(id, moniker)
+
+        // Add related package information to document
+        addPackageInformation(moniker.packageInformationId)
+    }
+
+    // Correlate range data with its id so after we sort we can pull out the ids in the
+    // same order to make the identifier -> index mapping.
+    const orderedRanges: (RangeData & { id: RangeId })[] = []
+
+    for (const id of mustGet(correlator.containsData, currentDocumentId, 'contains')) {
+        const range = mustGet(correlator.rangeData, id, 'range')
+        orderedRanges.push({ id, ...range })
+        addHover(range.hoverResultId)
+        for (const id of range.monikerIds) {
+            addMoniker(id)
         }
+    }

-        const tick = '```'
-        return `${tick}${content.language}\n${content.value}\n${tick}`
+    // Sort ranges by their starting position
+    orderedRanges.sort((a, b) => a.startLine - b.startLine || a.startCharacter - b.startCharacter)
+
+    // Populate a reverse lookup so ranges can be queried by id via `orderedRanges[range[id]]`.
+    for (const [index, range] of orderedRanges.entries()) {
+        document.ranges.set(range.id, index)
     }

-    const separator = '\n\n---\n\n'
-    const contents = Array.isArray(hover.contents) ? hover.contents : [hover.contents]
-    return contents
-        .map(c => normalizeContent(c).trim())
-        .filter(s => s)
-        .join(separator)
+    // eslint-disable-next-line require-atomic-updates
+    document.orderedRanges = orderedRanges.map(({ id, ...range }) => range)
+
+    return document
+}
-}
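Since `orderedRanges` is sorted by start position, a query position can be located by binary search. The following is a sketch under that assumption, not the query code from this change, and it ignores nested or overlapping ranges.

```typescript
interface FlatRange {
    startLine: number
    startCharacter: number
    endLine: number
    endCharacter: number
}

// Returns 0 if the position falls inside the range, a negative number if the
// range ends before the position, and a positive number if it starts after it.
function comparePosition(range: FlatRange, line: number, character: number): number {
    if (line < range.startLine || (line === range.startLine && character < range.startCharacter)) {
        return 1
    }
    if (line > range.endLine || (line === range.endLine && character > range.endCharacter)) {
        return -1
    }
    return 0
}

function findRange(orderedRanges: FlatRange[], line: number, character: number): FlatRange | undefined {
    let lo = 0
    let hi = orderedRanges.length - 1
    while (lo <= hi) {
        const mid = (lo + hi) >> 1
        const cmp = comparePosition(orderedRanges[mid], line, character)
        if (cmp === 0) {
            return orderedRanges[mid]
        }
        if (cmp < 0) {
            lo = mid + 1 // range ends before position; look right
        } else {
            hi = mid - 1 // range starts after position; look left
        }
    }
    return undefined
}
```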

 /**
- * Construct a flattened four-tuple of numbers from an LSP range.
+ * Return the set of moniker identifiers which are reachable from the given value.
+ * This relies on `monikerSets` being properly set up: each moniker edge `a -> b`
+ * from the dump should ensure that `b` is a member of `monikerSets[a]`, and that
+ * `a` is a member of `monikerSets[b]`.
  *
- * @param range The LSP range.
+ * @param monikerSets An undirected graph of moniker ids.
+ * @param id The initial moniker id.
  */
-function flattenRange(range: Range): FlattenedRange {
-    return {
-        startLine: range.start.line,
-        startCharacter: range.start.character,
-        endLine: range.end.line,
-        endCharacter: range.end.character,
+export function reachableMonikers(monikerSets: Map<MonikerId, Set<MonikerId>>, id: MonikerId): Set<MonikerId> {
+    const monikerIds = new Set<MonikerId>()
+    let frontier = [id]
+
+    while (frontier.length > 0) {
+        const val = assertId(frontier.pop())
+        if (monikerIds.has(val)) {
+            continue
+        }
+
+        monikerIds.add(val)
+
+        const nextValues = monikerSets.get(val)
+        if (nextValues) {
+            frontier = frontier.concat(Array.from(nextValues))
+        }
     }
+
+    // TODO - (efritz) should we sort these ids here instead of at query time?
+    return monikerIds
 }
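A usage sketch for the exported helper above (the importing module path is assumed): in an undirected two-edge graph 1 - 2 - 3, every member of the connected component is reachable from any other.

```typescript
import { Id } from 'lsif-protocol'
import { reachableMonikers } from './importer' // module path assumed

// An undirected two-edge moniker graph: 1 - 2 - 3.
const monikerSets = new Map<Id, Set<Id>>([
    [1, new Set<Id>([2])],
    [2, new Set<Id>([1, 3])],
    [3, new Set<Id>([2])],
])

console.log(reachableMonikers(monikerSets, 1)) // Set { 1, 2, 3 }
```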
diff --git a/lsif/src/inserter.ts b/lsif/src/inserter.ts
index 7a35eae358dcf..2c945465f0500 100644
--- a/lsif/src/inserter.ts
+++ b/lsif/src/inserter.ts
@@ -31,7 +31,7 @@ export class TableInserter<T, M extends new () => T> {
     /**
      * Submit a model for insertion. This may happen immediately, on a
-     * subsequent call to insert, or when the `finalize` method is called.
+     * subsequent call to insert, or when the `flush` method is called.
      *
      * @param model The instance to save.
      */
@@ -46,7 +46,7 @@ export class TableInserter<T, M extends new () => T> {
     /**
      * Ensure any outstanding records are inserted into the database.
      */
-    public finalize(): Promise<void> {
+    public flush(): Promise<void> {
         return this.executeBatch()
     }

diff --git a/lsif/src/models.database.ts b/lsif/src/models.database.ts
new file mode 100644
index 0000000000000..50d60925e1fbf
--- /dev/null
+++ b/lsif/src/models.database.ts
@@ -0,0 +1,375 @@
+import { Column, Entity, Index, PrimaryColumn } from 'typeorm'
+import { Id, MonikerKind } from 'lsif-protocol'
+
+export type DocumentId = Id
+export type DocumentPath = string
+export type RangeId = Id
+export type DefinitionResultId = Id
+export type ReferenceResultId = Id
+export type DefinitionReferenceResultId = DefinitionResultId | ReferenceResultId
+export type HoverResultId = Id
+export type MonikerId = Id
+export type PackageInformationId = Id
+
+/**
+ * A type that describes a gzipped and JSON-encoded value of type `T`.
+ */
+export type JSONEncoded<T> = string
+
+/**
+ * A type of hashed value created by hashing a value of type `T` and performing
+ * the modulus with a value of type `U`. This is to link the index of a result
+ * chunk to the hashed value of the identifiers stored within it.
+ */
+export type HashMod<T, U> = number
+
+/**
+ * A type of array index bounded by another type or value `T`. This is used
+ * to link the values of `ranges` to the indices of `orderedRanges` in the
+ * `DocumentData` interface defined below.
+ */
+export type Ix<T> = number
+
+/**
+ * An entity within the database describing LSIF data for a single repository
+ * and commit pair. There should be only one metadata entity per database.
+ */
+@Entity({ name: 'meta' })
+export class MetaModel {
+    /**
+     * A unique ID required by typeorm entities: always one here, as this
+     * table only ever contains a single row.
+     */
+    @PrimaryColumn('int')
+    public id!: number
+
+    /**
+     * The version string of the input LSIF that created this database.
+     */
+    @Column('text')
+    public lsifVersion!: string
+
+    /**
+     * The internal version of the LSIF server that created this database.
+     */
+    @Column('text')
+    public sourcegraphVersion!: string
+
+    /**
+     * The number of result chunks allocated when converting the dump stored
+     * in this database. This is used as an upper bound for the hash into the
+     * `resultChunks` table and must be recorded to keep the hash generation
+     * stable.
+     */
+    @Column('int')
+    public numResultChunks!: number
+}
+
+/**
+ * An entity within the database describing LSIF data for a single repository and
+ * commit pair. This contains a JSON-encoded `DocumentData` object that describes
+ * relations within a single file of the dump.
+ */
+@Entity({ name: 'documents' })
+export class DocumentModel {
+    /**
+     * The root-relative path of the document.
+     */
+    @PrimaryColumn('text')
+    public path!: DocumentPath
+
+    /**
+     * The JSON-encoded document data.
+     */
+    @Column('text')
+    public data!: JSONEncoded<DocumentData>
+}
+
+/**
+ * An entity within the database describing LSIF data for a single repository and
+ * commit pair. This contains a JSON-encoded `ResultChunk` object that describes
+ * a subset of the definition and reference results of the dump.
+ */
+@Entity({ name: 'resultChunks' })
+export class ResultChunkModel {
+    /**
+     * The identifier of the chunk. This is also the index of the chunk during its
+     * construction, and the identifiers contained in this chunk hash to this index
+     * (modulo the total number of chunks for the dump).
+     */
+    @PrimaryColumn('int')
+    public id!: HashMod<DefinitionReferenceResultId, number>
+
+    /**
+     * The JSON-encoded chunk data.
+     */
+    @Column('text')
+    public data!: JSONEncoded<ResultChunkData>
+}
+
+/**
+ * The base class for `DefinitionModel` and `ReferenceModel` as they have identical
+ * column descriptions.
+ */
+class Symbols {
+    /**
+     * A unique ID required by typeorm entities.
+     */
+    @PrimaryColumn('int')
+    public id!: number
+
+    /**
+     * The name of the package type (e.g. npm, pip).
+     */
+    @Column('text')
+    public scheme!: string
+
+    /**
+     * The unique identifier of the moniker.
+     */
+    @Column('text')
+    public identifier!: string
+
+    /**
+     * The path of the document to which this reference belongs.
+     */
+    @Column('text')
+    public documentPath!: DocumentPath
+
+    /**
+     * The zero-indexed line describing the start of this range.
+     */
+    @Column('int')
+    public startLine!: number
+
+    /**
+     * The zero-indexed line describing the end of this range.
+     */
+    @Column('int')
+    public endLine!: number
+
+    /**
+     * The zero-indexed character describing the start of this range.
+     */
+    @Column('int')
+    public startCharacter!: number
+
+    /**
+     * The zero-indexed character describing the end of this range.
+     */
+    @Column('int')
+    public endCharacter!: number
+}
+
+/**
+ * An entity within the database describing LSIF data for a single repository and commit
+ * pair. This maps external monikers to their range and the document that contains the
+ * definition of the moniker.
+ */
+@Entity({ name: 'definitions' })
+@Index(['scheme', 'identifier'])
+export class DefinitionModel extends Symbols {}
+
+/**
+ * An entity within the database describing LSIF data for a single repository and commit
+ * pair. This maps imported monikers to their range and the document that contains a
+ * reference to the moniker.
+ */
+@Entity({ name: 'references' })
+@Index(['scheme', 'identifier'])
+export class ReferenceModel extends Symbols {}
+
+/**
+ * Data for a single document within an LSIF dump. The data here can answer definitions,
+ * references, and hover queries if the results are all contained within the same document.
+ */
+export interface DocumentData {
+    /**
+     * A mapping from range identifiers to the index of the range in the
+     * `orderedRanges` array. We keep a mapping so we can look up range data by
+     * identifier quickly, and keep them sorted so we can find the range that
+     * encloses a position quickly.
+     */
+    ranges: Map<RangeId, Ix<OrderedRanges>>
+
+    /**
+     * An array of range data sorted by startLine, then by startCharacter. This
+     * allows us to perform binary search to find a particular location subsumed
+     * by a range in the document.
+     */
+    orderedRanges: OrderedRanges
+
+    /**
+     * A map of hover result identifiers to hover results normalized as a single
+     * string.
+     */
+    hoverResults: Map<HoverResultId, string>
+
+    /**
+     * A map of moniker identifiers to moniker data.
+     */
+    monikers: Map<MonikerId, MonikerData>
+
+    /**
+     * A map of package information identifiers to package information data.
+     */
+    packageInformation: Map<PackageInformationId, PackageInformationData>
+}
+
+/**
+ * A range identifier that also specifies the identifier of the document to
+ * which it belongs. This is sometimes necessary as we hold definition and
+ * reference results between packages, but the identifier of the range must be
+ * looked up in a map of another encoded document.
+ */
+export interface DocumentIdRangeId {
+    /**
+     * The identifier of the document. The path of the document can be queried
+     * by this identifier in the containing result chunk.
+     */
+    documentId: DocumentId
+
+    /**
+     * The identifier of the range in the referenced document.
+     */
+    rangeId: RangeId
+}
+
+/**
+ * A range identifier that also specifies the path of the document to which it
+ * belongs. This is generally created by determining the path from an instance of
+ * `DocumentIdRangeId`.
+ */
+export interface DocumentPathRangeId {
+    /**
+     * The path of the document.
+     */
+    documentPath: DocumentPath
+
+    /**
+     * The identifier of the range in the referenced document.
+     */
+    rangeId: RangeId
+}
+
+/**
+ * A result chunk is a subset of the definition and reference result data for the
+ * LSIF dump. Results are inserted into chunks based on the hash code of their
+ * identifier (thus every chunk has a roughly equal amount of data).
+ */
+export interface ResultChunkData {
+    /**
+     * A map from document identifiers to document paths. The document identifiers
+     * in the `documentIdRangeIds` field reference a concrete path stored here.
+     */
+    documentPaths: Map<DocumentId, DocumentPath>
+
+    /**
+     * A map from definition or reference result identifiers to the ranges that
+     * compose the result set. Each range is paired with the identifier of the
+     * document in which it can be found.
+     */
+    documentIdRangeIds: Map<DefinitionReferenceResultId, DocumentIdRangeId[]>
+}
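An illustrative helper (not part of this change) showing how the two maps in a result chunk combine into `DocumentPathRangeId` values.

```typescript
import { Id } from 'lsif-protocol'
import { DocumentPathRangeId, ResultChunkData } from './models.database'
import { mustGet } from './util'

function resolveRanges(chunk: ResultChunkData, resultId: Id): DocumentPathRangeId[] {
    return (chunk.documentIdRangeIds.get(resultId) || []).map(({ documentId, rangeId }) => ({
        // every document id referenced by the chunk has its path stored alongside
        documentPath: mustGet(chunk.documentPaths, documentId, 'documentPath'),
        rangeId,
    }))
}
```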
+
+/**
+ * An internal representation of a range vertex from an LSIF dump. It contains the same
+ * relevant edge data, which can be subsequently queried in the containing document. The
+ * data that was reachable via a result set has been collapsed into this object during
+ * import.
+ */
+export interface RangeData {
+    /**
+     * The line on which the range starts (0-indexed, inclusive).
+     */
+    startLine: number
+
+    /**
+     * The character on which the range starts (0-indexed, inclusive).
+     */
+    startCharacter: number
+
+    /**
+     * The line on which the range ends (0-indexed, inclusive).
+     */
+    endLine: number
+
+    /**
+     * The character on which the range ends (0-indexed, inclusive).
+     */
+    endCharacter: number
+
+    /**
+     * The identifier of the definition result attached to this range, if one exists.
+     * The definition result object can be queried by its identifier within the
+     * containing document.
+     */
+    definitionResultId?: DefinitionResultId
+
+    /**
+     * The identifier of the reference result attached to this range, if one exists.
+     * The reference result object can be queried by its identifier within the containing
+     * document.
+     */
+    referenceResultId?: ReferenceResultId
+
+    /**
+     * The identifier of the hover result attached to this range, if one exists. The
+     * hover result object can be queried by its identifier within the containing
+     * document.
+     */
+    hoverResultId?: HoverResultId
+
+    /**
+     * The set of moniker identifiers directly attached to this range. The moniker
+     * object can be queried by its identifier within the containing document.
+     */
+    monikerIds: Set<MonikerId>
+}
+
+/**
+ * An array of `RangeData` instances.
+ */
+export type OrderedRanges = RangeData[]
+
+/**
+ * Data about a moniker attached to a range.
+ */
+export interface MonikerData {
+    /**
+     * The kind of moniker (e.g. local, import, export).
+     */
+    kind: MonikerKind
+
+    /**
+     * The name of the package type (e.g. npm, pip).
+     */
+    scheme: string
+
+    /**
+     * The unique identifier of the moniker.
+     */
+    identifier: string
+
+    /**
+     * The identifier of the package information attached to this moniker, if one
+     * exists. The package information object can be queried by its identifier
+     * within the containing document.
+     */
+    packageInformationId?: PackageInformationId
+}
+
+/**
+ * Additional data about a non-local moniker.
+ */
+export interface PackageInformationData {
+    /**
+     * The name of the package the moniker describes.
+     */
+    name: string
+
+    /**
+     * The version of the package the moniker describes.
+     */
+    version: string | null
+}
diff --git a/lsif/src/models.ts b/lsif/src/models.ts
deleted file mode 100644
index dc456b601f9b3..0000000000000
--- a/lsif/src/models.ts
+++ /dev/null
@@ -1,214 +0,0 @@
-import { PrimaryGeneratedColumn, Column, Entity, PrimaryColumn, Index } from 'typeorm'
-
-/**
- * An entity within the database describing LSIF data for a single repository
- * and commit pair. There should be only one metadata entity per database.
- */
-@Entity({ name: 'meta' })
-export class MetaModel {
-    /**
-     * A unique ID required by typeorm entities.
-     */
-    @PrimaryGeneratedColumn('increment', { type: 'int' })
-    public id!: number
-
-    /**
-     * The version string of the input LSIF that created this database.
-     */
-    @Column('text')
-    public lsifVersion!: string
-
-    /**
-     * The internal version of the LSIF server that created this database.
-     */
-    @Column('text')
-    public sourcegraphVersion!: string
-}
-
-/**
- * An entity within the database describing LSIF data for a single repository and
- * commit pair. This contains a JSON-encoded `DocumentData` object that describes
- * relations within a single file. of the dump.
- */
-@Entity({ name: 'documents' })
-export class DocumentModel {
-    /**
-     * The root-relative path of the document.
-     */
-    @PrimaryColumn('text')
-    public path!: string
-
-    /**
-     * The JSON-encoded document data.
-     */
-    @Column('text')
-    public value!: string
-}
-
-/**
- * The base class for `DefModel` and `RefModel` as they have identical column
- * descriptions.
- */ -class Symbols { - /** - * A unique ID required by typeorm entities. - */ - @PrimaryColumn('int') - public id!: number - - /** - * The name of the package type (e.g. npm, pip). - */ - @Column('text') - public scheme!: string - - /** - * The unique identifier of the moniker. - */ - @Column('text') - public identifier!: string - - /** - * The path of the document to which this reference belongs. - */ - @Column('text') - public documentPath!: string - - /** - * The zero-indexed line describing the start of this range. - */ - @Column('int') - public startLine!: number - - /** - * The zero-indexed line describing the end of this range. - */ - @Column('int') - public endLine!: number - - /** - * The zero-indexed line describing the start of this range. - */ - @Column('int') - public startCharacter!: number - - /** - * The zero-indexed line describing the end of this range. - */ - @Column('int') - public endCharacter!: number -} - -/** - * An entity within the database describing LSIF data for a single repository and commit - * pair. This maps external monikers to their range and the document that contains the - * definition of the moniker. - */ -@Entity({ name: 'defs' }) -@Index(['scheme', 'identifier']) -export class DefModel extends Symbols {} - -/** - * An entity within the database describing LSIF data for a single repository and commit - * pair. This maps imported monikers to their range and the document that contains a - * reference to the moniker. - */ -@Entity({ name: 'refs' }) -@Index(['scheme', 'identifier']) -export class RefModel extends Symbols {} - -/** - * An entity within the xrepo database. This maps a given repository and - * commit pair to the package that it provides to other projects. - */ -@Entity({ name: 'packages' }) -@Index(['scheme', 'name', 'version']) -export class PackageModel { - /** - * A unique ID required by typeorm entities. - */ - @PrimaryGeneratedColumn('increment', { type: 'int' }) - public id!: number - - /** - * The name of the package type (e.g. npm, pip). - */ - @Column('text') - public scheme!: string - - /** - * The name of the package this repository and commit provides. - */ - @Column('text') - public name!: string - - /** - * The version of the package this repository and commit provides. - */ - @Column('text') - public version!: string - - /** - * The name of the source repository. - */ - @Column('text') - public repository!: string - - /** - * The source commit. - */ - @Column('text') - public commit!: string -} - -/** - * An entity within the xrepo database. This lists the dependencies of a given repository - * and commit pair to support find global reference operations. - */ -@Entity({ name: 'references' }) -@Index(['scheme', 'name', 'version']) -export class ReferenceModel { - /** - * A unique ID required by typeorm entities. - */ - @PrimaryGeneratedColumn('increment', { type: 'int' }) - public id!: number - - /** - * The name of the package type (e.g. npm, pip). - */ - @Column('text') - public scheme!: string - - /** - * The name of the package this repository and commit depends on. - */ - @Column('text') - public name!: string - - /** - * The version of the package this repository and commit depends on. - */ - @Column('text') - public version!: string - - /** - * The name of the source repository. - */ - @Column('text') - public repository!: string - - /** - * The source commit (revision hash). 
- */ - @Column('text') - public commit!: string - - /** - * A serialized bloom filter that encodes the set of symbols that this repository - * and commit imports from the given package. Testing this filter will prevent the - * backend from opening databases that will yield no results for a particular symbol. - */ - @Column('text') - public filter!: string -} diff --git a/lsif/src/models.xrepo.ts b/lsif/src/models.xrepo.ts new file mode 100644 index 0000000000000..c7991c2196dd8 --- /dev/null +++ b/lsif/src/models.xrepo.ts @@ -0,0 +1,97 @@ +import { PrimaryGeneratedColumn, Column, Entity, Index } from 'typeorm' + +/** + * An entity within the correlation database. This maps a given repository and + * commit pair to the package that it provides to other projects. + */ +@Entity({ name: 'packages' }) +@Index(['scheme', 'name', 'version']) +export class PackageModel { + /** + * A unique ID required by typeorm entities. + */ + @PrimaryGeneratedColumn('increment', { type: 'int' }) + public id!: number + + /** + * The name of the package type (e.g. npm, pip). + */ + @Column('text') + public scheme!: string + + /** + * The name of the package this repository and commit provides. + */ + @Column('text') + public name!: string + + /** + * The version of the package this repository and commit provides. + */ + @Column('text', { nullable: true }) + public version!: string | null + + /** + * The name of the source repository. + */ + @Column('text') + public repository!: string + + /** + * The source commit. + */ + @Column('text') + public commit!: string +} + +/** + * An entity within the correlation database. This lists the dependencies of a given + * repository and commit pair to support find global reference operations. + */ +@Entity({ name: 'references' }) +@Index(['scheme', 'name', 'version']) +export class ReferenceModel { + /** + * A unique ID required by typeorm entities. + */ + @PrimaryGeneratedColumn('increment', { type: 'int' }) + public id!: number + + /** + * The name of the package type (e.g. npm, pip). + */ + @Column('text') + public scheme!: string + + /** + * The name of the package this repository and commit depends on. + */ + @Column('text') + public name!: string + + /** + * The version of the package this repository and commit depends on. + */ + @Column('text', { nullable: true }) + public version!: string | null + + /** + * The name of the source repository. + */ + @Column('text') + public repository!: string + + /** + * The source commit (revision hash). + */ + @Column('text') + public commit!: string + + /** + * A serialized bloom filter that encodes the set of symbols that this repository + * and commit imports from the given package. Testing this filter will prevent the + * backend from opening databases that will yield no results for a particular symbol. 
+ */ + @Column('text') + public filter!: string +} diff --git a/lsif/src/query-cpp.test.ts b/lsif/src/query-cpp.test.ts index 783fc656bfd9c..9eebef70e979a 100644 --- a/lsif/src/query-cpp.test.ts +++ b/lsif/src/query-cpp.test.ts @@ -1,24 +1,29 @@ import * as fs from 'mz/fs' -import * as temp from 'temp' +import * as rimraf from 'rimraf' import * as zlib from 'mz/zlib' -import { ConnectionCache, DocumentCache } from './cache' +import { ConnectionCache, DocumentCache, ResultChunkCache } from './cache' import { createBackend } from './backend' -import { lsp } from 'lsif-protocol' +import { createCommit, createLocation } from './test-utils' describe('Database', () => { let storageRoot!: string const connectionCache = new ConnectionCache(10) const documentCache = new DocumentCache(10) + const resultChunkCache = new ResultChunkCache(10) beforeAll(async () => { - storageRoot = temp.mkdirSync('cpp') // eslint-disable-line no-sync - const backend = await createBackend(storageRoot, connectionCache, documentCache) + storageRoot = await fs.promises.mkdtemp('cpp-') + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const input = fs.createReadStream('./test-data/cpp/data/data.lsif.gz').pipe(zlib.createGunzip()) await backend.insertDump(input, 'five', createCommit('five')) }) + afterAll(() => { + rimraf.sync(storageRoot) + }) + it('should find all defs of `four` from main.cpp', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('five', createCommit('five')) const definitions = await db.definitions('main.cpp', { line: 12, character: 3 }) // TODO - (FIXME) currently the dxr indexer returns zero-width ranges @@ -26,7 +31,7 @@ describe('Database', () => { }) it('should find all defs of `five` from main.cpp', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('five', createCommit('five')) const definitions = await db.definitions('main.cpp', { line: 11, character: 3 }) // TODO - (FIXME) currently the dxr indexer returns zero-width ranges @@ -34,7 +39,7 @@ describe('Database', () => { }) it('should find all refs of `five` from main.cpp', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('five', createCommit('five')) const references = await db.references('main.cpp', { line: 11, character: 3 }) @@ -44,26 +49,6 @@ describe('Database', () => { expect(references).toContainEqual(createLocation('five.cpp', 2, 4, 2, 4)) expect(references).toContainEqual(createLocation('main.cpp', 11, 2, 11, 6)) expect(references).toContainEqual(createLocation('main.cpp', 13, 2, 13, 6)) - expect(references && references.length).toEqual(4) + expect(references).toHaveLength(4) }) }) - -// -// Helpers - -function createLocation( - uri: string, - startLine: number, - startCharacter: number, - endLine: number, - endCharacter: number -): lsp.Location { - return lsp.Location.create(uri, { - start: { line: startLine, character: startCharacter }, - end: { line: endLine, character: endCharacter }, - }) -} - -function createCommit(repository: 
string): string { - return repository.repeat(40).substring(0, 40) -} diff --git a/lsif/src/query-typescript.test.ts b/lsif/src/query-typescript.test.ts index d2b91ee4c34e5..c1d5735428549 100644 --- a/lsif/src/query-typescript.test.ts +++ b/lsif/src/query-typescript.test.ts @@ -1,19 +1,20 @@ import * as fs from 'mz/fs' -import * as temp from 'temp' +import * as rimraf from 'rimraf' import * as zlib from 'mz/zlib' -import { ConnectionCache, DocumentCache } from './cache' +import { ConnectionCache, DocumentCache, ResultChunkCache } from './cache' import { createBackend } from './backend' -import { lsp } from 'lsif-protocol' import { Readable } from 'stream' +import { createCommit, createLocation, createRemoteLocation } from './test-utils' describe('Database', () => { let storageRoot!: string const connectionCache = new ConnectionCache(10) const documentCache = new DocumentCache(10) + const resultChunkCache = new ResultChunkCache(10) beforeAll(async () => { - storageRoot = temp.mkdirSync('typescript') // eslint-disable-line no-sync - const backend = await createBackend(storageRoot, connectionCache, documentCache) + storageRoot = await fs.promises.mkdtemp('typescript-') + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const inputs: { input: Readable; repository: string; commit: string }[] = [] for (const repository of ['a', 'b1', 'b2', 'b3', 'c1', 'c2', 'c3']) { @@ -29,8 +30,12 @@ describe('Database', () => { } }) + afterAll(() => { + rimraf.sync(storageRoot) + }) + it('should find all defs of `add` from repo a', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('a', createCommit('a')) const definitions = await db.definitions('src/index.ts', { line: 11, character: 18 }) expect(definitions).toContainEqual(createLocation('src/index.ts', 0, 16, 0, 19)) @@ -38,7 +43,7 @@ describe('Database', () => { }) it('should find all defs of `add` from repo b1', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('b1', createCommit('b1')) const definitions = await db.definitions('src/index.ts', { line: 3, character: 12 }) expect(definitions).toContainEqual(createRemoteLocation('a', 'src/index.ts', 0, 16, 0, 19)) @@ -46,7 +51,7 @@ describe('Database', () => { }) it('should find all defs of `mul` from repo b1', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('b1', createCommit('b1')) const definitions = await db.definitions('src/index.ts', { line: 3, character: 16 }) expect(definitions).toContainEqual(createRemoteLocation('a', 'src/index.ts', 4, 16, 4, 19)) @@ -54,7 +59,7 @@ describe('Database', () => { }) it('should find all refs of `mul` from repo a', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('a', createCommit('a')) // TODO - (FIXME) why are these garbage results in the index const references = (await 
db.references('src/index.ts', { line: 4, character: 19 }))!.filter( @@ -78,7 +83,7 @@ describe('Database', () => { }) it('should find all refs of `mul` from repo b1', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('b1', createCommit('b1')) // TODO - (FIXME) why are these garbage results in the index const references = (await db.references('src/index.ts', { line: 3, character: 16 }))!.filter( @@ -102,7 +107,7 @@ describe('Database', () => { }) it('should find all refs of `add` from repo a', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('a', createCommit('a')) // TODO - (FIXME) why are these garbage results in the index const references = (await db.references('src/index.ts', { line: 0, character: 17 }))!.filter( @@ -136,7 +141,7 @@ describe('Database', () => { }) it('should find all refs of `add` from repo c1', async () => { - const backend = await createBackend(storageRoot, connectionCache, documentCache) + const backend = await createBackend(storageRoot, connectionCache, documentCache, resultChunkCache) const db = await backend.createDatabase('c1', createCommit('c1')) // TODO - (FIXME) why are these garbage results in the index const references = (await db.references('src/index.ts', { line: 3, character: 16 }))!.filter( @@ -169,40 +174,3 @@ describe('Database', () => { expect(references && references.length).toEqual(20) }) }) - -// -// Helpers - -function createLocation( - uri: string, - startLine: number, - startCharacter: number, - endLine: number, - endCharacter: number -): lsp.Location { - return lsp.Location.create(uri, { - start: { line: startLine, character: startCharacter }, - end: { line: endLine, character: endCharacter }, - }) -} - -function createRemoteLocation( - repository: string, - path: string, - startLine: number, - startCharacter: number, - endLine: number, - endCharacter: number -): lsp.Location { - return createLocation( - `git://${repository}?${createCommit(repository)}#${path}`, - startLine, - startCharacter, - endLine, - endCharacter - ) -} - -function createCommit(repository: string): string { - return repository.repeat(40).substring(0, 40) -} diff --git a/lsif/src/server.ts b/lsif/src/server.ts index 1f004bc71ce73..97e43fb13d67b 100644 --- a/lsif/src/server.ts +++ b/lsif/src/server.ts @@ -1,6 +1,6 @@ import bodyParser from 'body-parser' import express from 'express' -import { ConnectionCache, DocumentCache } from './cache' +import { ConnectionCache, DocumentCache, ResultChunkCache } from './cache' import { ERRNOLSIFDATA, createBackend } from './backend' import { hasErrorCode, readEnvInt } from './util' import { wrap } from 'async-middleware' @@ -23,6 +23,11 @@ const CONNECTION_CACHE_SIZE = readEnvInt('CONNECTION_CACHE_SIZE', 1000) */ const DOCUMENT_CACHE_SIZE = readEnvInt('DOCUMENT_CACHE_SIZE', 1000) +/** + * The maximum number of result chunks that can be held in memory at once. + */ +const RESULT_CHUNK_CACHE_SIZE = readEnvInt('RESULT_CHUNK_CACHE_SIZE', 1000) + /** * Whether or not to log a message when the HTTP server is ready and listening. 
 */
@@ -39,7 +44,8 @@ const STORAGE_ROOT = process.env.LSIF_STORAGE_ROOT || 'lsif-storage'
 async function main(): Promise<void> {
     const connectionCache = new ConnectionCache(CONNECTION_CACHE_SIZE)
     const documentCache = new DocumentCache(DOCUMENT_CACHE_SIZE)
-    const backend = await createBackend(STORAGE_ROOT, connectionCache, documentCache)
+    const resultChunkCache = new ResultChunkCache(RESULT_CHUNK_CACHE_SIZE)
+    const backend = await createBackend(STORAGE_ROOT, connectionCache, documentCache, resultChunkCache)
 
     const app = express()
     app.use(errorHandler)
diff --git a/lsif/src/test-utils.ts b/lsif/src/test-utils.ts
new file mode 100644
index 0000000000000..9144eb9397f3d
--- /dev/null
+++ b/lsif/src/test-utils.ts
@@ -0,0 +1,33 @@
+import { lsp } from 'lsif-protocol'
+
+export function createLocation(
+    uri: string,
+    startLine: number,
+    startCharacter: number,
+    endLine: number,
+    endCharacter: number
+): lsp.Location {
+    return lsp.Location.create(uri, {
+        start: { line: startLine, character: startCharacter },
+        end: { line: endLine, character: endCharacter },
+    })
+}
+
+export function createRemoteLocation(
+    repository: string,
+    path: string,
+    startLine: number,
+    startCharacter: number,
+    endLine: number,
+    endCharacter: number
+): lsp.Location {
+    const url = new URL(`git://${repository}`)
+    url.search = createCommit(repository)
+    url.hash = path
+
+    return createLocation(url.href, startLine, startCharacter, endLine, endCharacter)
+}
+
+export function createCommit(repository: string): string {
+    return repository.repeat(40).substring(0, 40)
+}
diff --git a/lsif/src/util.test.ts b/lsif/src/util.test.ts
new file mode 100644
index 0000000000000..cfb8c6098dd6f
--- /dev/null
+++ b/lsif/src/util.test.ts
@@ -0,0 +1,13 @@
+import { mustGetFromEither } from './util'
+
+describe('mustGetFromEither', () => {
+    it('should return first defined value', () => {
+        const map1 = new Map()
+        const map2 = new Map()
+
+        map2.set('foo', 'baz')
+        expect(mustGetFromEither(map1, map2, 'foo', '')).toEqual('baz')
+        map1.set('foo', 'bar')
+        expect(mustGetFromEither(map1, map2, 'foo', '')).toEqual('bar')
+    })
+})
diff --git a/lsif/src/util.ts b/lsif/src/util.ts
index c06640a8c7a60..433e52fc37fcb 100644
--- a/lsif/src/util.ts
+++ b/lsif/src/util.ts
@@ -1,3 +1,6 @@
+import { Id } from 'lsif-protocol'
+import { DefinitionReferenceResultId } from './models.database'
+
 /**
  * Reads an integer from an environment variable or defaults to the given value.
  *
@@ -17,3 +20,75 @@
 export function hasErrorCode(e: any, expectedCode: string): boolean {
     return e && e.code === expectedCode
 }
+
+/**
+ * Return the value of the given key from the given map. If the key does not
+ * exist in the map, an exception is thrown with the given error text.
+ *
+ * @param map The map to query.
+ * @param key The key to search for.
+ * @param elementType The type of element (used for exception message).
+ */
+export function mustGet<K, V>(map: Map<K, V>, key: K, elementType: string): V {
+    const value = map.get(key)
+    if (value !== undefined) {
+        return value
+    }
+
+    throw new Error(`Unknown ${elementType} '${key}'.`)
+}
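A usage sketch for `mustGet` above: unlike `Map.get`, a miss is a hard error, which turns silently missing correlation data into a loud failure during conversion.

```typescript
import { mustGet } from './util'

const paths = new Map<number, string>([[1, 'foo.ts']])

mustGet(paths, 1, 'documentPath') // => 'foo.ts'
mustGet(paths, 2, 'documentPath') // throws Error("Unknown documentPath '2'.")
```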
+
+/**
+ * Return the value of the given key from one of the given maps. The first
+ * non-undefined value to be found is returned. If the key does not exist in
+ * either map, an exception is thrown with the given error text.
+ *
+ * @param map1 The first map to query.
+ * @param map2 The second map to query.
+ * @param key The key to search for.
+ * @param elementType The type of element (used for exception message).
+ */
+export function mustGetFromEither<K, V>(map1: Map<K, V>, map2: Map<K, V>, key: K, elementType: string): V {
+    for (const map of [map1, map2]) {
+        const value = map.get(key)
+        if (value !== undefined) {
+            return value
+        }
+    }
+
+    throw new Error(`Unknown ${elementType} '${key}'.`)
+}
+
+/**
+ * Return the value of `id`, or throw an exception if it is undefined.
+ *
+ * @param id The identifier.
+ */
+export function assertId<T extends Id>(id: T | undefined): T {
+    if (id !== undefined) {
+        return id
+    }
+
+    throw new Error('id is undefined')
+}
+
+/**
+ * Hash a string or numeric identifier into the range `[0, maxIndex)`. The
+ * hash algorithm here is similar to the one used in Java's String.hashCode.
+ *
+ * @param id The identifier to hash.
+ * @param maxIndex The maximum of the range.
+ */
+export function hashKey(id: DefinitionReferenceResultId, maxIndex: number): number {
+    const s = `${id}`
+
+    let hash = 0
+    for (let i = 0; i < s.length; i++) {
+        const chr = s.charCodeAt(i)
+        hash = (hash << 5) - hash + chr
+        hash |= 0
+    }
+
+    // Hash value may be negative - must unset sign bit before modulus
+    return Math.abs(hash) % maxIndex
+}
diff --git a/lsif/src/xrepo.ts b/lsif/src/xrepo.ts
index ac2e7ff1f2781..53a3951c74ef6 100644
--- a/lsif/src/xrepo.ts
+++ b/lsif/src/xrepo.ts
@@ -1,7 +1,7 @@
 import { Connection, EntityManager } from 'typeorm'
 import { testFilter, createFilter } from './encoding'
 import { ConnectionCache } from './cache'
-import { ReferenceModel, PackageModel } from './models'
+import { ReferenceModel, PackageModel } from './models.xrepo'
 import { TableInserter } from './inserter'
 
 /**
@@ -22,7 +22,7 @@ export interface Package {
     /**
      * The version of the package.
      */
-    version: string
+    version: string | null
 }
 
 /**
@@ -62,7 +62,7 @@ export class XrepoDatabase {
      * @param name The package name.
      * @param version The package version.
      */
-    public async getPackage(scheme: string, name: string, version: string): Promise<PackageModel | undefined> {
+    public async getPackage(scheme: string, name: string, version: string | null): Promise<PackageModel | undefined> {
         return await this.withConnection(connection =>
             connection.getRepository(PackageModel).findOne({
                 where: {
@@ -89,7 +89,7 @@ export class XrepoDatabase {
                 await inserter.insert({ repository, commit, ...pkg })
             }
 
-            await inserter.finalize()
+            await inserter.flush()
         })
     }
 
@@ -113,7 +113,7 @@ export class XrepoDatabase {
     }: {
         scheme: string
         name: string
-        version: string
+        version: string | null
         value: string
     }): Promise<ReferenceModel[]> {
         const results = await this.withConnection(connection =>
@@ -160,7 +160,7 @@ export class XrepoDatabase {
             })
         }
 
-            await inserter.finalize()
+            await inserter.flush()
         })
     }
 
diff --git a/lsif/test-data/typescript/README.md b/lsif/test-data/typescript/README.md
index 920274f85aa46..1dff35ea704d5 100644
--- a/lsif/test-data/typescript/README.md
+++ b/lsif/test-data/typescript/README.md
@@ -6,7 +6,7 @@ The dump files used for testing are under version control, but can be regenerate
 
 ### Dump Layout
 
-The repository `a` defines the `math-util` package containing functions `add` and `mul`. The later function is defined in terms fo the former (and thus contains a eference to it).
+The repository `a` defines the `math-util` package containing functions `add` and `mul`. The latter function is defined in terms of the former (and thus contains a reference to it).
 
 The repositories `b{1,2,3}` have a dependency on `math-util` and import both `add` and `mul` functions.
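A usage sketch for the `hashKey` function introduced above: the index is deterministic, so the value computed at conversion time can be recomputed at query time to find the owning result chunk row.

```typescript
import { hashKey } from './util'

const numResultChunks = 4

// The returned index is always in [0, numResultChunks) and is stable across
// runs, which is why numResultChunks must be recorded in the meta table.
const index = hashKey('100', numResultChunks)
console.log(index >= 0 && index < numResultChunks) // true
```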
diff --git a/lsif/yarn.lock b/lsif/yarn.lock index 85f750b713438..4b76041455eb8 100644 --- a/lsif/yarn.lock +++ b/lsif/yarn.lock @@ -382,6 +382,11 @@ dependencies: "@types/node" "*" +"@types/events@*": + version "3.0.0" + resolved "https://registry.npmjs.org/@types/events/-/events-3.0.0.tgz#2862f3f58a9a7f7c3e78d79f130dd4d71c25c2a7" + integrity sha512-EaObqwIvayI5a8dCzhFrjKzVwKLxjoG9T6Ppd5CEo07LRKfQ8Yokw54r5+Wq7FaBQ+yXRvQAYPrHwya1/UFt9g== + "@types/express-serve-static-core@*": version "4.16.9" resolved "https://registry.npmjs.org/@types/express-serve-static-core/-/express-serve-static-core-4.16.9.tgz#69e00643b0819b024bdede95ced3ff239bb54558" @@ -399,6 +404,15 @@ "@types/express-serve-static-core" "*" "@types/serve-static" "*" +"@types/glob@*": + version "7.1.1" + resolved "https://registry.npmjs.org/@types/glob/-/glob-7.1.1.tgz#aa59a1c6e3fbc421e07ccd31a944c30eba521575" + integrity sha512-1Bh06cbWJUHMC97acuD6UMG29nMt0Aqz1vF3guLfG+kHHJhy3AyohZFFxYk2f7Q1SQIrNwvncxAE0N/9s70F2w== + dependencies: + "@types/events" "*" + "@types/minimatch" "*" + "@types/node" "*" + "@types/istanbul-lib-coverage@*", "@types/istanbul-lib-coverage@^2.0.0": version "2.0.1" resolved "https://registry.npmjs.org/@types/istanbul-lib-coverage/-/istanbul-lib-coverage-2.0.1.tgz#42995b446db9a48a11a07ec083499a860e9138ff" @@ -441,6 +455,11 @@ resolved "https://registry.npmjs.org/@types/mime/-/mime-2.0.1.tgz#dc488842312a7f075149312905b5e3c0b054c79d" integrity sha512-FwI9gX75FgVBJ7ywgnq/P7tw+/o1GUbtP0KzbtusLigAOgIgNISRK0ZPl4qertvXSIE8YbsVJueQ90cDt9YYyw== +"@types/minimatch@*": + version "3.0.3" + resolved "https://registry.npmjs.org/@types/minimatch/-/minimatch-3.0.3.tgz#3dca0e3f33b200fc7d1139c0cd96c1268cadfd9d" + integrity sha512-tHq6qdbT9U1IRSGf14CL0pUlULksvY9OZ+5eEgl1N7t+OA3tGvNpxJCzuKQlsNgCVwbAs670L1vcVQi8j9HjnA== + "@types/mz@0.0.32": version "0.0.32" resolved "https://registry.npmjs.org/@types/mz/-/mz-0.0.32.tgz#e8248b4e41424c052edc1725dd33650c313a3659" @@ -463,6 +482,14 @@ resolved "https://registry.npmjs.org/@types/relateurl/-/relateurl-0.2.28.tgz#6bda7db8653fa62643f5ee69e9f69c11a392e3a6" integrity sha1-a9p9uGU/piZD9e5p6facEaOS46Y= +"@types/rimraf@^2.0.2": + version "2.0.2" + resolved "https://registry.npmjs.org/@types/rimraf/-/rimraf-2.0.2.tgz#7f0fc3cf0ff0ad2a99bb723ae1764f30acaf8b6e" + integrity sha512-Hm/bnWq0TCy7jmjeN5bKYij9vw5GrDFWME4IuxV08278NtU/VdGbzsBohcCUJ7+QMqmUq5hpRKB39HeQWJjztQ== + dependencies: + "@types/glob" "*" + "@types/node" "*" + "@types/serve-static@*": version "1.13.3" resolved "https://registry.npmjs.org/@types/serve-static/-/serve-static-1.13.3.tgz#eb7e1c41c4468272557e897e9171ded5e2ded9d1" @@ -481,13 +508,6 @@ resolved "https://registry.npmjs.org/@types/stack-utils/-/stack-utils-1.0.1.tgz#0a851d3bd96498fa25c33ab7278ed3bd65f06c3e" integrity sha512-l42BggppR6zLmpfU6fq9HEa2oGPEI8yrSPL3GITjfRInppYFahObbIQOQK3UGxEnyQpltZLaPe75046NOZQikw== -"@types/temp@^0.8.34": - version "0.8.34" - resolved "https://registry.npmjs.org/@types/temp/-/temp-0.8.34.tgz#03e4b3cb67cbb48c425bbf54b12230fef85540ac" - integrity sha512-oLa9c5LHXgS6UimpEVp08De7QvZ+Dfu5bMQuWyMhf92Z26Q10ubEMOWy9OEfUdzW7Y/sDWVHmUaLFtmnX/2j0w== - dependencies: - "@types/node" "*" - "@types/yallist@^3.0.1": version "3.0.1" resolved "https://registry.npmjs.org/@types/yallist/-/yallist-3.0.1.tgz#3f43dac55d779e3058ba10334659d9fbb373807c" @@ -3528,10 +3548,10 @@ rimraf@^2.5.4, rimraf@^2.6.1, rimraf@^2.6.3: dependencies: glob "^7.1.3" -rimraf@~2.6.2: - version "2.6.3" - resolved 
"https://registry.npmjs.org/rimraf/-/rimraf-2.6.3.tgz#b2d104fe0d8fb27cf9e0a1cda8262dd3833c6cab" - integrity sha512-mwqeW5XsA2qAejG46gYdENaxXjx9onRNCfn7L0duuP4hCuTIi/QO7PDK07KJfp1d+izWPrzEJDcSqBa0OZQriA== +rimraf@^3.0.0: + version "3.0.0" + resolved "https://registry.npmjs.org/rimraf/-/rimraf-3.0.0.tgz#614176d4b3010b75e5c390eb0ee96f6dc0cebb9b" + integrity sha512-NDGVxTsjqfunkds7CqsOiEnxln4Bo7Nddl3XhS4pXg5OzwkLqJ971ZVAAnB+DDLnF76N+VnDEiBHaVV8I06SUg== dependencies: glob "^7.1.3" @@ -3984,13 +4004,6 @@ tar@^4: safe-buffer "^5.1.2" yallist "^3.0.3" -temp@^0.9.0: - version "0.9.0" - resolved "https://registry.npmjs.org/temp/-/temp-0.9.0.tgz#61391795a11bd9738d4c4d7f55f012cb8f55edaa" - integrity sha512-YfUhPQCJoNQE5N+FJQcdPz63O3x3sdT4Xju69Gj4iZe0lBKOtnAMi0SLj9xKhGkcGhsxThvTJ/usxtFPo438zQ== - dependencies: - rimraf "~2.6.2" - test-exclude@^5.2.3: version "5.2.3" resolved "https://registry.npmjs.org/test-exclude/-/test-exclude-5.2.3.tgz#c3d3e1e311eb7ee405e092dac10aefd09091eac0"