diff --git a/docs/configuration.rst b/docs/configuration.rst index c90d98637..4ee98fba9 100644 --- a/docs/configuration.rst +++ b/docs/configuration.rst @@ -65,6 +65,12 @@ REQUEST_VALIDATION they conform to the protocol. This may result in clients with poor standards compliance receiving errors rather than the expected results. +INITIAL_PEERS + When starting, you can set a list of initial peers to contact using a + simple text file. Add a URL per line for peers you would like to add to + the registry as initial peers. Each time the server starts, this set of + peers will be announced and added to the registry. + LANDING_MESSAGE_HTML The server provides a simple landing page at its root. By setting this value to point at a file containing an HTML block element it is possible to diff --git a/docs/datarepo.rst b/docs/datarepo.rst index e77bc3ad7..f04ad14b2 100644 --- a/docs/datarepo.rst +++ b/docs/datarepo.rst @@ -72,6 +72,46 @@ understand the structure of the repository that they are managing. $ ga4gh_repo list registry.db +------------------ +list-announcements +------------------ + +To view the announcements the server has received, use this command. It will +output the announcements as a TSV that can be imported into a spreadsheet +application. + +.. argparse:: + :module: ga4gh.server.cli.repomanager + :func: getRepoManagerParser + :prog: ga4gh_repo + :path: list-announcements + :nodefault: + +**Examples:** + +.. code-block:: bash + + $ ga4gh_repo list-announcements registry.db > announcements.tsv + +------------------- +clear-announcements +------------------- + +To clear the received announcements run this command. This cannot be undone. + +.. argparse:: + :module: ga4gh.server.cli.repomanager + :func: getRepoManagerParser + :prog: ga4gh_repo + :path: clear-announcements + :nodefault: + +**Examples:** + +.. code-block:: bash + + $ ga4gh_repo clear-announcements registry.db + ------ verify @@ -102,6 +142,44 @@ well in their repository. $ ga4gh_repo verify registry.db +-------- +add-peer +-------- + +The server maintains a list of known peers. To add a peer to this list use +the ``add-peer`` command. + +.. argparse:: + :module: ga4gh.server.cli.repomanager + :func: getRepoManagerParser + :prog: ga4gh_repo + :path: add-peer + :nodefault: + +**Examples:** + +.. code-block:: bash + + $ ga4gh_repo add-peer http://1kgenomes.ga4gh.org + +----------- +remove-peer +----------- + +You can remove a peer from the list of peers by its URL. + +.. argparse:: + :module: ga4gh.server.cli.repomanager + :func: getRepoManagerParser + :prog: ga4gh_repo + :path: remove-peer + :nodefault: + +**Examples:** + +.. code-block:: bash + + $ ga4gh_repo remove-peer http://1kgenomes.ga4gh.org ----------- add-dataset diff --git a/ga4gh/server/backend.py b/ga4gh/server/backend.py index 2760292d1..8fa9e0ac1 100644 --- a/ga4gh/server/backend.py +++ b/ga4gh/server/backend.py @@ -571,6 +571,15 @@ def expressionLevelsGenerator(self, request): request, rnaQuant) return iterator + def peersGenerator(self, request): + """ + Returns a generator over the (peer, nextPageToken) pairs + defined by the specified request. + """ + return paging.PeerIterator( + request, + self.getDataRepository()) + ########################################################### # # Public API methods. Each of these methods implements the @@ -676,6 +685,65 @@ def runGetCallSet(self, id_): callSet = variantSet.getCallSet(id_) return self.runGetRequest(callSet) + def runGetInfo(self, request): + """ + Returns information about the service including protocol version. + """ + return protocol.toJson(protocol.GetInfoResponse( + protocol_version=protocol.version)) + + def runAddAnnouncement(self, flaskrequest): + """ + Takes a flask request from the frontend and attempts to parse + into an AnnouncePeerRequest. If successful, it will log the + announcement to the `announcement` table with some other metadata + gathered from the request. + """ + announcement = {} + # We want to parse the request ourselves to collect a little more + # data about it. + try: + requestData = protocol.fromJson( + flaskrequest.get_data(), protocol.AnnouncePeerRequest) + announcement['hostname'] = flaskrequest.host_url + announcement['remote_addr'] = flaskrequest.remote_addr + announcement['user_agent'] = flaskrequest.headers.get('User-Agent') + except AttributeError: + # Sometimes in testing we will send protocol requests instead + # of flask requests and so the hostname and user agent won't + # be present. + try: + requestData = protocol.fromJson( + flaskrequest, protocol.AnnouncePeerRequest) + except Exception as e: + raise exceptions.InvalidJsonException(e) + except Exception as e: + raise exceptions.InvalidJsonException(e) + + # Validate the url before accepting the announcement + peer = datamodel.peers.Peer(requestData.peer.url) + peer.setAttributesJson(protocol.toJson( + requestData.peer.attributes)) + announcement['url'] = peer.getUrl() + announcement['attributes'] = peer.getAttributes() + try: + self.getDataRepository().insertAnnouncement(announcement) + except: + raise exceptions.BadRequestException(announcement['url']) + return protocol.toJson( + protocol.AnnouncePeerResponse(success=True)) + + def runListPeers(self, request): + """ + Takes a ListPeersRequest and returns a ListPeersResponse using + a page_token and page_size if provided. + """ + return self.runSearchRequest( + request, + protocol.ListPeersRequest, + protocol.ListPeersResponse, + self.peersGenerator) + def runGetVariant(self, id_): """ Returns a variant with the given id diff --git a/ga4gh/server/cli/repomanager.py b/ga4gh/server/cli/repomanager.py index 99eb8a009..5ec59d823 100644 --- a/ga4gh/server/cli/repomanager.py +++ b/ga4gh/server/cli/repomanager.py @@ -24,6 +24,7 @@ import ga4gh.server.datamodel.sequence_annotations as sequence_annotations import ga4gh.server.datamodel.continuous as continuous import ga4gh.server.datamodel.variants as variants +import ga4gh.server.datamodel.peers as peers import ga4gh.server.datarepo as datarepo import ga4gh.server.exceptions as exceptions import ga4gh.server.repo.rnaseq2ga as rnaseq2ga @@ -128,6 +129,20 @@ def list(self): # TODO this is _very_ crude. We need much more options and detail here. self._repo.printSummary() + def listAnnouncements(self): + """ + Lists all the announcements the repo has received. + """ + self._openRepo() + self._repo.printAnnouncements() + + def clearAnnouncements(self): + """ + Clears the list of announcements from the repo. + """ + self._openRepo() + self._repo.clearAnnouncements() + def verify(self): """ Checks that the data pointed to in the repository works and @@ -508,6 +523,33 @@ def func(): self._updateRepo(self._repo.removeIndividual, individual) self._confirmDelete("Individual", individual.getLocalId(), func) + def addPeer(self): + """ + Adds a new peer into this repo + """ + self._openRepo() + try: + peer = peers.Peer( + self._args.url, json.loads(self._args.attributes)) + except exceptions.BadUrlException: + raise exceptions.RepoManagerException("The URL for the peer was " + "malformed.") + except ValueError as e: + raise exceptions.RepoManagerException( + "The attributes message " + "was malformed. {}".format(e)) + self._updateRepo(self._repo.insertPeer, peer) + + def removePeer(self): + """ + Removes a peer by URL from this repo + """ + self._openRepo() + + def func(): + self._updateRepo(self._repo.removePeer, self._args.url) + self._confirmDelete("Peer", self._args.url, func) + def removeOntology(self): """ Removes an ontology from the repo. @@ -654,6 +696,12 @@ def addOntologyNameArgument(cls, subparser): "ontologyName", help="the name of the ontology") + @classmethod + def addUrlArgument(cls, subparser): + subparser.add_argument( + "url", + help="The URL of the given resource") + @classmethod def addReadGroupSetNameArgument(cls, subparser): subparser.add_argument( @@ -783,6 +831,33 @@ def getParser(cls): listParser.set_defaults(runner="list") cls.addRepoArgument(listParser) + listAnnouncementsParser = common_cli.addSubparser( + subparsers, "list-announcements", "List the announcements in" + "the repo.") + listAnnouncementsParser.set_defaults(runner="listAnnouncements") + cls.addRepoArgument(listAnnouncementsParser) + + clearAnnouncementsParser = common_cli.addSubparser( + subparsers, "clear-announcements", "List the announcements in" + "the repo.") + clearAnnouncementsParser.set_defaults(runner="clearAnnouncements") + cls.addRepoArgument(clearAnnouncementsParser) + + addPeerParser = common_cli.addSubparser( + subparsers, "add-peer", "Add a peer to the registry by URL.") + addPeerParser.set_defaults(runner="addPeer") + cls.addRepoArgument(addPeerParser) + cls.addUrlArgument(addPeerParser) + cls.addAttributesArgument(addPeerParser) + + removePeerParser = common_cli.addSubparser( + subparsers, "remove-peer", "Remove a peer from " + "the registry by URL.") + removePeerParser.set_defaults(runner="removePeer") + cls.addRepoArgument(removePeerParser) + cls.addUrlArgument(removePeerParser) + cls.addForceOption(removePeerParser) + addDatasetParser = common_cli.addSubparser( subparsers, "add-dataset", "Add a dataset to the data repo") addDatasetParser.set_defaults(runner="addDataset") diff --git a/ga4gh/server/datamodel/peers.py b/ga4gh/server/datamodel/peers.py new file mode 100644 index 000000000..7df19e0fe --- /dev/null +++ b/ga4gh/server/datamodel/peers.py @@ -0,0 +1,111 @@ +""" +Peer datamodel for exchanging data about GA4GH services. +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import json +import re +import urlparse + +import ga4gh.server.exceptions as exceptions + +import ga4gh.schemas.protocol as protocol + + +def isUrl(urlString): + """ + Attempts to return whether a given URL string is valid by checking + for the presence of the URL scheme and netloc using the urlparse + module, and then using a regex. + + From http://stackoverflow.com/questions/7160737/ + """ + parsed = urlparse.urlparse(urlString) + urlparseValid = parsed.netloc != '' and parsed.scheme != '' + regex = re.compile( + r'^(?:http|ftp)s?://' # http:// or https:// + r'(?:(?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)' + r'+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)|' # domain... + r'localhost|' # localhost... + r'\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3})' # ...or ip + r'(?::\d+)?' # optional port + r'(?:/?|[/?]\S+)$', re.IGNORECASE) + + return regex.match(urlString) and urlparseValid + + +class Peer(object): + """ + This class represents an abstract Peer object. + It sets default values and getters, as well as the + toProtocolElement function. + """ + def __init__(self, url, attributes={}, record=None): + self._url = "" + self._attributes = {} + self.setUrl(url) \ + .setAttributes(attributes) + if record is not None: + self.populateFromRow(record) + + def setUrl(self, url): + """ + Attempt to safely set the URL by string. + """ + if isUrl(url): + self._url = url + else: + raise exceptions.BadUrlException(url) + return self + + def getUrl(self): + return self._url + + def setAttributes(self, attributes): + """ + Sets the attributes message to the provided value. + """ + self._attributes = attributes + return self + + def setAttributesJson(self, attributesJson): + """ + Sets the attributes dictionary from a JSON string. + """ + try: + self._attributes = json.loads(attributesJson) + except: + raise exceptions.InvalidJsonException(attributesJson) + return self + + def serializeAttributes(self, msg): + """ + Sets the attrbutes of a message during serialization. + """ + attributes = self.getAttributes() + for key in attributes: + protocol.setAttribute( + msg.attributes.attr[key].values, attributes[key]) + return msg + + def getAttributes(self): + """ + Returns the attributes for the DatamodelObject. + """ + return self._attributes + + def toProtocolElement(self): + peer = protocol.Peer() + peer.url = self._url + self.serializeAttributes(peer) + return peer + + def populateFromRow(self, peerRecord): + """ + This method accepts a model record and sets class variables. + """ + self.setUrl(peerRecord.url) \ + .setAttributesJson(peerRecord.attributes) + return self diff --git a/ga4gh/server/datarepo.py b/ga4gh/server/datarepo.py index c84a598d6..b049693d7 100644 --- a/ga4gh/server/datarepo.py +++ b/ga4gh/server/datarepo.py @@ -22,9 +22,9 @@ import ga4gh.server.datamodel.genotype_phenotype as genotype_phenotype import ga4gh.server.datamodel.genotype_phenotype_featureset as g2pFeatureset import ga4gh.server.datamodel.rna_quantification as rna_quantification +import ga4gh.server.datamodel.peers as peers import ga4gh.server.exceptions as exceptions - -import repo.models as m +import ga4gh.server.repo.models as models import ga4gh.schemas.protocol as protocol @@ -46,6 +46,7 @@ def __init__(self): self._ontologyNameMap = {} self._ontologyIdMap = {} self._ontologyIds = [] + self._peers = [] def addDataset(self, dataset): """ @@ -79,6 +80,37 @@ def getDatasets(self): """ return [self._datasetIdMap[id_] for id_ in self._datasetIds] + def insertPeer(self, peer): + """ + Adds a peer to the list of peers in the repository. Used only in + testing. + """ + self._peers.append(peer) + + def getPeer(self, url): + """ + Select the first peer in the datarepo with the given url simulating + the behavior of selecting by URL. This is only used during testing. + """ + peers = filter(lambda x: x.getUrl() == url, self.getPeers()) + if len(peers) == 0: + raise exceptions.PeerNotFoundException(url) + return peers[0] + + def getPeers(self, offset=0, limit=100): + """ + Returns the list of peers with an optional offset and limit + simulating the SQL registry for testing. + """ + return self._peers[offset:offset + limit] + + def insertAnnouncement(self, announcement): + """ + A placeholder function to simulate receiving an announcement used + in testing. It will throw an exception if the URL is invalid. + """ + peers.Peer(announcement.get('url')) + def getNumDatasets(self): """ Returns the number of datasets in this data repository. @@ -420,9 +452,14 @@ def __init__( numReadGroupSets=1, numReadGroupsPerReadGroupSet=1, numPhenotypeAssociations=2, numPhenotypeAssociationSets=1, - numAlignments=2, numRnaQuantSets=2, numExpressionLevels=2): + numAlignments=2, numRnaQuantSets=2, numExpressionLevels=2, + numPeers=200): super(SimulatedDataRepository, self).__init__() + for i in xrange(numPeers): + peer = peers.Peer("http://test{}.org".format(i)) + self.insertPeer(peer) + # References for i in range(numReferenceSets): localId = "referenceSet{}".format(i) @@ -484,8 +521,8 @@ def __init__(self, fileName): self._creationTimeStamp = None # Connection to the DB. self._dbConnection = None - self.database = m.SqliteDatabase(self._dbFilename, **{}) - m.databaseProxy.initialize(self.database) + self.database = models.SqliteDatabase(self._dbFilename, **{}) + models.databaseProxy.initialize(self.database) def _checkWriteMode(self): if self._openMode != MODE_WRITE: @@ -495,6 +532,77 @@ def _checkReadMode(self): if self._openMode != MODE_READ: raise ValueError("Repo must be opened in read mode") + def getPeer(self, url): + """ + Finds a peer by URL and return the first peer record with that URL. + """ + peers = list(models.Peer.select().where(models.Peer.url == url)) + if len(peers) == 0: + raise exceptions.PeerNotFoundException(url) + return peers[0] + + def getPeers(self, offset=0, limit=1000): + """ + Get the list of peers using an SQL offset and limit. Returns a list + of peer datamodel objects in a list. + """ + select = models.Peer.select().order_by( + models.Peer.url).limit(limit).offset(offset) + return [peers.Peer(p.url, record=p) for p in select] + + def tableToTsv(self, model): + """ + Takes a model class and attempts to create a table in TSV format + that can be imported into a spreadsheet program. + """ + first = True + for item in model.select(): + if first: + header = "".join( + ["{}\t".format(x) for x in model._meta.fields.keys()]) + print(header) + first = False + row = "".join( + ["{}\t".format( + getattr(item, key)) for key in model._meta.fields.keys()]) + print(row) + + def printAnnouncements(self): + """ + Prints the announcement table to the log in tsv format. + """ + self.tableToTsv(models.Announcement) + + def clearAnnouncements(self): + """ + Flushes the announcement table. + """ + try: + q = models.Announcement.delete().where( + models.Announcement.id > 0) + q.execute() + except Exception as e: + raise exceptions.RepoManagerException(e) + + def insertAnnouncement(self, announcement): + """ + Adds an announcement to the registry for later analysis. + """ + url = announcement.get('url', None) + try: + peers.Peer(url) + except: + raise exceptions.BadUrlException(url) + try: + # TODO get more details about the user agent + models.Announcement.create( + url=announcement.get('url'), + attributes=json.dumps(announcement.get('attributes', {})), + remote_addr=announcement.get('remote_addr', None), + user_agent=announcement.get('user_agent', None)) + except Exception as e: + raise exceptions.RepoManagerException(e) + def open(self, mode=MODE_READ): """ Opens this repo in the specified mode. @@ -634,10 +742,10 @@ def _safeConnect(self): raise exceptions.RepoInvalidDatabaseException(self._dbFilename) def _createSystemTable(self): - self.database.create_table(m.System) - m.System.create( + self.database.create_table(models.System) + models.System.create( key=self.systemKeySchemaVersion, value=self.version) - m.System.create( + models.System.create( key=self.systemKeyCreationTimeStamp, value=datetime.datetime.now()) def _readSystemTable(self): @@ -645,10 +753,10 @@ def _readSystemTable(self): raise exceptions.RepoNotFoundException( self._dbFilename) try: - self._schemaVersion = m.System.get( - m.System.key == self.systemKeySchemaVersion).value - self._creationTimeStamp = m.System.get( - m.System.key == self.systemKeyCreationTimeStamp).value + self._schemaVersion = models.System.get( + models.System.key == self.systemKeySchemaVersion).value + self._creationTimeStamp = models.System.get( + models.System.key == self.systemKeyCreationTimeStamp).value except Exception: raise exceptions.RepoInvalidDatabaseException(self._dbFilename) schemaVersion = self.SchemaVersion(self._schemaVersion) @@ -657,14 +765,14 @@ def _readSystemTable(self): schemaVersion, self.version) def _createOntologyTable(self): - self.database.create_table(m.Ontology) + self.database.create_table(models.Ontology) def insertOntology(self, ontology): """ Inserts the specified ontology into this repository. """ try: - m.Ontology.create( + models.Ontology.create( id=ontology.getName(), name=ontology.getName(), dataurl=ontology.getDataUrl(), @@ -676,7 +784,7 @@ def insertOntology(self, ontology): # for the rest of the container objects. def _readOntologyTable(self): - for ont in m.Ontology.select(): + for ont in models.Ontology.select(): ontology = ontologies.Ontology(ont.name) ontology.populateFromRow(ont) self.addOntology(ontology) @@ -685,17 +793,17 @@ def removeOntology(self, ontology): """ Removes the specified ontology term map from this repository. """ - q = m.Ontology.delete().where(id == ontology.getId()) + q = models.Ontology.delete().where(id == ontology.getId()) q.execute() def _createReferenceTable(self): - self.database.create_table(m.Reference) + self.database.create_table(models.Reference) def insertReference(self, reference): """ Inserts the specified reference into this repository. """ - m.Reference.create( + models.Reference.create( id=reference.getId(), referencesetid=reference.getParentContainer().getId(), name=reference.getLocalId(), @@ -707,7 +815,7 @@ def insertReference(self, reference): sourceuri=reference.getSourceUri()) def _readReferenceTable(self): - for referenceRecord in m.Reference.select(): + for referenceRecord in models.Reference.select(): referenceSet = self.getReferenceSet( referenceRecord.referencesetid.id) reference = references.HtslibReference( @@ -717,14 +825,14 @@ def _readReferenceTable(self): referenceSet.addReference(reference) def _createReferenceSetTable(self): - self.database.create_table(m.Referenceset) + self.database.create_table(models.Referenceset) def insertReferenceSet(self, referenceSet): """ Inserts the specified referenceSet into this repository. """ try: - m.Referenceset.create( + models.Referenceset.create( id=referenceSet.getId(), name=referenceSet.getLocalId(), description=referenceSet.getDescription(), @@ -743,7 +851,7 @@ def insertReferenceSet(self, referenceSet): referenceSet.getLocalId()) def _readReferenceSetTable(self): - for referenceSetRecord in m.Referenceset.select(): + for referenceSetRecord in models.Referenceset.select(): referenceSet = references.HtslibReferenceSet( referenceSetRecord.name) referenceSet.populateFromRow(referenceSetRecord) @@ -752,14 +860,14 @@ def _readReferenceSetTable(self): self.addReferenceSet(referenceSet) def _createDatasetTable(self): - self.database.create_table(m.Dataset) + self.database.create_table(models.Dataset) def insertDataset(self, dataset): """ Inserts the specified dataset into this repository. """ try: - m.Dataset.create( + models.Dataset.create( id=dataset.getId(), name=dataset.getLocalId(), description=dataset.getDescription(), @@ -773,36 +881,37 @@ def removeDataset(self, dataset): Removes the specified dataset from this repository. This performs a cascading removal of all items within this dataset. """ - for datasetRecord in m.Dataset.select().where( - m.Dataset.id == dataset.getId()): + for datasetRecord in models.Dataset.select().where( + models.Dataset.id == dataset.getId()): datasetRecord.delete_instance(recursive=True) def removePhenotypeAssociationSet(self, phenotypeAssociationSet): """ Remove a phenotype association set from the repo """ - q = m.Phenotypeassociationset.delete().where( - m.Phenotypeassociationset.id == phenotypeAssociationSet.getId()) + q = models.Phenotypeassociationset.delete().where( + models.Phenotypeassociationset.id == + phenotypeAssociationSet.getId()) q.execute() def removeFeatureSet(self, featureSet): """ Removes the specified featureSet from this repository. """ - q = m.Featureset.delete().where( - m.Featureset.id == featureSet.getId()) + q = models.Featureset.delete().where( + models.Featureset.id == featureSet.getId()) q.execute() def removeContinuousSet(self, continuousSet): """ Removes the specified continuousSet from this repository. """ - q = m.ContinuousSet.delete().where( - m.ContinuousSet.id == continuousSet.getId()) + q = models.ContinuousSet.delete().where( + models.ContinuousSet.id == continuousSet.getId()) q.execute() def _readDatasetTable(self): - for datasetRecord in m.Dataset.select(): + for datasetRecord in models.Dataset.select(): dataset = datasets.Dataset(datasetRecord.name) dataset.populateFromRow(datasetRecord) assert dataset.getId() == datasetRecord.id @@ -810,7 +919,7 @@ def _readDatasetTable(self): self.addDataset(dataset) def _createReadGroupTable(self): - self.database.create_table(m.Readgroup) + self.database.create_table(models.Readgroup) def insertReadGroup(self, readGroup): """ @@ -820,7 +929,7 @@ def insertReadGroup(self, readGroup): experimentJson = json.dumps( protocol.toJsonDict(readGroup.getExperiment())) try: - m.Readgroup.create( + models.Readgroup.create( id=readGroup.getId(), readgroupsetid=readGroup.getParentContainer().getId(), name=readGroup.getLocalId(), @@ -839,8 +948,8 @@ def removeReadGroupSet(self, readGroupSet): Removes the specified readGroupSet from this repository. This performs a cascading removal of all items within this readGroupSet. """ - for readGroupSetRecord in m.Readgroupset.select().where( - m.Readgroupset.id == readGroupSet.getId()): + for readGroupSetRecord in models.Readgroupset.select().where( + models.Readgroupset.id == readGroupSet.getId()): readGroupSetRecord.delete_instance(recursive=True) def removeVariantSet(self, variantSet): @@ -848,26 +957,28 @@ def removeVariantSet(self, variantSet): Removes the specified variantSet from this repository. This performs a cascading removal of all items within this variantSet. """ - for variantSetRecord in m.Variantset.select().where( - m.Variantset.id == variantSet.getId()): + for variantSetRecord in models.Variantset.select().where( + models.Variantset.id == variantSet.getId()): variantSetRecord.delete_instance(recursive=True) def removeBiosample(self, biosample): """ Removes the specified biosample from this repository. """ - q = m.Biosample.delete().where(m.Biosample.id == biosample.getId()) + q = models.Biosample.delete().where( + models.Biosample.id == biosample.getId()) q.execute() def removeIndividual(self, individual): """ Removes the specified individual from this repository. """ - q = m.Individual.delete().where(m.Individual.id == individual.getId()) + q = models.Individual.delete().where( + models.Individual.id == individual.getId()) q.execute() def _readReadGroupTable(self): - for readGroupRecord in m.Readgroup.select(): + for readGroupRecord in models.Readgroup.select(): readGroupSet = self.getReadGroupSet( readGroupRecord.readgroupsetid.id) readGroup = reads.HtslibReadGroup( @@ -879,7 +990,7 @@ def _readReadGroupTable(self): readGroupSet.addReadGroup(readGroup) def _createReadGroupSetTable(self): - self.database.create_table(m.Readgroupset) + self.database.create_table(models.Readgroupset) def insertReadGroupSet(self, readGroupSet): """ @@ -890,7 +1001,7 @@ def insertReadGroupSet(self, readGroupSet): readGroupSet.getPrograms()]) statsJson = json.dumps(protocol.toJsonDict(readGroupSet.getStats())) try: - m.Readgroupset.create( + models.Readgroupset.create( id=readGroupSet.getId(), datasetid=readGroupSet.getParentContainer().getId(), referencesetid=readGroupSet.getReferenceSet().getId(), @@ -914,11 +1025,11 @@ def removeReferenceSet(self, referenceSet): referenceSet can be removed. """ try: - q = m.Reference.delete().where( - m.Reference.referencesetid == referenceSet.getId()) + q = models.Reference.delete().where( + models.Reference.referencesetid == referenceSet.getId()) q.execute() - q = m.Referenceset.delete().where( - m.Referenceset.id == referenceSet.getId()) + q = models.Referenceset.delete().where( + models.Referenceset.id == referenceSet.getId()) q.execute() except Exception: msg = ("Unable to delete reference set. " @@ -928,7 +1039,7 @@ def removeReferenceSet(self, referenceSet): raise exceptions.RepoManagerException(msg) def _readReadGroupSetTable(self): - for readGroupSetRecord in m.Readgroupset.select(): + for readGroupSetRecord in models.Readgroupset.select(): dataset = self.getDataset(readGroupSetRecord.datasetid.id) readGroupSet = reads.HtslibReadGroupSet( dataset, readGroupSetRecord.name) @@ -941,7 +1052,7 @@ def _readReadGroupSetTable(self): dataset.addReadGroupSet(readGroupSet) def _createVariantAnnotationSetTable(self): - self.database.create_table(m.Variantannotationset) + self.database.create_table(models.Variantannotationset) def insertVariantAnnotationSet(self, variantAnnotationSet): """ @@ -950,7 +1061,7 @@ def insertVariantAnnotationSet(self, variantAnnotationSet): analysisJson = json.dumps( protocol.toJsonDict(variantAnnotationSet.getAnalysis())) try: - m.Variantannotationset.create( + models.Variantannotationset.create( id=variantAnnotationSet.getId(), variantsetid=variantAnnotationSet.getParentContainer().getId(), ontologyid=variantAnnotationSet.getOntology().getId(), @@ -964,7 +1075,7 @@ def insertVariantAnnotationSet(self, variantAnnotationSet): raise exceptions.RepoManagerException(e) def _readVariantAnnotationSetTable(self): - for annotationSetRecord in m.Variantannotationset.select(): + for annotationSetRecord in models.Variantannotationset.select(): variantSet = self.getVariantSet( annotationSetRecord.variantsetid.id) ontology = self.getOntology(annotationSetRecord.ontologyid.id) @@ -977,14 +1088,14 @@ def _readVariantAnnotationSetTable(self): variantSet.addVariantAnnotationSet(variantAnnotationSet) def _createCallSetTable(self): - self.database.create_table(m.Callset) + self.database.create_table(models.Callset) def insertCallSet(self, callSet): """ Inserts a the specified callSet into this repository. """ try: - m.Callset.create( + models.Callset.create( id=callSet.getId(), name=callSet.getLocalId(), variantsetid=callSet.getParentContainer().getId(), @@ -994,7 +1105,7 @@ def insertCallSet(self, callSet): raise exceptions.RepoManagerException(e) def _readCallSetTable(self): - for callSetRecord in m.Callset.select(): + for callSetRecord in models.Callset.select(): variantSet = self.getVariantSet(callSetRecord.variantsetid.id) callSet = variants.CallSet(variantSet, callSetRecord.name) callSet.populateFromRow(callSetRecord) @@ -1003,7 +1114,7 @@ def _readCallSetTable(self): variantSet.addCallSet(callSet) def _createVariantSetTable(self): - self.database.create_table(m.Variantset) + self.database.create_table(models.Variantset) def insertVariantSet(self, variantSet): """ @@ -1017,7 +1128,7 @@ def insertVariantSet(self, variantSet): variantSet.getMetadata()]) urlMapJson = json.dumps(variantSet.getReferenceToDataUrlIndexMap()) try: - m.Variantset.create( + models.Variantset.create( id=variantSet.getId(), datasetid=variantSet.getParentContainer().getId(), referencesetid=variantSet.getReferenceSet().getId(), @@ -1033,7 +1144,7 @@ def insertVariantSet(self, variantSet): self.insertCallSet(callSet) def _readVariantSetTable(self): - for variantSetRecord in m.Variantset.select(): + for variantSetRecord in models.Variantset.select(): dataset = self.getDataset(variantSetRecord.datasetid.id) referenceSet = self.getReferenceSet( variantSetRecord.referencesetid.id) @@ -1046,7 +1157,7 @@ def _readVariantSetTable(self): dataset.addVariantSet(variantSet) def _createFeatureSetTable(self): - self.database.create_table(m.Featureset) + self.database.create_table(models.Featureset) def insertFeatureSet(self, featureSet): """ @@ -1054,7 +1165,7 @@ def insertFeatureSet(self, featureSet): """ # TODO add support for info and sourceUri fields. try: - m.Featureset.create( + models.Featureset.create( id=featureSet.getId(), datasetid=featureSet.getParentContainer().getId(), referencesetid=featureSet.getReferenceSet().getId(), @@ -1066,7 +1177,7 @@ def insertFeatureSet(self, featureSet): raise exceptions.RepoManagerException(e) def _readFeatureSetTable(self): - for featureSetRecord in m.Featureset.select(): + for featureSetRecord in models.Featureset.select(): dataset = self.getDataset(featureSetRecord.datasetid.id) # FIXME this should be handled elsewhere if 'cgd' in featureSetRecord.name: @@ -1087,7 +1198,7 @@ def _readFeatureSetTable(self): dataset.addFeatureSet(featureSet) def _createContinuousSetTable(self): - self.database.create_table(m.ContinuousSet) + self.database.create_table(models.ContinuousSet) def insertContinuousSet(self, continuousSet): """ @@ -1095,7 +1206,7 @@ def insertContinuousSet(self, continuousSet): """ # TODO add support for info and sourceUri fields. try: - m.ContinuousSet.create( + models.ContinuousSet.create( id=continuousSet.getId(), datasetid=continuousSet.getParentContainer().getId(), referencesetid=continuousSet.getReferenceSet().getId(), @@ -1106,7 +1217,7 @@ def insertContinuousSet(self, continuousSet): raise exceptions.RepoManagerException(e) def _readContinuousSetTable(self): - for continuousSetRecord in m.ContinuousSet.select(): + for continuousSetRecord in models.ContinuousSet.select(): dataset = self.getDataset(continuousSetRecord.datasetid.id) continuousSet = continuous.FileContinuousSet( dataset, continuousSetRecord.name) @@ -1118,14 +1229,14 @@ def _readContinuousSetTable(self): dataset.addContinuousSet(continuousSet) def _createBiosampleTable(self): - self.database.create_table(m.Biosample) + self.database.create_table(models.Biosample) def insertBiosample(self, biosample): """ Inserts the specified Biosample into this repository. """ try: - m.Biosample.create( + models.Biosample.create( id=biosample.getId(), datasetid=biosample.getParentContainer().getId(), name=biosample.getLocalId(), @@ -1141,7 +1252,7 @@ def insertBiosample(self, biosample): biosample.getParentContainer().getLocalId()) def _readBiosampleTable(self): - for biosampleRecord in m.Biosample.select(): + for biosampleRecord in models.Biosample.select(): dataset = self.getDataset(biosampleRecord.datasetid.id) biosample = biodata.Biosample( dataset, biosampleRecord.name) @@ -1150,15 +1261,14 @@ def _readBiosampleTable(self): dataset.addBiosample(biosample) def _createIndividualTable(self): - self.database.create_table(m.Individual) + self.database.create_table(models.Individual) def insertIndividual(self, individual): """ Inserts the specified individual into this repository. """ - # TODO add support for info and sourceUri fields. try: - m.Individual.create( + models.Individual.create( id=individual.getId(), datasetId=individual.getParentContainer().getId(), name=individual.getLocalId(), @@ -1174,7 +1284,7 @@ def insertIndividual(self, individual): individual.getParentContainer().getLocalId()) def _readIndividualTable(self): - for individualRecord in m.Individual.select(): + for individualRecord in models.Individual.select(): dataset = self.getDataset(individualRecord.datasetid.id) individual = biodata.Individual( dataset, individualRecord.name) @@ -1183,20 +1293,19 @@ def _readIndividualTable(self): dataset.addIndividual(individual) def _createPhenotypeAssociationSetTable(self): - self.database.create_table(m.Phenotypeassociationset) + self.database.create_table(models.Phenotypeassociationset) def _createRnaQuantificationSetTable(self): - self.database.create_table(m.Rnaquantificationset) + self.database.create_table(models.Rnaquantificationset) def insertPhenotypeAssociationSet(self, phenotypeAssociationSet): """ Inserts the specified phenotype annotation set into this repository. """ - # TODO add support for info and sourceUri fields. datasetId = phenotypeAssociationSet.getParentContainer().getId() attributes = json.dumps(phenotypeAssociationSet.getAttributes()) try: - m.Phenotypeassociationset.create( + models.Phenotypeassociationset.create( id=phenotypeAssociationSet.getId(), name=phenotypeAssociationSet.getLocalId(), datasetid=datasetId, @@ -1207,7 +1316,7 @@ def insertPhenotypeAssociationSet(self, phenotypeAssociationSet): phenotypeAssociationSet.getParentContainer().getId()) def _readPhenotypeAssociationSetTable(self): - for associationSetRecord in m.Phenotypeassociationset.select(): + for associationSetRecord in models.Phenotypeassociationset.select(): dataset = self.getDataset(associationSetRecord.datasetid.id) phenotypeAssociationSet = \ genotype_phenotype.RdfPhenotypeAssociationSet( @@ -1221,7 +1330,7 @@ def insertRnaQuantificationSet(self, rnaQuantificationSet): Inserts a the specified rnaQuantificationSet into this repository. """ try: - m.Rnaquantificationset.create( + models.Rnaquantificationset.create( id=rnaQuantificationSet.getId(), datasetid=rnaQuantificationSet.getParentContainer().getId(), referencesetid=rnaQuantificationSet.getReferenceSet().getId(), @@ -1234,7 +1343,7 @@ def insertRnaQuantificationSet(self, rnaQuantificationSet): rnaQuantificationSet.getParentContainer().getLocalId()) def _readRnaQuantificationSetTable(self): - for quantificationSetRecord in m.Rnaquantificationset.select(): + for quantificationSetRecord in models.Rnaquantificationset.select(): dataset = self.getDataset(quantificationSetRecord.datasetid.id) referenceSet = self.getReferenceSet( quantificationSetRecord.referencesetid.id) @@ -1252,17 +1361,42 @@ def removeRnaQuantificationSet(self, rnaQuantificationSet): performs a cascading removal of all items within this rnaQuantificationSet. """ - q = m.Rnaquantificationset.delete().where( - m.Rnaquantificationset.id == rnaQuantificationSet.getId()) + q = models.Rnaquantificationset.delete().where( + models.Rnaquantificationset.id == rnaQuantificationSet.getId()) q.execute() + def insertPeer(self, peer): + """ + Accepts a peer datamodel object and adds it to the registry. + """ + try: + models.Peer.create( + url=peer.getUrl(), + attributes=json.dumps(peer.getAttributes())) + except Exception as e: + raise exceptions.RepoManagerException(e) + + def removePeer(self, url): + """ + Remove peers by URL. + """ + q = models.Peer.delete().where( + models.Peer.url == url) + q.execute() + + def _createNetworkTables(self): + """""" + self.database.create_table(models.Peer) + self.database.create_table(models.Announcement) + def initialise(self): """ - Initialise this data repostitory, creating any necessary directories + Initialise this data repository, creating any necessary directories and file paths. """ self._checkWriteMode() self._createSystemTable() + self._createNetworkTables() self._createOntologyTable() self._createReferenceSetTable() self._createReferenceTable() diff --git a/ga4gh/server/exceptions.py b/ga4gh/server/exceptions.py index bcd610c06..9704e383b 100644 --- a/ga4gh/server/exceptions.py +++ b/ga4gh/server/exceptions.py @@ -192,6 +192,12 @@ class DatamodelValidationException(BadRequestException): """ +class BadUrlException(RuntimeException): + def __init__(self, url): + self.message = "The URL: '{}' was malformed".format(url) + self.httpStatus = 400 + + class ReadGroupSetNotMappedToReferenceSetException(BadRequestException): def __init__(self, readGroupSetId): @@ -209,6 +215,12 @@ class NotFoundException(RuntimeException): message = "A resource was not found" +class PeerNotFoundException(NotFoundException): + def __init__(self, url): + self.message = "A peer with url '{}' was not found".format( + url) + + class PathNotFoundException(NotFoundException): message = "The request path was not found" diff --git a/ga4gh/server/frontend.py b/ga4gh/server/frontend.py index 737af5922..6de8c6fd1 100644 --- a/ga4gh/server/frontend.py +++ b/ga4gh/server/frontend.py @@ -31,10 +31,10 @@ import ga4gh.server.exceptions as exceptions import ga4gh.server.datarepo as datarepo import ga4gh.server.auth as auth +import ga4gh.server.network as network import ga4gh.schemas.protocol as protocol - MIMETYPE = "application/json" SEARCH_ENDPOINT_METHODS = ['POST', 'OPTIONS'] SECRET_KEY_LENGTH = 24 @@ -305,6 +305,11 @@ def configure(configFile=None, baseConfig="ProductionConfig", app.cache_dir = '/tmp/ga4gh' app.cache = FileSystemCache( app.cache_dir, threshold=5000, default_timeout=600, mode=384) + # Peer service initialization + network.initialize( + app.config.get('INITIAL_PEERS'), + app.backend.getDataRepository(), + app.logger) app.oidcClient = None app.myPort = port if app.config.get('AUTH0_ENABLED'): @@ -611,6 +616,13 @@ def robots(): app.static_folder, flask.request.path[1:]) +@DisplayedRoute('/info') +@requires_auth +def getInfo(): + return handleFlaskGetRequest( + None, flask.request, app.backend.runGetInfo) + + @DisplayedRoute('/references/') @requires_auth def getReference(id): @@ -733,6 +745,21 @@ def searchIndividuals(): flask.request, app.backend.runSearchIndividuals) +@DisplayedRoute('/peers/list', postMethod=True) +@requires_auth +def listPeers(): + return handleFlaskPostRequest( + flask.request, app.backend.runListPeers) + + +@DisplayedRoute('/announce', postMethod=True) +@requires_auth +def announce(): + # We can't use the post handler here because we want detailed request + # data. + return app.backend.runAddAnnouncement(flask.request) + + @DisplayedRoute( '/biosamples/', pathDisplay='/biosamples/') diff --git a/ga4gh/server/network/__init__.py b/ga4gh/server/network/__init__.py new file mode 100644 index 000000000..5359e2fe3 --- /dev/null +++ b/ga4gh/server/network/__init__.py @@ -0,0 +1,72 @@ +""" +Provides methods to initialize the server's peer to peer connections. +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import os +import requests + +import ga4gh.server.datamodel as datamodel +import ga4gh.server.exceptions as exceptions + + +def getInitialPeerList(filePath, logger=None): + """ + Attempts to get a list of peers from a file specified in configuration. + + This file has one URL per line and can contain newlines and comments. + + # Main ga4gh node + http://1kgenomes.ga4gh.org + # Local intranet peer + https://192.168.1.1 + + The server will attempt to add URLs in this file to its registry at + startup and will log a warning if the file isn't found. + """ + ret = [] + with open(filePath) as textFile: + ret = textFile.readlines() + if len(ret) == 0: + if logger: + logger.warn("Couldn't load the initial " + "peer list. Try adding a " + "file named 'initial_peers.txt' " + "to {}".format(os.getcwd())) + # Remove lines that start with a hash or are empty. + return filter(lambda x: x != "" and not x.find("#") != -1, ret) + + +def insertInitialPeer(dataRepository, url, logger=None): + """ + Takes the datarepository, a url, and an optional logger and attempts + to add the peer into the repository. + """ + insertPeer = dataRepository.insertPeer + try: + peer = datamodel.peers.Peer(url) + insertPeer(peer) + except exceptions.RepoManagerException as exc: + if logger: + logger.debug( + "Peer already in registry {} {}".format(peer.getUrl(), exc)) + except exceptions.BadUrlException as exc: + if logger: + logger.debug("A URL in the initial " + "peer list {} was malformed. {}".format(url), exc) + + +def initialize(filePath, dataRepository, logger=None): + for initialPeer in getInitialPeerList(filePath): + try: + headers = {'content-type': 'application/json'} + data = {"peer": {"url": initialPeer}} + url = "{}/announce".format(initialPeer.rstrip("/")) + requests.post(url, headers=headers, json=data) + except Exception as e: + if logger: + logger.info("Couldn't announce to initial peer {}".format( + (e, url))) + insertInitialPeer(dataRepository, initialPeer, logger) diff --git a/ga4gh/server/paging.py b/ga4gh/server/paging.py index e4d17111b..483370bba 100644 --- a/ga4gh/server/paging.py +++ b/ga4gh/server/paging.py @@ -448,3 +448,28 @@ def _search(self): def _prepare(self, obj): return obj + + +class PeerIterator(SequenceIterator): + """ + Iterates through peers + """ + def __init__(self, request, dataRepo): + self._dataRepo = dataRepo + super(PeerIterator, self).__init__(request) + + def _initialize(self): + if self._request.page_token != '': + self._startIndex = int(self._request.page_token) + else: + self._startIndex = 0 + self._maxResults = self._request.page_size + + def _search(self): + iterator = self._dataRepo.getPeers( + offset=int(self._startIndex), + limit=self._maxResults) + return iterator + + def _prepare(self, obj): + return obj.toProtocolElement() diff --git a/ga4gh/server/repo/models.py b/ga4gh/server/repo/models.py index fa7fb9dfe..1892c93de 100644 --- a/ga4gh/server/repo/models.py +++ b/ga4gh/server/repo/models.py @@ -16,6 +16,7 @@ from __future__ import print_function from __future__ import unicode_literals +import datetime import peewee as pw # The databaseProxy is used to dynamically changed the @@ -40,6 +41,27 @@ class Meta: database = databaseProxy +class Peer(BaseModel): + url = pw.TextField(primary_key=True, null=False, unique=True) + + +class Announcement(BaseModel): + # Provides the storage layer for AnnouncePeerRequest + # Primary key for the record autoincrement + id = pw.PrimaryKeyField() + # URL submitted as a possible peer + url = pw.TextField(null=False) + # Other information about the request stored as JSON. + remote_addr = pw.TextField(null=True) + user_agent = pw.TextField(null=True) + # The time at which this record was created. + created = pw.DateTimeField() + + def save(self, *args, **kwargs): + self.created = datetime.datetime.now() + return super(Announcement, self).save(*args, **kwargs) + + class Dataset(BaseModel): description = pw.TextField(null=True) id = pw.TextField(primary_key=True) diff --git a/ga4gh/server/serverconfig.py b/ga4gh/server/serverconfig.py index 62ce99fad..38038232b 100644 --- a/ga4gh/server/serverconfig.py +++ b/ga4gh/server/serverconfig.py @@ -38,6 +38,7 @@ class BaseConfig(object): FILE_HANDLE_CACHE_MAX_SIZE = 50 LANDING_MESSAGE_HTML = "landing_message.html" + INITIAL_PEERS = "ga4gh/server/templates/initial_peers.txt" class ComplianceConfig(BaseConfig): diff --git a/ga4gh/server/templates/initial_peers.txt b/ga4gh/server/templates/initial_peers.txt new file mode 100644 index 000000000..ae1974894 --- /dev/null +++ b/ga4gh/server/templates/initial_peers.txt @@ -0,0 +1,10 @@ +# Put a list of initial peers you would like to add here. +# You can also manage the peer list via the registry. +# +# ga4gh_repo add-peer registry.db http://1kgenomes.ga4gh.org +# +# Every time you start the server it will attempt to +# read this file to bootstrap the p2p network and will announce +# to these peers. +# +http://1kgenomes.ga4gh.org \ No newline at end of file diff --git a/scripts/build_test_data.py b/scripts/build_test_data.py index 9498aa4c5..f057be861 100644 --- a/scripts/build_test_data.py +++ b/scripts/build_test_data.py @@ -34,6 +34,8 @@ def buildTestData( useRelativePath = '-r' if relativePaths else '' run("init", "-f", repoFile) + run("add-peer", repoFile, useRelativePath, "http://example.ga4gh.org") + pattern = os.path.join(prefix, "referenceSets", "*.fa.gz") for dataFile in glob.glob(pattern): run("add-referenceset", repoFile, useRelativePath, dataFile, diff --git a/tests/end_to_end/test_client_json.py b/tests/end_to_end/test_client_json.py index 837588888..e4aa453fb 100644 --- a/tests/end_to_end/test_client_json.py +++ b/tests/end_to_end/test_client_json.py @@ -567,3 +567,19 @@ def testGetRnaQuantificationSet(self): [rnaQuantificationSet], "rnaquantificationsets-get", rnaQuantificationSet.id) + + def testListPeers(self): + iterator = self._client.list_peers() + cliString = "list-peers" + self.verifyParsedOutputsEqual(iterator, cliString) + + def testInfo(self): + info = self._client.get_info() + cliString = "get-info" + self.verifyParsedOutputsEqual([info], cliString) + + def testAnnounce(self): + url = "http://1kgenomes.ga4gh.org" + response = self._client.announce(url) + cliString = "announce" + self.verifyParsedOutputsEqual([response], cliString, url) diff --git a/tests/end_to_end/test_gestalt.py b/tests/end_to_end/test_gestalt.py index c3076a857..735a39c28 100644 --- a/tests/end_to_end/test_gestalt.py +++ b/tests/end_to_end/test_gestalt.py @@ -25,6 +25,7 @@ class TestGestalt(server_test.ServerTest): def testEndToEnd(self): # extract ids from a simulated data repo with the same config repo = datarepo.SimulatedDataRepository() + peer = repo.getPeers()[0] dataset = repo.getDatasets()[0] datasetId = dataset.getId() variantSet = dataset.getVariantSets()[0] @@ -37,6 +38,7 @@ def testEndToEnd(self): variantAnnotationSetId = \ variantSet.getVariantAnnotationSets()[0].getId() + self.simulatedPeerUrl = peer.getUrl() self.simulatedDatasetId = datasetId self.simulatedVariantSetId = variantSetId self.simulatedReadGroupId = readGroupId @@ -46,6 +48,7 @@ def testEndToEnd(self): self.client = client.ClientForTesting(self.server.getUrl()) self.runVariantsRequest() self.assertLogsWritten() + self.runPeersRequests() self.runReadsRequest() self.runReferencesRequest() self.runVariantSetsRequestDatasetTwo() @@ -139,3 +142,14 @@ def runVariantSetsRequestDatasetTwo(self): cmd = "variantsets-search" args = "--datasetId {}".format(self.simulatedDatasetId) self.runClientCmd(self.client, cmd, args) + + def runPeersRequests(self): + cmd = "list-peers" + self.runClientCmd(self.client, cmd) + + cmd = "get-info" + self.runClientCmd(self.client, cmd) + + cmd = "announce" + args = "http://1kgenomes.ga4gh.org" + self.runClientCmd(self.client, cmd, args) diff --git a/tests/end_to_end/test_repo_manager.py b/tests/end_to_end/test_repo_manager.py index 622c5df20..8e08288b9 100644 --- a/tests/end_to_end/test_repo_manager.py +++ b/tests/end_to_end/test_repo_manager.py @@ -48,6 +48,9 @@ def _runCmd(self, cmd, *args): def testEndToEnd(self): self._runCmd("init") + self._runCmd("list-announcements") + self._runCmd("clear-announcements") + self._runCmd("add-peer", paths.peerUrl) self._runCmd("add-ontology", paths.ontologyPath) self._runCmd( "add-referenceset", paths.faPath, @@ -94,7 +97,7 @@ def testEndToEnd(self): self._runCmd("verify") self._runCmd("list") - + self._runCmd("remove-peer", paths.peerUrl, "-f") self._runCmd( "remove-phenotypeassociationset", self.datasetName, phenotypeAssociationSetName, "-f") diff --git a/tests/paths.py b/tests/paths.py index 742d30892..8e5ac383b 100644 --- a/tests/paths.py +++ b/tests/paths.py @@ -85,3 +85,5 @@ def getGa4ghFilePath(): # misc. landingMessageHtml = os.path.join(testDataDir, "test.html") + +peerUrl = "http://1kgenomes.ga4gh.org" diff --git a/tests/unit/test_datarepo.py b/tests/unit/test_datarepo.py index 54052a550..6c6411b79 100644 --- a/tests/unit/test_datarepo.py +++ b/tests/unit/test_datarepo.py @@ -88,7 +88,7 @@ def testDirectory(self): with self.assertRaises(exceptions.RepoInvalidDatabaseException): repo.open(datarepo.MODE_READ) - def testNonexistantFile(self): + def testNonexistentFile(self): repo = datarepo.SqlDataRepository("aFilePathThatDoesNotExist") with self.assertRaises(exceptions.RepoNotFoundException): repo.open(datarepo.MODE_READ) diff --git a/tests/unit/test_imports.py b/tests/unit/test_imports.py index faf7ade0c..198533444 100644 --- a/tests/unit/test_imports.py +++ b/tests/unit/test_imports.py @@ -189,6 +189,7 @@ class ImportGraphLayerChecker(object): 'ga4gh/server/datamodel/continuous.py', 'ga4gh/server/datamodel/genotype_phenotype.py', 'ga4gh/server/datamodel/genotype_phenotype_featureset.py', + 'ga4gh/server/datamodel/peers.py', 'ga4gh/server/gff3.py', 'ga4gh/server/sqlite_backend.py', ], diff --git a/tests/unit/test_peers.py b/tests/unit/test_peers.py new file mode 100644 index 000000000..318517d20 --- /dev/null +++ b/tests/unit/test_peers.py @@ -0,0 +1,49 @@ +""" +Unit tests for the peer data model. +""" +from __future__ import division +from __future__ import print_function +from __future__ import unicode_literals + +import unittest + +from ga4gh.server.datamodel import peers +from ga4gh.server import exceptions + + +class TestPeers(unittest.TestCase): + """ + Unit tests for the peer datamodel. + """ + def testBadUrlException(self): + peers.Peer("https://1kgenomes.ga4gh.org") + with (self.assertRaises(exceptions.BadUrlException)): + peers.Peer("ht9://1kgenomes.ga4gh.org") + with (self.assertRaises(exceptions.BadUrlException)): + peers.Peer("http://1kgen") + with (self.assertRaises(exceptions.BadUrlException)): + peers.Peer("http://.org") + + def testToProtocolElement(self): + url = "http://1kgenomes.ga4gh.org" + key = "testkey" + value = "testvalue" + attributes = {key: value} + peer = peers.Peer(url, attributes) + protocolElement = peer.toProtocolElement() + self.assertEqual(url, protocolElement.url) + self.assertEqual( + attributes[key], + protocolElement.attributes.attr[key].values[0].string_value) + + def testAttributesJson(self): + url = "http://1kgenomes.ga4gh.org" + with (self.assertRaises(exceptions.InvalidJsonException)): + peer = peers.Peer(url) + peer.setAttributesJson('a{"bad": "json"}') + with (self.assertRaises(exceptions.InvalidJsonException)): + peer = peers.Peer(url) + peer.setAttributesJson('{"bad"; "json"}') + with (self.assertRaises(exceptions.InvalidJsonException)): + peer = peers.Peer(url) + peer.setAttributesJson('{"bad": json}') diff --git a/tests/unit/test_repo_manager.py b/tests/unit/test_repo_manager.py index d9673dc63..d721dded7 100644 --- a/tests/unit/test_repo_manager.py +++ b/tests/unit/test_repo_manager.py @@ -77,6 +77,7 @@ def readRepo(self): return repo def init(self): + self._peerUrl = paths.peerUrl self.runCommand("init {}".format(self._repoPath)) def addOntology(self): @@ -84,6 +85,10 @@ def addOntology(self): cmd = "add-ontology {} {}".format(self._repoPath, paths.ontologyPath) self.runCommand(cmd) + def addPeer(self): + cmd = "add-peer {} {}".format(self._repoPath, paths.peerUrl) + self.runCommand(cmd) + def addDataset(self, datasetName=None): if datasetName is None: datasetName = "test_dataset" @@ -629,6 +634,7 @@ def setUp(self): def testVerify(self): self.init() + self.addPeer() self.addDataset() self.addOntology() self.addReferenceSet() @@ -641,6 +647,39 @@ def testVerify(self): self.runCommand(cmd) +class TestAddPeer(AbstractRepoManagerTest): + + def setUp(self): + super(TestAddPeer, self).setUp() + self.init() + + def testDefaults(self): + self.runCommand("add-peer {} {}".format( + self._repoPath, self._peerUrl)) + repo = self.readRepo() + peer = repo.getPeer(self._peerUrl) + self.assertEqual(peer.url, self._peerUrl) + + +class TestRemovePeer(AbstractRepoManagerTest): + + def setUp(self): + super(TestRemovePeer, self).setUp() + self.init() + self.addPeer() + + def assertPeerRemoved(self): + repo = self.readRepo() + self.assertRaises( + exceptions.PeerNotFoundException, + repo.getPeer, self._peerUrl) + + def testDefaults(self): + self.runCommand("remove-peer {} {} -f".format( + self._repoPath, self._peerUrl)) + self.assertPeerRemoved() + + class TestRemoveOntology(AbstractRepoManagerTest): def setUp(self): diff --git a/tests/unit/test_simulated_stack.py b/tests/unit/test_simulated_stack.py index 03e604398..cc5bea14c 100644 --- a/tests/unit/test_simulated_stack.py +++ b/tests/unit/test_simulated_stack.py @@ -1240,6 +1240,39 @@ def testSearchGenotypePhenotypes(self): for clientAssoc in responseData.associations: self.assertEqual(clientAssoc, repoAssoc) + def testPeersList(self): + path = "/peers/list" + peers = list(self.dataRepo.getPeers(0, 10)) + self.assertGreater(len(peers), 0) + request = protocol.ListPeersRequest() + request.page_size = 10 + responseData = self.sendSearchRequest( + path, request, protocol.ListPeersResponse) + urls = map(lambda p: p.url, responseData.peers) + for peer in responseData.peers: + self.assertIn(peer.url, urls) + repoPeer = self.dataRepo.getPeer(peer.url) + self.assertEqual(repoPeer.getUrl(), peer.url) + + def testAnnounce(self): + path = "/announce" + request = protocol.AnnouncePeerRequest() + request.peer.url = "http://1kgenomes.ga4gh.org" + responseData = self.sendSearchRequest( + path, request, protocol.AnnouncePeerResponse) + self.assertTrue(responseData.success, "A well formed URL should post") + request.peer.url = "Not a URL" + response = self.sendJsonPostRequest(path, protocol.toJson(request)) + self.assertEqual(response.status_code, 400) + + def testInfo(self): + path = "/info" + response = self.app.get(path) + responseData = protocol.fromJson(response.data, + protocol.GetInfoResponse) + self.assertIsNotNone(responseData) + self.assertEqual(responseData.protocol_version, protocol.version) + # TODO def testSearchGenotypePhenotypes(self): # TODO def testGetExpressionLevel(self):