From f831ebb3524112c46daef64a8005a7034bf16a9f Mon Sep 17 00:00:00 2001 From: Sean Arms Date: Sat, 18 Mar 2017 15:16:28 -0600 Subject: [PATCH] Process dataset elements with access children Fix Unidata/siphon#114 Fix Unidata/siphon#115 --- siphon/catalog.py | 84 +- siphon/tests/fixtures/cat_access_elements | 1101 +++++++++++++++++++++ siphon/tests/test_catalog.py | 8 + 3 files changed, 1173 insertions(+), 20 deletions(-) create mode 100644 siphon/tests/fixtures/cat_access_elements diff --git a/siphon/catalog.py b/siphon/catalog.py index 514e51d56..848db2bbb 100644 --- a/siphon/catalog.py +++ b/siphon/catalog.py @@ -81,12 +81,28 @@ def __init__(self, catalog_url): self.services = [] self.catalog_refs = OrderedDict() self.metadata = {} + self.ds_with_access_elements_to_process = [] service_skip_count = 0 service_skip = 0 + current_dataset = None + previous_dataset = None for child in root.iter(): tag_type = child.tag.split('}')[-1] if tag_type == 'dataset': + current_dataset = child.attrib['name'] self._process_dataset(child) + + if previous_dataset: + # see if the previously processed dataset has access elements as children + # if so, these datasets need to be processed specially when making + # access_urls + if self.datasets[previous_dataset].access_element_info: + self.ds_with_access_elements_to_process.append(previous_dataset) + + previous_dataset = current_dataset + + elif tag_type == 'access': + self.datasets[current_dataset].add_access_element_info(child) elif tag_type == 'catalogRef': self._process_catalog_ref(child) elif (tag_type == 'metadata') or (tag_type == ''): @@ -110,12 +126,13 @@ def __init__(self, catalog_url): self._process_datasets() def _process_dataset(self, element): + catalog_url = '' if 'urlPath' in element.attrib: if element.attrib['urlPath'] == 'latest.xml': - ds = Dataset(element, self.catalog_url) - else: - ds = Dataset(element) - self.datasets[ds.name] = ds + catalog_url = self.catalog_url + + ds = Dataset(element, catalog_url=catalog_url) + self.datasets[ds.name] = ds def _process_catalog_ref(self, element): catalog_ref = CatalogRef(self.catalog_url, element) @@ -128,8 +145,16 @@ def _process_metadata(self, element, tag_type): def _process_datasets(self): for dsName in list(self.datasets.keys()): - self.datasets[dsName].make_access_urls( - self.base_tds_url, self.services, metadata=self.metadata) + # check to see if dataset needs to have access urls created, if not, + # remove the dataset + has_url_path = self.datasets[dsName].url_path is not None + is_ds_with_access_elements_to_process = \ + dsName in self.ds_with_access_elements_to_process + if has_url_path or is_ds_with_access_elements_to_process: + self.datasets[dsName].make_access_urls( + self.base_tds_url, self.services, metadata=self.metadata) + else: + self.datasets.pop(dsName) class CatalogRef(object): @@ -204,8 +229,12 @@ def __init__(self, element_node, catalog_url=''): """ self.name = element_node.attrib['name'] - self.url_path = element_node.attrib['urlPath'] + if ('urlPath' in element_node.attrib): + self.url_path = element_node.attrib['urlPath'] + else: + self.url_path = None self.catalog_name = '' + self.access_element_info = {} self._resolved = False self._resolverUrl = None # if latest.xml, resolve the latest url @@ -267,6 +296,8 @@ def make_access_urls(self, catalog_url, all_services, metadata=None): metadata : TDSCatalogMetadata Metadata from the :class:`TDSCatalog` """ + + all_service_dict = {service.name: service for service in all_services} service_name = None if metadata: if 'serviceName' in metadata: @@ -275,26 +306,39 @@ def make_access_urls(self, catalog_url, all_services, metadata=None): access_urls = {} server_url = _find_base_tds_url(catalog_url) - found_service = None - if service_name: - for service in all_services: - if service.name == service_name: - found_service = service - break - - service = found_service - if service: + # process access urls for datasets that reference top + # level catalog services (individual or compound service + # types). + if service_name in all_service_dict: + service = all_service_dict[service_name] if service.service_type != 'Resolver': + # if service is a CompoundService, create access url + # for each SimpleService if isinstance(service, CompoundService): for subservice in service.services: - access_urls[subservice.service_type] = server_url + \ - subservice.base + self.url_path + access_urls[subservice.service_type] = (server_url + + subservice.base + + self.url_path) else: - access_urls[service.service_type] = server_url + \ - service.base + self.url_path + access_urls[service.service_type] = (server_url + + service.base + + self.url_path) + + # process access children of dataset elements + for service_type in self.access_element_info: + url_path = self.access_element_info[service_type] + if service_type in all_service_dict: + access_urls[service_type] = (server_url + + all_service_dict[service_type].base + + url_path) self.access_urls = access_urls + def add_access_element_info(self, access_element): + service_name = access_element.attrib['serviceName'] + url_path = access_element.attrib['urlPath'] + self.access_element_info[service_name] = url_path + class SimpleService(object): r""" diff --git a/siphon/tests/fixtures/cat_access_elements b/siphon/tests/fixtures/cat_access_elements new file mode 100644 index 000000000..6af85bf04 --- /dev/null +++ b/siphon/tests/fixtures/cat_access_elements @@ -0,0 +1,1101 @@ +interactions: +- request: + body: null + headers: + Accept: ['*/*'] + Accept-Encoding: ['gzip, deflate'] + Connection: [keep-alive] + User-Agent: [Siphon (0.4.0+45.g669af16.dirty)] + method: GET + uri: http://oceandata.sci.gsfc.nasa.gov/opendap/SeaWiFS/L3SMI/2001/001/catalog.xml + response: + body: {string: "\r\n301 Moved Permanently\r\n\r\n

301 Moved Permanently

\r\n
nginx
\r\n\r\n\r\n"} + headers: + Connection: [keep-alive] + Content-Length: ['178'] + Content-Type: [text/html] + Date: ['Sat, 18 Mar 2017 19:24:35 GMT'] + Location: ['https://oceandata.sci.gsfc.nasa.gov/opendap/SeaWiFS/L3SMI/2001/001/catalog.xml'] + Server: [nginx] + status: {code: 301, message: Moved Permanently} +- request: + body: null + headers: + Accept: ['*/*'] + Accept-Encoding: ['gzip, deflate'] + Connection: [keep-alive] + User-Agent: [Siphon (0.4.0+45.g669af16.dirty)] + method: GET + uri: https://oceandata.sci.gsfc.nasa.gov/opendap/SeaWiFS/L3SMI/2001/001/catalog.xml + response: + body: {string: "\n \n + \ \n + \ \n + \ \n + \ \n + \ \n + \ 6038934\n 2017-03-14T12:25:25\n \n + \ \n + \ \n \n + \ 1990904\n 2015-10-01T21:23:02\n \n + \ \n + \ \n \n + \ 1973123\n 2015-10-01T21:23:14\n \n + \ \n + \ \n \n + \ 1598151\n 2015-10-01T20:05:33\n \n + \ \n + \ \n \n + \ 1579207\n 2015-10-01T20:05:46\n \n + \ \n + \ \n \n + \ 1489384\n 2015-10-01T20:05:58\n \n + \ \n + \ \n \n + \ 1381617\n 2015-10-01T20:06:11\n \n + \ \n + \ \n \n + \ 1162213\n 2015-10-01T20:06:24\n \n + \ \n + \ \n \n + \ 1272702\n 2015-10-01T20:06:37\n \n + \ \n + \ \n \n + \ 1439652\n 2015-10-01T20:06:51\n \n + \ \n + \ \n \n + \ 502242\n 2015-10-01T20:07:04\n \n + \ \n + \ \n \n + \ 1148667\n 2015-10-01T20:07:18\n \n + \ \n + \ \n \n + \ 1527007\n 2015-10-01T20:07:31\n \n + \ \n + \ \n \n + \ 1294330\n 2015-10-01T20:07:44\n \n + \ \n + \ \n \n + \ 1726481\n 2015-10-01T20:07:56\n \n + \ \n + \ \n \n + \ 1711685\n 2015-10-01T20:08:10\n \n + \ \n + \ \n \n + \ 1688098\n 2015-10-01T20:08:23\n \n + \ \n + \ \n \n + \ 1682916\n 2015-10-01T20:08:35\n \n + \ \n + \ \n \n + \ 1671952\n 2015-10-01T20:08:49\n \n + \ \n + \ \n \n + \ 1615696\n 2015-10-01T20:09:01\n \n + \ \n + \ \n \n + \ 1736420\n 2015-10-01T20:09:14\n \n + \ \n + \ \n \n + \ 1806131\n 2015-10-01T20:09:26\n \n + \ \n + \ \n \n + \ 1367274\n 2015-10-01T20:09:38\n \n + \ \n + \ \n \n + \ 1598232\n 2015-10-01T21:42:28\n \n + \ \n + \ \n \n + \ 9731902\n 2015-10-01T22:35:47\n \n + \ \n + \ \n \n + \ 1130798\n 2015-10-01T21:59:48\n \n + \ \n + \ \n \n + \ 1592346\n 2015-10-01T22:16:53\n \n + \ \n + \ \n \n + \ 1897495\n 2015-10-01T20:45:06\n \n + \ \n + \ \n \n + \ 1882028\n 2015-10-01T20:45:18\n \n + \ \n + \ \n \n + \ 1833043\n 2015-10-01T20:45:31\n \n + \ \n + \ \n \n + \ 1800676\n 2015-10-01T20:45:43\n \n + \ \n + \ \n \n + \ 1774880\n 2015-10-01T20:45:57\n \n + \ \n + \ \n \n + \ 1608395\n 2015-10-01T20:46:10\n \n + \ \n + \ \n \n + \ 1991331\n 2015-10-01T20:46:23\n \n + \ \n + \ \n \n + \ 1882441\n 2015-10-01T20:46:36\n \n + \ \n + \ \n \n + \ 22583639\n 2017-03-14T13:27:51\n \n + \ \n + \ \n \n + \ 8249049\n 2015-10-01T23:53:42\n \n + \ \n + \ \n \n + \ 8159431\n 2015-10-01T23:53:55\n \n + \ \n + \ \n \n + \ 6837979\n 2015-10-01T23:47:15\n \n + \ \n + \ \n \n + \ 6756277\n 2015-10-01T23:47:27\n \n + \ \n + \ \n \n + \ 6400135\n 2015-10-01T23:47:41\n \n + \ \n + \ \n \n + \ 5930700\n 2015-10-01T23:47:56\n \n + \ \n + \ \n \n + \ 4972077\n 2015-10-01T23:48:11\n \n + \ \n + \ \n \n + \ 5330234\n 2015-10-01T23:48:25\n \n + \ \n + \ \n \n + \ 6269183\n 2015-10-01T23:48:38\n \n + \ \n + \ \n \n + \ 909008\n 2015-10-01T23:48:50\n \n + \ \n + \ \n \n + \ 5033242\n 2015-10-01T23:49:03\n \n + \ \n + \ \n \n + \ 6581440\n 2015-10-01T23:49:16\n \n + \ \n + \ \n \n + \ 5702395\n 2015-10-01T23:49:30\n \n + \ \n + \ \n \n + \ 6701993\n 2015-10-01T23:49:43\n \n + \ \n + \ \n \n + \ 6603184\n 2015-10-01T23:49:56\n \n + \ \n + \ \n \n + \ 6466390\n 2015-10-01T23:50:09\n \n + \ \n + \ \n \n + \ 6425260\n 2015-10-01T23:50:21\n \n + \ \n + \ \n \n + \ 6347730\n 2015-10-01T23:50:34\n \n + \ \n + \ \n \n + \ 6047416\n 2015-10-01T23:50:47\n \n + \ \n + \ \n \n + \ 6704408\n 2015-10-01T23:51:01\n \n + \ \n + \ \n \n + \ 7409861\n 2015-10-01T23:51:15\n \n + \ \n + \ \n \n + \ 4896678\n 2015-10-01T23:51:28\n \n + \ \n + \ \n \n + \ 6871294\n 2015-10-01T23:55:39\n \n + \ \n + \ \n \n + \ 14236064\n 2015-10-02T00:00:18\n \n + \ \n + \ \n \n + \ 4106441\n 2015-10-01T23:57:01\n \n + \ \n + \ \n \n + \ 7135463\n 2015-10-01T23:58:27\n \n + \ \n + \ \n \n + \ 7573582\n 2015-10-01T23:46:12\n \n + \ \n + \ \n \n + \ 7437758\n 2015-10-01T23:46:24\n \n + \ \n + \ \n \n + \ 7052935\n 2015-10-01T23:46:36\n \n + \ \n + \ \n \n + \ 6828565\n 2015-10-01T23:46:49\n \n + \ \n + \ \n \n + \ 6592847\n 2015-10-01T23:47:01\n \n + \ \n + \ \n \n + \ 5704050\n 2015-10-01T23:47:14\n \n + \ \n + \ \n \n + \ 8347072\n 2015-10-01T23:47:27\n \n + \ \n + \ \n \n + \ 7976355\n 2015-10-01T23:47:41\n \n + \ \n + \ \n \n + \ 27887810\n 2017-03-14T14:10:53\n \n + \ \n + \ \n \n + \ 12343210\n 2015-10-02T00:08:07\n \n + \ \n + \ \n \n + \ 12228075\n 2015-10-02T00:08:20\n \n + \ \n + \ \n \n + \ 11051904\n 2015-10-02T00:11:49\n \n + \ \n + \ \n \n + \ 10935974\n 2015-10-02T00:12:22\n \n + \ \n + \ \n \n + \ 10423521\n 2015-10-02T00:12:35\n \n + \ \n + \ \n \n + \ 9681490\n 2015-10-02T00:12:47\n \n + \ \n + \ \n \n + \ 8283354\n 2015-10-02T00:13:00\n \n + \ \n + \ \n \n + \ 8596767\n 2015-10-02T00:13:12\n \n + \ \n + \ \n \n + \ 10311967\n 2015-10-02T00:13:24\n \n + \ \n + \ \n \n + \ 723929\n 2015-10-02T00:13:37\n \n + \ \n + \ \n \n + \ 8292325\n 2015-10-02T00:13:49\n \n + \ \n + \ \n \n + \ 10712282\n 2015-10-02T00:14:02\n \n + \ \n + \ \n \n + \ 9345922\n 2015-10-02T00:14:14\n \n + \ \n + \ \n \n + \ 9611296\n 2015-10-02T00:14:27\n \n + \ \n + \ \n \n + \ 9442827\n 2015-10-02T00:14:39\n \n + \ \n + \ \n \n + \ 9216538\n 2015-10-02T00:14:51\n \n + \ \n + \ \n \n + \ 9139360\n 2015-10-02T00:15:04\n \n + \ \n + \ \n \n + \ 9011862\n 2015-10-02T00:15:16\n \n + \ \n + \ \n \n + \ 8563387\n 2015-10-02T00:15:29\n \n + \ \n + \ \n \n + \ 9579480\n 2015-10-02T00:15:41\n \n + \ \n + \ \n \n + \ 11083275\n 2015-10-02T00:15:54\n \n + \ \n + \ \n \n + \ 6884303\n 2015-10-02T00:16:07\n \n + \ \n + \ \n \n + \ 11235295\n 2015-10-02T00:08:22\n \n + \ \n + \ \n \n + \ 14229820\n 2015-10-02T00:10:44\n \n + \ \n + \ \n \n + \ 6434568\n 2015-10-02T00:08:58\n \n + \ \n + \ \n \n + \ 11710395\n 2015-10-02T00:09:18\n \n + \ \n + \ \n \n + \ 11041163\n 2015-10-02T00:09:04\n \n + \ \n + \ \n \n + \ 10763725\n 2015-10-02T00:09:17\n \n + \ \n + \ \n \n + \ 10018490\n 2015-10-02T00:09:29\n \n + \ \n + \ \n \n + \ 9584886\n 2015-10-02T00:09:42\n \n + \ \n + \ \n \n + \ 9162439\n 2015-10-02T00:09:54\n \n + \ \n + \ \n \n + \ 7860528\n 2015-10-02T00:10:11\n \n + \ \n + \ \n \n + \ 12591160\n 2015-10-02T00:10:26\n \n + \ \n + \ \n \n + \ 12201332\n 2015-10-02T00:10:40\n \n + \ \n + \ \n \n + \ 32741168\n 2017-03-14T14:00:03\n \n + \ \n + \ \n \n + \ 12407762\n 2015-10-02T00:22:34\n \n + \ \n + \ \n \n + \ 12292643\n 2015-10-02T00:22:46\n \n + \ \n + \ \n \n + \ 11122024\n 2015-10-02T00:17:30\n \n + \ \n + \ \n \n + \ 11004439\n 2015-10-02T00:17:46\n \n + \ \n + \ \n \n + \ 10489758\n 2015-10-02T00:18:02\n \n + \ \n + \ \n \n + \ 9742772\n 2015-10-02T00:18:15\n \n + \ \n + \ \n \n + \ 8339346\n 2015-10-02T00:18:29\n \n + \ \n + \ \n \n + \ 8649980\n 2015-10-02T00:18:49\n \n + \ \n + \ \n \n + \ 10378786\n 2015-10-02T00:19:11\n \n + \ \n + \ \n \n + \ 718745\n 2015-10-02T00:19:28\n \n + \ \n + \ \n \n + \ 8346345\n 2015-10-02T00:19:41\n \n + \ \n + \ \n \n + \ 10780246\n 2015-10-02T00:19:59\n \n + \ \n + \ \n \n + \ 9405467\n 2015-10-02T00:20:18\n \n + \ \n + \ \n \n + \ 9651590\n 2015-10-02T00:20:35\n \n + \ \n + \ \n \n + \ 9482643\n 2015-10-02T00:20:53\n \n + \ \n + \ \n \n + \ 9255548\n 2015-10-02T00:21:11\n \n + \ \n + \ \n \n + \ 9177381\n 2015-10-02T00:21:28\n \n + \ \n + \ \n \n + \ 9049524\n 2015-10-02T00:21:41\n \n + \ \n + \ \n \n + \ 8598840\n 2015-10-02T00:22:04\n \n + \ \n + \ \n \n + \ 9620088\n 2015-10-02T00:22:17\n \n + \ \n + \ \n \n + \ 11140915\n 2015-10-02T00:22:35\n \n + \ \n + \ \n \n + \ 6911492\n 2015-10-02T00:22:47\n \n + \ \n + \ \n \n + \ 11307888\n 2015-10-02T00:24:59\n \n + \ \n + \ \n \n + \ 14231929\n 2015-10-02T00:30:30\n \n + \ \n + \ \n \n + \ 6472816\n 2015-10-02T00:26:32\n \n + \ \n + \ \n \n + \ 11786853\n 2015-10-02T00:28:15\n \n + \ \n + \ \n \n + \ 11094632\n 2015-10-02T00:18:27\n \n + \ \n + \ \n \n + \ 10813692\n 2015-10-02T00:18:40\n \n + \ \n + \ \n \n + \ 10061320\n 2015-10-02T00:18:52\n \n + \ \n + \ \n \n + \ 9623894\n 2015-10-02T00:19:05\n \n + \ \n + \ \n \n + \ 9198845\n 2015-10-02T00:19:17\n \n + \ \n + \ \n \n + \ 7890823\n 2015-10-02T00:19:30\n \n + \ \n + \ \n \n + \ 12657623\n 2015-10-02T00:19:44\n \n + \ \n + \ \n \n + \ 12267721\n 2015-10-02T00:19:57\n \n + \ \n + \ \n \n + \ 14849215\n 2015-10-02T00:50:27\n \n + \ \n + \ \n \n + \ 14799962\n 2015-10-02T00:50:40\n \n + \ \n + \ \n \n + \ 13862969\n 2015-10-02T00:58:22\n \n + \ \n + \ \n \n + \ 13711415\n 2015-10-02T00:58:34\n \n + \ \n + \ \n \n + \ 13088124\n 2015-10-02T00:58:48\n \n + \ \n + \ \n \n + \ 12186528\n 2015-10-02T00:59:01\n \n + \ \n + \ \n \n + \ 10612322\n 2015-10-02T00:59:13\n \n + \ \n + \ \n \n + \ 10747782\n 2015-10-02T00:59:26\n \n + \ \n + \ \n \n + \ 13063652\n 2015-10-02T00:59:39\n \n + \ \n + \ \n \n + \ 411894\n 2015-10-02T00:59:51\n \n + \ \n + \ \n \n + \ 10472053\n 2015-10-02T01:00:04\n \n + \ \n + \ \n \n + \ 13294259\n 2015-10-02T01:00:16\n \n + \ \n + \ \n \n + \ 11620338\n 2015-10-02T01:00:29\n \n + \ \n + \ \n \n + \ 10331717\n 2015-10-02T01:00:42\n \n + \ \n + \ \n \n + \ 10156179\n 2015-10-02T01:00:54\n \n + \ \n + \ \n \n + \ 9914767\n 2015-10-02T01:01:07\n \n + \ \n + \ \n \n + \ 9820405\n 2015-10-02T01:01:20\n \n + \ \n + \ \n \n + \ 9683558\n 2015-10-02T01:01:32\n \n + \ \n + \ \n \n + \ 9265952\n 2015-10-02T01:01:45\n \n + \ \n + \ \n \n + \ 10292950\n 2015-10-02T01:01:58\n \n + \ \n + \ \n \n + \ 13438139\n 2015-10-02T01:02:10\n \n + \ \n + \ \n \n + \ 7455006\n 2015-10-02T01:02:23\n \n + \ \n + \ \n \n + \ 13956709\n 2015-10-02T00:50:00\n \n + \ \n + \ \n \n + \ 14484929\n 2015-10-02T00:50:16\n \n + \ \n + \ \n \n + \ 7627377\n 2015-10-02T00:50:01\n \n + \ \n + \ \n \n + \ 14762349\n 2015-10-02T00:50:02\n \n + \ \n + \ \n \n + \ 12675417\n 2015-10-02T00:52:21\n \n + \ \n + \ \n \n + \ 12134091\n 2015-10-02T00:52:33\n \n + \ \n + \ \n \n + \ 10907525\n 2015-10-02T00:52:46\n \n + \ \n + \ \n \n + \ 10017695\n 2015-10-02T00:52:58\n \n + \ \n + \ \n \n + \ 9543004\n 2015-10-02T00:53:10\n \n + \ \n + \ \n \n + \ 8137831\n 2015-10-02T00:53:23\n \n + \ \n + \ \n \n + \ 14908211\n 2015-10-02T00:53:35\n \n + \ \n + \ \n \n + \ 14547365\n 2015-10-02T00:53:48\n \n + \ \n + \ \n \n \n"} + headers: + Connection: [keep-alive] + Content-Description: [thredds_catalog] + Content-Type: [text/xml; charset=utf-8] + Date: ['Sat, 18 Mar 2017 19:24:35 GMT'] + Server: [nginx] + Set-Cookie: [JSESSIONID=9A6D157C07970FF7349B484964CD112E; Path=/opendap/; HttpOnly] + Strict-Transport-Security: [max-age=31536000; includeSubDomains; preload] + X-DAP: ['3.2'] + X-FRAME-OPTIONS: [DENY] + XDODS-Server: [dods/3.2] + XOPeNDAP-Server: ['bes/3.15.1, csv_handler/1.1.1, dap-server/ascii/4.1.5, dap-server/usage/4.2.1, + dap-server/www/4.1.5, dapreader_module/0.0.1, fileout_gdal/0.9.6, fileout_json/1.0.1, + fileout_netcdf/1.3.1, fits_handler/1.0.13, freeform_handler/3.9.1, gateway_module/1.1.4, + gdal_handler/1.0.1, libdap/3.15.1, ncml_moddule/1.3.0, netcdf_handler/3.11.1, + w10n_handler/1.0.1, xml_data_handler/1.0.7'] + status: {code: 200, message: OK} +version: 1 diff --git a/siphon/tests/test_catalog.py b/siphon/tests/test_catalog.py index ce5073fe8..758c9a234 100644 --- a/siphon/tests/test_catalog.py +++ b/siphon/tests/test_catalog.py @@ -135,3 +135,11 @@ def test_non_standard_context_path(): expected = ('http://ereeftds.bom.gov.au/ereefs/tds/dodsC/ereef/mwq/' 'P1A/A20020101.P1A.ANN_MIM_RMP.nc') assert ds.access_urls['OPENDAP'] == expected + + +@recorder.use_cassette('cat_access_elements') +def test_access_elements(): + 'Test parsing access elements in TDS client catalog' + url = 'http://oceandata.sci.gsfc.nasa.gov/opendap/SeaWiFS/L3SMI/2001/001/catalog.xml' + cat = TDSCatalog(url) + assert len(list(cat.datasets)) != 0