Skip to content

Commit

Permalink
Merge pull request #16 from csdms/mcflugen/update-list
Browse files Browse the repository at this point in the history
Update standard names list
  • Loading branch information
mcflugen authored Mar 3, 2024
2 parents 52cc7a3 + f391272 commit 12b3b60
Show file tree
Hide file tree
Showing 25 changed files with 7,406 additions and 1,150 deletions.
65 changes: 55 additions & 10 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,22 +1,67 @@
[![Test](https://github.com/csdms/standard_names/actions/workflows/test.yml/badge.svg)](https://github.com/csdms/standard_names/actions/workflows/test.yml)
[![Documentation Status](https://readthedocs.org/projects/standard-names/badge/?version=latest)](http://standard-names.readthedocs.io/en/latest/?badge=latest)
[![Coverage Status](https://coveralls.io/repos/github/csdms/standard_names/badge.svg?branch=master)](https://coveralls.io/github/csdms/standard_names?branch=master)
[![Conda Version](https://img.shields.io/conda/vn/conda-forge/standard_names.svg)](https://anaconda.org/conda-forge/standard_names)
[![PyPI](https://img.shields.io/pypi/v/standard_names)](https://pypi.org/project/standard_names)
[![Python][python-badge]][pypi-link]
[![Build Status][build-badge]][build-link]
[![PyPI][pypi-badge]][pypi-link]
[![Conda Version][anaconda-badge]][anaconda-link]


standard_names
==============
[anaconda-badge]: https://anaconda.org/conda-forge/standard_names/badges/version.svg
[anaconda-link]: https://anaconda.org/conda-forge/standard_names
[build-badge]: https://github.com/csdms/standard_names/actions/workflows/test.yml/badge.svg
[build-link]: https://github.com/csdms/standard_names/actions/workflows/test.yml
[csdms-workbench]: https://csdms.colorado.edu/wiki/Workbench
[pypi-badge]: https://badge.fury.io/py/standard_names.svg
[pypi-link]: https://pypi.org/project/standard_names/
[python-badge]: https://img.shields.io/pypi/pyversions/standard_names.svg

# standard_names

Python utilities for working with CSDMS Standard Names.

CSDMS Standard Names is an element of the [CSDMS Workbench](https://csdms.colorado.edu/wiki/Workbench),
CSDMS Standard Names is an element of the [CSDMS Workbench][csdms-workbench],
an integrated system of software tools, technologies, and standards
for building and coupling models.

## As Regular Expression

```
^ # Start of the object name
[a-z]+ # Starts with one or more lowercase letters
(?: # Start of a non-capturing group for subsequent parts
[-~_]? # Optional separator: hyphen, tilde, or underscore
[a-zA-Z0-9]+ # One or more alphanumeric characters
)* # Zero or more repetitions of the group
__ # Double underscore separator
[a-z]+ # Start of the quantity
(?: # Start of a non-capturing group for subsequent parts
[-~_]? # Optional separator: hyphen, tilde, or underscore
[a-zA-Z0-9]+ # One or more alphanumeric characters
)* # Zero or more repetitions of the group
$ # End of the name
```

## As Parsing Expression Grammar

```peg
Start
= LowercaseWord UnderscoreSeparator LowercaseWord
LowercaseWord
= [a-z] AdditionalCharacters*
AdditionalCharacters
= Separator? Alphanumeric+
Separator
= "-" / "~" / "_"
Alphanumeric
= [a-zA-Z0-9]
UnderscoreSeparator
= "__"
```

Links
-----
# Links

* [Source code](http://github.com/csdms/standard_names): The
*standard_names* source code repository.
Expand Down
2 changes: 1 addition & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
@nox.session(python=PYTHON_VERSION, venv_backend="conda")
def test(session: nox.Session) -> None:
"""Run the tests."""
session.install(".[testing]")
session.install(".[peg,testing]")

args = ["--cov", PROJECT, "-vvv"] + session.posargs

Expand Down
10 changes: 4 additions & 6 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ dynamic = [
]
dependencies = [
"packaging",
"pyyaml",
]

[project.license]
Expand All @@ -51,6 +50,9 @@ Issues = "https://github.com/csdms/standard_names/issues"
Repository = "https://github.com/csdms/standard_names"

[project.optional-dependencies]
peg = [
"pyparsing",
]
dev = [
"nox",
]
Expand All @@ -64,11 +66,7 @@ docs = [
]

[project.scripts]
snbuild = "standard_names.cmd.snbuild:run"
sndump = "standard_names.cmd.sndump:run"
snscrape = "standard_names.cmd.snscrape:run"
snsql = "standard_names.cmd.snsql:run"
snvalidate = "standard_names.cmd.snvalidate:run"
"standard-names" = "standard_names.cmd.main:main"

[build-system]
requires = [
Expand Down
6 changes: 6 additions & 0 deletions src/standard_names/__main__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from __future__ import annotations

from standard_names.cli.main import main

if __name__ == "__main__":
    # Use the value returned by the command-line entry point as the
    # process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
117 changes: 117 additions & 0 deletions src/standard_names/_format.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
from collections.abc import Iterable


def as_wiki_list(
    items: Iterable[str], heading: str | None = None, level: int = 1
) -> str:
    """Format items as a wiki ``<tt>`` block with an optional heading.

    Every item is stripped of surrounding whitespace and suffixed with
    ``<br/>``. Items that are empty after stripping are kept and appear
    as a bare ``<br/>`` line.

    Parameters
    ----------
    items : iterable of str
        The lines to place inside the ``<tt>`` block.
    heading : str, optional
        Heading text placed before the block. Omitted when ``None``.
    level : int, optional
        Number of ``=`` characters placed on each side of the heading.

    Returns
    -------
    str
        The formatted lines joined with newlines.

    Examples
    --------
    >>> from standard_names._format import as_wiki_list
    >>> print(as_wiki_list(["line 1", "line 2"], heading="Lines"))
    = Lines =
    <tt>
    line 1<br/>
    line 2<br/>
    </tt>
    """
    rows = []
    if heading is not None:
        marker = "=" * level
        rows.append(f"{marker} {heading} {marker}")

    rows.append("<tt>")
    rows.extend(f"{entry.strip()}<br/>" for entry in items)
    rows.append("</tt>")

    return "\n".join(rows)


def as_yaml_list(
    items: Iterable[str], heading: str | None = None, level: int = 1
) -> str:
    """Format items as a YAML sequence, optionally nested under a key.

    Items are stripped of surrounding whitespace and items that are
    empty after stripping are dropped. If nothing remains, an empty
    YAML list (``[]``) is written instead.

    Parameters
    ----------
    items : iterable of str
        The values of the sequence.
    heading : str, optional
        Key under which the sequence is nested. When given (anything
        other than ``None``), the sequence entries are indented by two
        spaces beneath ``heading:``.
    level : int, optional
        Not used by this formatter.

    Returns
    -------
    str
        The formatted lines joined with newlines.

    Examples
    --------
    >>> from standard_names._format import as_yaml_list
    >>> print(as_yaml_list(["line 1", "line 2"], heading="Lines"))
    Lines:
      - line 1
      - line 2
    >>> print(as_yaml_list([], heading="Lines"))
    Lines:
      []
    """
    # The heading/indent pair is decided once, on ``heading is None``.
    if heading is None:
        formatted_lines: list[str] = []
        prefix = ""
    else:
        formatted_lines = [f"{heading}:"]
        prefix = "  "

    stripped_items = [stripped for item in items if (stripped := item.strip())]

    if stripped_items:
        formatted_lines += [f"{prefix}- {item}" for item in stripped_items]
    else:
        formatted_lines.append(f"{prefix}[]")

    return "\n".join(formatted_lines)


def as_myst_list(
    items: Iterable[str], heading: str | None = None, level: int = 1
) -> str:
    """Format items as a MyST/Markdown bullet list with an optional heading.

    Items are stripped of surrounding whitespace and items that are
    empty after stripping are dropped.

    Parameters
    ----------
    items : iterable of str
        The entries of the bullet list.
    heading : str, optional
        Heading text placed before the list. Omitted when ``None`` or
        empty.
    level : int, optional
        Heading level, i.e. the number of ``#`` characters that prefix
        the heading.

    Returns
    -------
    str
        The formatted lines joined with newlines.

    Examples
    --------
    >>> from standard_names._format import as_myst_list
    >>> print(as_myst_list(["line 1", "line 2"], heading="Lines"))
    # Lines
    * line 1
    * line 2
    >>> print(as_myst_list(["line 1"], heading="Lines", level=2))
    ## Lines
    * line 1
    """
    # Honour ``level`` (previously ignored) so nested headings can be
    # produced, as the wiki formatter already allows.
    marker = "#" * level

    formatted_lines = [f"{marker} {heading}"] if heading else []
    formatted_lines += [
        f"* {stripped}" for item in items if (stripped := item.strip())
    ]

    return "\n".join(formatted_lines)


def as_text_list(
    items: Iterable[str], heading: str | None = None, level: int = 1
) -> str:
    """Format items as plain text, one item per line.

    Items are stripped of surrounding whitespace and items that are
    empty after stripping are dropped.

    Parameters
    ----------
    items : iterable of str
        The lines to write.
    heading : str, optional
        Text written verbatim as the first line. Omitted when ``None``
        or empty.
    level : int, optional
        Not used by this formatter.

    Returns
    -------
    str
        The formatted lines joined with newlines.

    Examples
    --------
    >>> from standard_names._format import as_text_list
    >>> print(as_text_list(["line 1", "line 2"], heading="# Lines"))
    # Lines
    line 1
    line 2
    """
    rows = [heading] if heading else []

    for entry in items:
        text = entry.strip()
        if text:
            rows.append(text)

    return "\n".join(rows)


# Maps an output-format name to the function that renders a list of
# items in that format. All formatters share the signature
# ``(items, heading=None, level=1) -> str``.
FORMATTERS = {
    "wiki": as_wiki_list,
    "yaml": as_yaml_list,
    "text": as_text_list,
    "myst": as_myst_list,
}
File renamed without changes.
82 changes: 82 additions & 0 deletions src/standard_names/cli/_scrape.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#! /usr/bin/env python
"""
Example usage:
```bash
snscrape http://csdms.colorado.edu/wiki/CSN_Quantity_Templates \
http://csdms.colorado.edu/wiki/CSN_Object_Templates \
http://csdms.colorado.edu/wiki/CSN_Operation_Templates \
> data/scraped.yaml
```
"""
from __future__ import annotations

from collections.abc import Iterable
from urllib.request import urlopen

from standard_names.registry import NamesRegistry


def scrape_names(files: Iterable[str]) -> NamesRegistry:
    """Collect the standard names found in a set of files or URLs.

    Parameters
    ----------
    files : iterable of str
        Local paths or http(s) URLs to search for names.

    Returns
    -------
    NamesRegistry
        A registry holding the union of the names found in all sources.
    """
    collected = NamesRegistry([])

    for source in files:
        found = search_file_for_names(source)
        collected |= NamesRegistry(found)

    return collected


def find_all_names(lines: Iterable[str], engine: str = "regex") -> set[str]:
    """Find standard names in lines of text.

    Parameters
    ----------
    lines : iterable of str
        Lines of text to search. Each line is stripped of surrounding
        whitespace before it is searched.
    engine : {"regex", "peg"}, optional
        Which ``findall`` implementation to use: the one from
        ``standard_names.regex`` or the one from ``standard_names.peg``.

    Returns
    -------
    set of str
        The unique names found across all lines.

    Raises
    ------
    ValueError
        If *engine* is neither ``"regex"`` nor ``"peg"``.

    Examples
    --------
    >>> from standard_names.cli._scrape import find_all_names
    >>> contents = '''
    ... A file with text and names (air__temperature) mixed in. Some names
    ... have double underscores (like, Water__Temperature) but are not
    ... valid names. Others, like water__temperature, or "wind__speed" are good.
    ... '''
    >>> sorted(find_all_names(contents.splitlines(), engine="regex"))
    ['air__temperature', 'water__temperature', 'wind__speed']
    >>> sorted(find_all_names(contents.splitlines(), engine="peg"))
    ['air__temperature', 'water__temperature', 'wind__speed']
    """
    # Import lazily so that only the selected engine is loaded.
    if engine == "regex":
        from standard_names.regex import findall
    elif engine == "peg":
        from standard_names.peg import findall
    else:
        # Must be an f-string, otherwise "{engine!r}" is printed literally.
        raise ValueError(
            f"engine not understood: {engine!r} is not one of 'regex', 'peg'"
        )

    names: set[str] = set()
    for line in lines:
        names.update(findall(line.strip()))

    return names


def search_file_for_names(path: str) -> set[str]:
    """Search a local file or an http(s) URL for standard names.

    Parameters
    ----------
    path : str
        Path to a local file, or a URL beginning with ``http://`` or
        ``https://``.

    Returns
    -------
    set of str
        The names found by :func:`find_all_names`.
    """
    if path.startswith(("http://", "https://")):
        with urlopen(path) as response:
            # The response yields bytes; decode each line before searching.
            return find_all_names(line.decode("utf-8") for line in response)

    # Read local files as UTF-8 as well, so results do not depend on the
    # platform's default locale encoding.
    with open(path, encoding="utf-8") as fp:
        return find_all_names(fp)
26 changes: 2 additions & 24 deletions src/standard_names/cmd/snsql.py → src/standard_names/cli/_sql.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
#! /usr/bin/env python
from __future__ import annotations

import os

Expand Down Expand Up @@ -46,7 +46,7 @@ def as_sql_commands(names: NamesRegistry, newline: str = os.linesep) -> str:
Examples
--------
>>> from standard_names.registry import NamesRegistry
>>> from standard_names.cmd.snsql import as_sql_commands
>>> from standard_names.cli._sql import as_sql_commands
>>> names = NamesRegistry()
>>> names.add("air__temperature")
Expand Down Expand Up @@ -99,25 +99,3 @@ def as_sql_commands(names: NamesRegistry, newline: str = os.linesep) -> str:
commands = newline.join(db.iterdump())

return commands


def main() -> str:
    """Build SQL commands for a database of CSDMS standard names.

    Reads one or more files named on the command line, builds a
    ``NamesRegistry`` from them, and converts it with
    ``as_sql_commands``.

    Returns
    -------
    str
        The value returned by ``as_sql_commands`` for the registry.
    """
    import argparse

    cli = argparse.ArgumentParser(
        description="Build an sqlite database from a list of names"
    )
    cli.add_argument(
        "file", nargs="+", type=argparse.FileType("r"), help="List of names"
    )
    parsed = cli.parse_args()

    registry = NamesRegistry(parsed.file)
    return as_sql_commands(registry)


def run() -> None:
    """Print the SQL commands returned by :func:`main`."""
    sql_commands = main()
    print(sql_commands)
Loading

0 comments on commit 12b3b60

Please sign in to comment.