From 7aaa985b01736a3c45183f646488b24fc62a73cd Mon Sep 17 00:00:00 2001 From: Orest Dubay Date: Sun, 17 Dec 2023 21:45:38 +0100 Subject: [PATCH] minor doc update --- site/apidocs/liquer/app.html | 40 +- site/apidocs/liquer/config.html | 603 ++++++++++++++++++-------- site/apidocs/liquer/context.html | 254 +++++++++-- site/apidocs/liquer/ext/meta.html | 12 +- site/apidocs/liquer/index.html | 2 +- site/apidocs/liquer/recipes.html | 12 +- site/apidocs/liquer/server/index.html | 5 + site/guide/index.html | 24 +- site/index.html | 2 +- site/search/search_index.json | 2 +- site/sitemap.xml.gz | Bin 127 -> 127 bytes 11 files changed, 696 insertions(+), 260 deletions(-) diff --git a/site/apidocs/liquer/app.html b/site/apidocs/liquer/app.html index f2eff36..8361bd2 100644 --- a/site/apidocs/liquer/app.html +++ b/site/apidocs/liquer/app.html @@ -37,6 +37,16 @@

Module liquer.app

import webbrowser from liquer.config import load_config, initialize, preset, config +def quickstart(config_file="config.yaml", index_link=None): + """Quickstart function for liquer. It loads configuration, initializes liquer and starts the server.""" + load_config(config_file) + initialize() + if index_link: + webbrowser.open("http://localhost:5000/"+index_link) + else: + webbrowser.open("http://localhost:5000") + preset().start_server(config()) + if __name__=="__main__": parser = argparse.ArgumentParser(description='Liquer command line interface') parser.add_argument('--config', '-c', type=str, action="store", help='Configuration file') @@ -66,7 +76,7 @@

Module liquer.app

with open(args.create_config,"w") as f: f.write(preset().default_config()) print(f"Configuration file {args.create_config} created.") - sys.exit(0) + sys.exit(0) if args.query: query = args.query @@ -86,6 +96,29 @@

Module liquer.app

+

Functions

+
+
+def quickstart(config_file='config.yaml', index_link=None) +
+
+

Quickstart function for liquer. It loads configuration, initializes liquer and starts the server.

+
+ +Expand source code + +
def quickstart(config_file="config.yaml", index_link=None):
+    """Quickstart function for liquer. It loads configuration, initializes liquer and starts the server."""
+    load_config(config_file)
+    initialize()
+    if index_link:
+        webbrowser.open("http://localhost:5000/"+index_link)
+    else:
+        webbrowser.open("http://localhost:5000")
+    preset().start_server(config())
+
+
+
@@ -101,6 +134,11 @@

Index

  • liquer
  • +
  • Functions

    + +
  • diff --git a/site/apidocs/liquer/config.html b/site/apidocs/liquer/config.html index a3d13d9..07c0996 100644 --- a/site/apidocs/liquer/config.html +++ b/site/apidocs/liquer/config.html @@ -42,6 +42,7 @@

    Module liquer.config

    from pathlib import Path import yaml import logging + try: from yaml import CLoader as Loader, CDumper as Dumper except ImportError: @@ -51,34 +52,48 @@

    Module liquer.config

    _config = None + def config(): """Return the config dictionary""" global _config if _config is None: - _config = dict(setup=dict(preset="liquer.config.Preset")) + _config = dict(setup=dict(preset="liquer.config.RichPreset")) return _config + def load_config(filename): """Load configuration from a file""" global _config if _config is None: - _config = {} - with open(filename, "r") as f: - new_config = yaml.load(f, Loader=Loader) - _config.update(new_config) + _config = config() + try: + with open(filename, "r") as f: + new_config = yaml.load(f, Loader=Loader) + _config.update(new_config) + except FileNotFoundError: + print(f"Configuration file {filename} not found") + print("Create a default configuration file with:") + print() + print("python -m liquer.app --create-config config.yaml") + print() logger.info(f"Configuration loaded from {filename}") + _initializer = None + def initializer(): """Return the initializer function""" global _initializer + def __initializer(config, worker_environment=False): preset().initialize(config, worker_environment=worker_environment) + if _initializer is None: _initializer = __initializer return _initializer + def set_initializer(f): """Set the initializer function. Initializer is a function taking the configuration dictionary and a boolean flag @@ -89,6 +104,7 @@

    Module liquer.config

    global _initializer _initializer = f + def initialize(worker_environment=False): """Initialize the configuration This function is called automatically when the configuration is loaded. @@ -96,8 +112,10 @@

    Module liquer.config

    """ initializer()(config(), worker_environment=worker_environment) + _preset = None + def preset(): """Return the preset object""" global _preset @@ -105,30 +123,22 @@

    Module liquer.config

    set_preset() return _preset -async def run_tornado(port=5000, index_query="index"): - from liquer.server.tornado_handlers import url_mapping, response + +async def run_tornado(port=5000, index_link="index"): + from liquer.server.tornado_handlers import url_mapping + from liquer.server.handlers import response import tornado.web import asyncio import traceback from liquer.query import evaluate class IndexHandler(tornado.web.RequestHandler): - def prepare(self): - header = "Content-Type" - body = "text/html" - self.set_header(header, body) def get(self): - try: - b, mimetype, filename = response(evaluate(index_query)) - self.write(b) - except: - traceback.print_exc() - self.set_status(500) - self.finish(f"500 - Failed to create a response to {index_query}") - return + self.redirect(index_link) application = tornado.web.Application( - url_mapping() + [ + url_mapping() + + [ (r"/", IndexHandler), (r"/index.html", IndexHandler), ] @@ -137,6 +147,7 @@

    Module liquer.config

    application.listen(port) await asyncio.Event().wait() + class Preset(object): """A preset is a configuration object, which can be used to initialize liquer. It's role is to @@ -148,41 +159,50 @@

    Module liquer.config

    It as well can create default configuration files, initialize and start the pool, server and other services """ - modules=[] + + modules = [] + def __init__(self): pass @classmethod def preset_class(cls): """Return the preset class name""" - x="" - if cls.__module__ != "__main__": - x+=cls.__module__ - x+="." - x+=cls.__name__ + x = "" + if cls.__module__ != "__main__": + x += cls.__module__ + x += "." + x += cls.__name__ return x - + def _find_modules(self, path): """Find modules in a path containing LiQuer commands""" from glob import glob import os - for filename in glob(os.path.join(path,"*.py")): - with open(filename,"r") as f: + + for filename in glob(os.path.join(path, "*.py")): + with open(filename, "r") as f: mod = f.read() if "@command" in mod or "@first_command" in mod: yield os.path.splitext(os.path.basename(filename))[0] + def find_modules(self, path): """Find modules in a path containing LiQuer commands""" import os + for root, dirs, files in os.walk(path): rootpath = os.path.relpath(root, path) - module = rootpath.replace(os.path.sep,".") + module = rootpath.replace(os.path.sep, ".") while module.startswith("."): module = module[1:] for file in files: - if file.endswith(".py") and not file.startswith("_") and not file.startswith("test_"): - with open(os.path.join(root, file),"r") as f: + if ( + file.endswith(".py") + and not file.startswith("_") + and not file.startswith("test_") + ): + with open(os.path.join(root, file), "r") as f: mod = f.read() if "@command" in mod or "@first_command" in mod: x = module + "." + os.path.splitext(file)[0] @@ -192,9 +212,8 @@

    Module liquer.config

    def default_config(self): """Return a default configuration file in yaml format""" - modules = sorted(set(self.modules - + ["liquer.ext.meta", "liquer.ext.basic"])) - modules_list="\n".join([f" - {m}" for m in modules]) + modules = sorted(set(self.modules + ["liquer.ext.meta", "liquer.ext.basic"])) + modules_list = "\n".join([f" - {m}" for m in modules]) return f""" setup: preset: {self.preset_class():<35} # Preset class name @@ -208,9 +227,10 @@

    Module liquer.config

    server_type: {"flask":<35} # Server type (flask, tornado, ...) url_prefix: {'"/liquer"':<35} # URL prefix for the server port: {5000:<35} # Server port + index_link: {"/liquer/q/index/index.html":<35} # Index query """ - - def initialize(self, config, worker_environment=False): + + def initialize(self, config, worker_environment=False): """Initialize from configuration. If worker_environment is True, then initialize specificallz the worker environment, i.e. the worker pool should not be initialized. @@ -226,21 +246,22 @@

    Module liquer.config

    """Helper method to get a parameter from the setup section of the configuration""" if "setup" not in config: raise Exception("No setup section in configuration") - return config["setup"].get(name,default) - + return config["setup"].get(name, default) + def load_modules(self, config): """Load modules scpecified in the configuration List of modules is taken from config["setup"]["modules"] """ - for module in self.get_setup_parameter(config, "modules", []): + for module in self.get_setup_parameter(config, "modules", self.modules): logger.info(f"Loading module {module}") __import__(module, fromlist=["*"]) def create_cache(self, config): "Create cache object from configuration" import liquer.cache - cache=self.get_setup_parameter(config, "cache", "off") - if cache in ["off","none", "no", None, False]: + + cache = self.get_setup_parameter(config, "cache", "off") + if cache in ["off", "none", "no", None, False]: logger.info(f"Cache disabled") return liquer.cache.NoCache() elif cache in ["memory"]: @@ -256,15 +277,23 @@

    Module liquer.config

    def initialize_cache(self, config): """Initialize cache from configuration""" import liquer.cache + liquer.cache.set_cache(self.create_cache(config)) - + def initialize_store(self, config): """Initialize store from configuration""" import liquer.store from liquer.recipes import RecipeSpecStore - recipe_folders=self.get_setup_parameter(config, "recipe_folders", []) + from pathlib import Path + + liquer.store.get_web_store() + recipe_folders = self.get_setup_parameter(config, "recipe_folders", []) for folder_name in recipe_folders: logger.info(f"Adding recipe folder {folder_name}") + p=Path(folder_name) + if not p.exists(): + print(f"Creating folder {folder_name}") + p.mkdir(parents=True) s = RecipeSpecStore(liquer.store.FileStore(folder_name)).with_indexer() liquer.store.mount(folder_name, s) @@ -272,10 +301,11 @@

    Module liquer.config

    """Initialize pool from configuration""" import liquer.pool import liquer.cache - cache_concurrency=self.get_setup_parameter(config, "cache_concurrency") - if cache_concurrency in["off","none", "no", None, False]: + + cache_concurrency = self.get_setup_parameter(config, "cache_concurrency") + if cache_concurrency in ["off", "none", "no", None, False]: logger.info(f"Cache not configured for concurrency") - logger.warning(f"Distributed execution will not work properly") + logger.warning(f"Distributed execution will not work properly") elif cache_concurrency == "central": logger.info(f"Enabling central cache") liquer.pool.set_central_cache(liquer.cache.get_cache()) @@ -290,53 +320,103 @@

    Module liquer.config

    from liquer.state import set_var from liquer.query import evaluate - server_type=self.get_setup_parameter(config, "server_type", "flask") + server_type = self.get_setup_parameter(config, "server_type", "flask") if server_type in ["flask"]: logger.info(f"Starting flask server") import flask import liquer.server.blueprint as bp import traceback + app = flask.Flask(__name__) - url_prefix=self.get_setup_parameter(config, "url_prefix", "/liquer") - port=self.get_setup_parameter(config, "port", "5000") - flask_debug=self.get_setup_parameter(config, "debug", False) - host=self.get_setup_parameter(config, "host", "127.0.0.1") - flask_threaded=self.get_setup_parameter(config, "threaded", False) - index_query=self.get_setup_parameter(config, "index_query", "index") + url_prefix = self.get_setup_parameter(config, "url_prefix", "/liquer") + port = self.get_setup_parameter(config, "port", "5000") + flask_debug = self.get_setup_parameter(config, "debug", False) + host = self.get_setup_parameter(config, "host", "127.0.0.1") + flask_threaded = self.get_setup_parameter(config, "threaded", False) + index_link = self.get_setup_parameter(config, "index_link", "/liquer/q/index/index.html") + print(f"Index link: {index_link}") app.register_blueprint(bp.app, url_prefix=url_prefix) - - @app.route('/') - @app.route('/index.html') + + @app.route("/") + @app.route("/index.html") def index(): - try: - return bp.response(evaluate(index_query)) - except: - traceback.print_exc() - flask.abort(500) - - set_var("api_path",url_prefix+"/q/") - set_var("server",f"http://{host}:{port}") + print(f"Redirect to index link: {index_link}") + return flask.redirect(index_link, code=302) + + set_var("api_path", url_prefix + "/q/") + set_var("server", f"http://{host}:{port}") app.run(debug=flask_debug, host=host, port=port, threaded=flask_threaded) elif server_type in ["tornado"]: import asyncio + logger.info(f"Starting tornado server") - port=self.get_setup_parameter(config, "port", "5000") - 
url_prefix=self.get_setup_parameter(config, "url_prefix", "/liquer") - index_query=self.get_setup_parameter(config, "index_query", "index") - host=self.get_setup_parameter(config, "host", "127.0.0.1") - set_var("api_path",url_prefix+"/q/") - set_var("server",f"http://{host}:{port}") - asyncio.run(run_tornado(port, index_query=index_query)) + port = self.get_setup_parameter(config, "port", "5000") + url_prefix = self.get_setup_parameter(config, "url_prefix", "/liquer") + index_link = self.get_setup_parameter(config, "index_link", "/liquer/q/index/index.html") + host = self.get_setup_parameter(config, "host", "127.0.0.1") + set_var("api_path", url_prefix + "/q/") + set_var("server", f"http://{host}:{port}") + asyncio.run(run_tornado(port, index_link=index_link)) else: raise Exception(f"Unknown server type: {server_type}") + +class RichPreset(Preset): + modules = [ + "liquer.ext.dataframe_batches", + "liquer.ext.lq_matplotlib", + "liquer.ext.lq_openpyxl", + "liquer.ext.lq_pandas", + "liquer.ext.lq_pil", + "liquer.ext.lq_plotly", + "liquer.ext.lq_polars", + "liquer.ext.lq_pygments", + "liquer.ext.lq_python", + "liquer.ext.lq_sweetviz", + "liquer.ext.basic", + "liquer.ext.meta", + "liquer_pcv", + "liquer_gui" + ] + def default_config(self): + """Return a default configuration file in yaml format""" + modules = sorted(set(self.modules + ["liquer.ext.meta", "liquer.ext.basic"])) + modules_list = "\n".join([f" - {m}" for m in modules]) + recipe_folders ="\n".join (f" - {m}" for m in ["data", "reports"]) + return f""" +setup: + preset: {self.preset_class():<35} # Preset class name + modules: {"":<35} # Modules with commands to import +{modules_list} + cache: {"off":<35} # Cache type (off, memory, file, ...) + cache_path: {"cache":<35} # Cache path (for file cache) + cache_concurrency: {"central":<35} # Cache concurrency (off, local, central) + recipe_folders: {"":<35} # Recipe folders +{recipe_folders} + server_type: {"flask":<35} # Server type (flask, tornado, ...) 
+ url_prefix: {'"/liquer"':<35} # URL prefix for the server + port: {5000:<35} # Server port + index_query: {"/liquer/web/gui":<35} # Index query +""" + @classmethod + def get_setup_parameter(cls, config, name, default=None): + """Helper method to get a parameter from the setup section of the configuration""" + if "setup" not in config: + raise Exception("No setup section in configuration") + if name == "index_link": + return config["setup"].get("index_link", "/liquer/web/gui") + if name == "recipe_folders": + return config["setup"].get("recipe_folders", ["data", "reports"]) + return config["setup"].get(name, default) + def set_preset(preset_def=None): """Set the preset object Preset can either be a preset object (instance of Preset class), a preset class name (string) or configuration (dictionary). If preset is None, preset is loaded based on the configuration file. """ import liquer.util + global _preset if preset_def is None: preset_def = config() @@ -350,7 +430,7 @@

    Module liquer.config

    raise Exception("Preset must be a fully qualified class name") if isinstance(preset_def, str): logger.info(f"Instantiating preset {preset_def}") - preset_def = liquer.util.eval_fully_qualified_name(preset_def)() + preset_def = liquer.util.eval_fully_qualified_name(preset_def)() if isinstance(preset_def, Preset): logger.info(f"Instantance of {preset_def.preset_class()} used as preset") _preset = preset_def @@ -378,7 +458,7 @@

    Functions

    """Return the config dictionary""" global _config if _config is None: - _config = dict(setup=dict(preset="liquer.config.Preset")) + _config = dict(setup=dict(preset="liquer.config.RichPreset")) return _config @@ -413,8 +493,10 @@

    Functions

    def initializer():
         """Return the initializer function"""
         global _initializer
    +
         def __initializer(config, worker_environment=False):
             preset().initialize(config, worker_environment=worker_environment)
    +
         if _initializer is None:
             _initializer = __initializer
         return _initializer
    @@ -433,10 +515,17 @@

    Functions

    """Load configuration from a file""" global _config if _config is None: - _config = {} - with open(filename, "r") as f: - new_config = yaml.load(f, Loader=Loader) - _config.update(new_config) + _config = config() + try: + with open(filename, "r") as f: + new_config = yaml.load(f, Loader=Loader) + _config.update(new_config) + except FileNotFoundError: + print(f"Configuration file {filename} not found") + print("Create a default configuration file with:") + print() + print("python -m liquer.app --create-config config.yaml") + print() logger.info(f"Configuration loaded from {filename}") @@ -458,7 +547,7 @@

    Functions

    -async def run_tornado(port=5000, index_query='index') +async def run_tornado(port=5000, index_link='index')
    @@ -466,30 +555,21 @@

    Functions

    Expand source code -
    async def run_tornado(port=5000, index_query="index"):
    -    from liquer.server.tornado_handlers import url_mapping, response
    +
    async def run_tornado(port=5000, index_link="index"):
    +    from liquer.server.tornado_handlers import url_mapping
    +    from liquer.server.handlers import response
         import tornado.web
         import asyncio
         import traceback
         from liquer.query import evaluate
     
         class IndexHandler(tornado.web.RequestHandler):
    -        def prepare(self):
    -            header = "Content-Type"
    -            body = "text/html"
    -            self.set_header(header, body)
             def get(self):
    -            try:
    -                b, mimetype, filename = response(evaluate(index_query))
    -                self.write(b)
    -            except:
    -                traceback.print_exc()
    -                self.set_status(500)
    -                self.finish(f"500 - Failed to create a response to {index_query}")
    -                return
    +            self.redirect(index_link)
     
         application = tornado.web.Application(
    -        url_mapping() + [
    +        url_mapping()
    +        + [
                 (r"/", IndexHandler),
                 (r"/index.html", IndexHandler),
             ]
    @@ -539,6 +619,7 @@ 

    Functions

    If preset is None, preset is loaded based on the configuration file. """ import liquer.util + global _preset if preset_def is None: preset_def = config() @@ -552,7 +633,7 @@

    Functions

    raise Exception("Preset must be a fully qualified class name") if isinstance(preset_def, str): logger.info(f"Instantiating preset {preset_def}") - preset_def = liquer.util.eval_fully_qualified_name(preset_def)() + preset_def = liquer.util.eval_fully_qualified_name(preset_def)() if isinstance(preset_def, Preset): logger.info(f"Instantance of {preset_def.preset_class()} used as preset") _preset = preset_def @@ -592,41 +673,50 @@

    Classes

    It as well can create default configuration files, initialize and start the pool, server and other services """ - modules=[] + + modules = [] + def __init__(self): pass @classmethod def preset_class(cls): """Return the preset class name""" - x="" - if cls.__module__ != "__main__": - x+=cls.__module__ - x+="." - x+=cls.__name__ + x = "" + if cls.__module__ != "__main__": + x += cls.__module__ + x += "." + x += cls.__name__ return x - + def _find_modules(self, path): """Find modules in a path containing LiQuer commands""" from glob import glob import os - for filename in glob(os.path.join(path,"*.py")): - with open(filename,"r") as f: + + for filename in glob(os.path.join(path, "*.py")): + with open(filename, "r") as f: mod = f.read() if "@command" in mod or "@first_command" in mod: yield os.path.splitext(os.path.basename(filename))[0] + def find_modules(self, path): """Find modules in a path containing LiQuer commands""" import os + for root, dirs, files in os.walk(path): rootpath = os.path.relpath(root, path) - module = rootpath.replace(os.path.sep,".") + module = rootpath.replace(os.path.sep, ".") while module.startswith("."): module = module[1:] for file in files: - if file.endswith(".py") and not file.startswith("_") and not file.startswith("test_"): - with open(os.path.join(root, file),"r") as f: + if ( + file.endswith(".py") + and not file.startswith("_") + and not file.startswith("test_") + ): + with open(os.path.join(root, file), "r") as f: mod = f.read() if "@command" in mod or "@first_command" in mod: x = module + "." + os.path.splitext(file)[0] @@ -636,9 +726,8 @@

    Classes

    def default_config(self): """Return a default configuration file in yaml format""" - modules = sorted(set(self.modules - + ["liquer.ext.meta", "liquer.ext.basic"])) - modules_list="\n".join([f" - {m}" for m in modules]) + modules = sorted(set(self.modules + ["liquer.ext.meta", "liquer.ext.basic"])) + modules_list = "\n".join([f" - {m}" for m in modules]) return f""" setup: preset: {self.preset_class():<35} # Preset class name @@ -652,9 +741,10 @@

    Classes

    server_type: {"flask":<35} # Server type (flask, tornado, ...) url_prefix: {'"/liquer"':<35} # URL prefix for the server port: {5000:<35} # Server port + index_link: {"/liquer/q/index/index.html":<35} # Index query """ - - def initialize(self, config, worker_environment=False): + + def initialize(self, config, worker_environment=False): """Initialize from configuration. If worker_environment is True, then initialize specificallz the worker environment, i.e. the worker pool should not be initialized. @@ -670,21 +760,22 @@

    Classes

    """Helper method to get a parameter from the setup section of the configuration""" if "setup" not in config: raise Exception("No setup section in configuration") - return config["setup"].get(name,default) - + return config["setup"].get(name, default) + def load_modules(self, config): """Load modules scpecified in the configuration List of modules is taken from config["setup"]["modules"] """ - for module in self.get_setup_parameter(config, "modules", []): + for module in self.get_setup_parameter(config, "modules", self.modules): logger.info(f"Loading module {module}") __import__(module, fromlist=["*"]) def create_cache(self, config): "Create cache object from configuration" import liquer.cache - cache=self.get_setup_parameter(config, "cache", "off") - if cache in ["off","none", "no", None, False]: + + cache = self.get_setup_parameter(config, "cache", "off") + if cache in ["off", "none", "no", None, False]: logger.info(f"Cache disabled") return liquer.cache.NoCache() elif cache in ["memory"]: @@ -700,15 +791,23 @@

    Classes

    def initialize_cache(self, config): """Initialize cache from configuration""" import liquer.cache + liquer.cache.set_cache(self.create_cache(config)) - + def initialize_store(self, config): """Initialize store from configuration""" import liquer.store from liquer.recipes import RecipeSpecStore - recipe_folders=self.get_setup_parameter(config, "recipe_folders", []) + from pathlib import Path + + liquer.store.get_web_store() + recipe_folders = self.get_setup_parameter(config, "recipe_folders", []) for folder_name in recipe_folders: logger.info(f"Adding recipe folder {folder_name}") + p=Path(folder_name) + if not p.exists(): + print(f"Creating folder {folder_name}") + p.mkdir(parents=True) s = RecipeSpecStore(liquer.store.FileStore(folder_name)).with_indexer() liquer.store.mount(folder_name, s) @@ -716,10 +815,11 @@

    Classes

    """Initialize pool from configuration""" import liquer.pool import liquer.cache - cache_concurrency=self.get_setup_parameter(config, "cache_concurrency") - if cache_concurrency in["off","none", "no", None, False]: + + cache_concurrency = self.get_setup_parameter(config, "cache_concurrency") + if cache_concurrency in ["off", "none", "no", None, False]: logger.info(f"Cache not configured for concurrency") - logger.warning(f"Distributed execution will not work properly") + logger.warning(f"Distributed execution will not work properly") elif cache_concurrency == "central": logger.info(f"Enabling central cache") liquer.pool.set_central_cache(liquer.cache.get_cache()) @@ -734,47 +834,51 @@

    Classes

    from liquer.state import set_var from liquer.query import evaluate - server_type=self.get_setup_parameter(config, "server_type", "flask") + server_type = self.get_setup_parameter(config, "server_type", "flask") if server_type in ["flask"]: logger.info(f"Starting flask server") import flask import liquer.server.blueprint as bp import traceback + app = flask.Flask(__name__) - url_prefix=self.get_setup_parameter(config, "url_prefix", "/liquer") - port=self.get_setup_parameter(config, "port", "5000") - flask_debug=self.get_setup_parameter(config, "debug", False) - host=self.get_setup_parameter(config, "host", "127.0.0.1") - flask_threaded=self.get_setup_parameter(config, "threaded", False) - index_query=self.get_setup_parameter(config, "index_query", "index") + url_prefix = self.get_setup_parameter(config, "url_prefix", "/liquer") + port = self.get_setup_parameter(config, "port", "5000") + flask_debug = self.get_setup_parameter(config, "debug", False) + host = self.get_setup_parameter(config, "host", "127.0.0.1") + flask_threaded = self.get_setup_parameter(config, "threaded", False) + index_link = self.get_setup_parameter(config, "index_link", "/liquer/q/index/index.html") + print(f"Index link: {index_link}") app.register_blueprint(bp.app, url_prefix=url_prefix) - - @app.route('/') - @app.route('/index.html') + + @app.route("/") + @app.route("/index.html") def index(): - try: - return bp.response(evaluate(index_query)) - except: - traceback.print_exc() - flask.abort(500) - - set_var("api_path",url_prefix+"/q/") - set_var("server",f"http://{host}:{port}") + print(f"Redirect to index link: {index_link}") + return flask.redirect(index_link, code=302) + + set_var("api_path", url_prefix + "/q/") + set_var("server", f"http://{host}:{port}") app.run(debug=flask_debug, host=host, port=port, threaded=flask_threaded) elif server_type in ["tornado"]: import asyncio + logger.info(f"Starting tornado server") - port=self.get_setup_parameter(config, "port", "5000") - 
url_prefix=self.get_setup_parameter(config, "url_prefix", "/liquer") - index_query=self.get_setup_parameter(config, "index_query", "index") - host=self.get_setup_parameter(config, "host", "127.0.0.1") - set_var("api_path",url_prefix+"/q/") - set_var("server",f"http://{host}:{port}") - asyncio.run(run_tornado(port, index_query=index_query)) + port = self.get_setup_parameter(config, "port", "5000") + url_prefix = self.get_setup_parameter(config, "url_prefix", "/liquer") + index_link = self.get_setup_parameter(config, "index_link", "/liquer/q/index/index.html") + host = self.get_setup_parameter(config, "host", "127.0.0.1") + set_var("api_path", url_prefix + "/q/") + set_var("server", f"http://{host}:{port}") + asyncio.run(run_tornado(port, index_link=index_link)) else: raise Exception(f"Unknown server type: {server_type}")
    +

    Subclasses

    +

    Class variables

    var modules
    @@ -798,7 +902,7 @@

    Static methods

    """Helper method to get a parameter from the setup section of the configuration""" if "setup" not in config: raise Exception("No setup section in configuration") - return config["setup"].get(name,default)
    + return config["setup"].get(name, default)
    @@ -813,11 +917,11 @@

    Static methods

    @classmethod
     def preset_class(cls):
         """Return the preset class name"""
    -    x=""
    -    if cls.__module__ != "__main__":          
    -        x+=cls.__module__
    -        x+="."
    -    x+=cls.__name__
    +    x = ""
    +    if cls.__module__ != "__main__":
    +        x += cls.__module__
    +        x += "."
    +    x += cls.__name__
         return x
    @@ -836,8 +940,9 @@

    Methods

    def create_cache(self, config):
         "Create cache object from configuration"
         import liquer.cache
    -    cache=self.get_setup_parameter(config, "cache", "off")
    -    if cache in ["off","none", "no", None, False]:
    +
    +    cache = self.get_setup_parameter(config, "cache", "off")
    +    if cache in ["off", "none", "no", None, False]:
             logger.info(f"Cache disabled")
             return liquer.cache.NoCache()
         elif cache in ["memory"]:
    @@ -862,9 +967,8 @@ 

    Methods

        def default_config(self):
             """Return a default configuration file in yaml format"""
    -        modules = sorted(set(self.modules
    -                             + ["liquer.ext.meta", "liquer.ext.basic"]))
    -        modules_list="\n".join([f"      - {m}" for m in modules])
    +        modules = sorted(set(self.modules + ["liquer.ext.meta", "liquer.ext.basic"]))
    +        modules_list = "\n".join([f"      - {m}" for m in modules])
             return f"""
     setup:
         preset:            {self.preset_class():<35} # Preset class name
    @@ -878,6 +982,7 @@ 

    Methods

    server_type: {"flask":<35} # Server type (flask, tornado, ...) url_prefix: {'"/liquer"':<35} # URL prefix for the server port: {5000:<35} # Server port + index_link: {"/liquer/q/index/index.html":<35} # Index query """
    @@ -893,15 +998,20 @@

    Methods

    def find_modules(self, path):
         """Find modules in a path containing LiQuer commands"""
         import os
    +
         for root, dirs, files in os.walk(path):
             rootpath = os.path.relpath(root, path)
    -        module = rootpath.replace(os.path.sep,".")
    +        module = rootpath.replace(os.path.sep, ".")
             while module.startswith("."):
                 module = module[1:]
     
             for file in files:
    -            if file.endswith(".py") and not file.startswith("_") and not file.startswith("test_"):
    -                with open(os.path.join(root, file),"r") as f:
    +            if (
    +                file.endswith(".py")
    +                and not file.startswith("_")
    +                and not file.startswith("test_")
    +            ):
    +                with open(os.path.join(root, file), "r") as f:
                         mod = f.read()
                         if "@command" in mod or "@first_command" in mod:
                             x = module + "." + os.path.splitext(file)[0]
    @@ -921,7 +1031,7 @@ 

    Methods

    Expand source code -
    def initialize(self, config, worker_environment=False):        
    +
    def initialize(self, config, worker_environment=False):
         """Initialize from configuration.
         If worker_environment is True, then initialize specificallz the worker environment,
         i.e. the worker pool should not be initialized.
    @@ -945,6 +1055,7 @@ 

    Methods

    def initialize_cache(self, config):
         """Initialize cache from configuration"""
         import liquer.cache
    +
         liquer.cache.set_cache(self.create_cache(config))
    @@ -961,10 +1072,11 @@

    Methods

    """Initialize pool from configuration""" import liquer.pool import liquer.cache - cache_concurrency=self.get_setup_parameter(config, "cache_concurrency") - if cache_concurrency in["off","none", "no", None, False]: + + cache_concurrency = self.get_setup_parameter(config, "cache_concurrency") + if cache_concurrency in ["off", "none", "no", None, False]: logger.info(f"Cache not configured for concurrency") - logger.warning(f"Distributed execution will not work properly") + logger.warning(f"Distributed execution will not work properly") elif cache_concurrency == "central": logger.info(f"Enabling central cache") liquer.pool.set_central_cache(liquer.cache.get_cache()) @@ -988,9 +1100,16 @@

    Methods

    """Initialize store from configuration""" import liquer.store from liquer.recipes import RecipeSpecStore - recipe_folders=self.get_setup_parameter(config, "recipe_folders", []) + from pathlib import Path + + liquer.store.get_web_store() + recipe_folders = self.get_setup_parameter(config, "recipe_folders", []) for folder_name in recipe_folders: logger.info(f"Adding recipe folder {folder_name}") + p=Path(folder_name) + if not p.exists(): + print(f"Creating folder {folder_name}") + p.mkdir(parents=True) s = RecipeSpecStore(liquer.store.FileStore(folder_name)).with_indexer() liquer.store.mount(folder_name, s)
    @@ -1009,7 +1128,7 @@

    Methods

    """Load modules scpecified in the configuration List of modules is taken from config["setup"]["modules"] """ - for module in self.get_setup_parameter(config, "modules", []): + for module in self.get_setup_parameter(config, "modules", self.modules): logger.info(f"Loading module {module}") __import__(module, fromlist=["*"])
    @@ -1028,50 +1147,144 @@

    Methods

    from liquer.state import set_var from liquer.query import evaluate - server_type=self.get_setup_parameter(config, "server_type", "flask") + server_type = self.get_setup_parameter(config, "server_type", "flask") if server_type in ["flask"]: logger.info(f"Starting flask server") import flask import liquer.server.blueprint as bp import traceback + app = flask.Flask(__name__) - url_prefix=self.get_setup_parameter(config, "url_prefix", "/liquer") - port=self.get_setup_parameter(config, "port", "5000") - flask_debug=self.get_setup_parameter(config, "debug", False) - host=self.get_setup_parameter(config, "host", "127.0.0.1") - flask_threaded=self.get_setup_parameter(config, "threaded", False) - index_query=self.get_setup_parameter(config, "index_query", "index") + url_prefix = self.get_setup_parameter(config, "url_prefix", "/liquer") + port = self.get_setup_parameter(config, "port", "5000") + flask_debug = self.get_setup_parameter(config, "debug", False) + host = self.get_setup_parameter(config, "host", "127.0.0.1") + flask_threaded = self.get_setup_parameter(config, "threaded", False) + index_link = self.get_setup_parameter(config, "index_link", "/liquer/q/index/index.html") + print(f"Index link: {index_link}") app.register_blueprint(bp.app, url_prefix=url_prefix) - - @app.route('/') - @app.route('/index.html') + + @app.route("/") + @app.route("/index.html") def index(): - try: - return bp.response(evaluate(index_query)) - except: - traceback.print_exc() - flask.abort(500) - - set_var("api_path",url_prefix+"/q/") - set_var("server",f"http://{host}:{port}") + print(f"Redirect to index link: {index_link}") + return flask.redirect(index_link, code=302) + + set_var("api_path", url_prefix + "/q/") + set_var("server", f"http://{host}:{port}") app.run(debug=flask_debug, host=host, port=port, threaded=flask_threaded) elif server_type in ["tornado"]: import asyncio + logger.info(f"Starting tornado server") - port=self.get_setup_parameter(config, "port", "5000") - 
url_prefix=self.get_setup_parameter(config, "url_prefix", "/liquer") - index_query=self.get_setup_parameter(config, "index_query", "index") - host=self.get_setup_parameter(config, "host", "127.0.0.1") - set_var("api_path",url_prefix+"/q/") - set_var("server",f"http://{host}:{port}") - asyncio.run(run_tornado(port, index_query=index_query)) + port = self.get_setup_parameter(config, "port", "5000") + url_prefix = self.get_setup_parameter(config, "url_prefix", "/liquer") + index_link = self.get_setup_parameter(config, "index_link", "/liquer/q/index/index.html") + host = self.get_setup_parameter(config, "host", "127.0.0.1") + set_var("api_path", url_prefix + "/q/") + set_var("server", f"http://{host}:{port}") + asyncio.run(run_tornado(port, index_link=index_link)) else: raise Exception(f"Unknown server type: {server_type}")
    +
    +class RichPreset +
    +
    +

    A preset is a configuration object, which can be used to initialize liquer. +Its role is to +* interpret the configuration file +* initialize commands +* initialize caches +* initialize store

    +

    It can also create default configuration files, +initialize and start the pool, server and other services

    +
    + +Expand source code + +
    class RichPreset(Preset):
    +    modules = [
    +        "liquer.ext.dataframe_batches",
    +        "liquer.ext.lq_matplotlib",
    +        "liquer.ext.lq_openpyxl",
    +        "liquer.ext.lq_pandas",
    +        "liquer.ext.lq_pil",
    +        "liquer.ext.lq_plotly",
    +        "liquer.ext.lq_polars",
    +        "liquer.ext.lq_pygments",
    +        "liquer.ext.lq_python",
    +        "liquer.ext.lq_sweetviz",
    +        "liquer.ext.basic",
    +        "liquer.ext.meta",
    +        "liquer_pcv",
    +        "liquer_gui"
    +    ]
    +    def default_config(self):
    +        """Return a default configuration file in yaml format"""
    +        modules = sorted(set(self.modules + ["liquer.ext.meta", "liquer.ext.basic"]))
    +        modules_list = "\n".join([f"      - {m}" for m in modules])
    +        recipe_folders ="\n".join (f"      - {m}" for m in ["data", "reports"])
    +        return f"""
    +setup:
    +    preset:            {self.preset_class():<35} # Preset class name
    +    modules:           {"":<35} # Modules with commands to import
    +{modules_list}
    +    cache:             {"off":<35} # Cache type (off, memory, file, ...)
    +    cache_path:        {"cache":<35} # Cache path (for file cache)
    +    cache_concurrency: {"central":<35} # Cache concurrency (off, local, central)
    +    recipe_folders:    {"":<35} # Recipe folders
    +{recipe_folders}
    +    server_type:       {"flask":<35} # Server type (flask, tornado, ...)
    +    url_prefix:        {'"/liquer"':<35} # URL prefix for the server
    +    port:              {5000:<35} # Server port
    +    index_query:       {"/liquer/web/gui":<35} # Index query
    +"""
    +    @classmethod
    +    def get_setup_parameter(cls, config, name, default=None):
    +        """Helper method to get a parameter from the setup section of the configuration"""
    +        if "setup" not in config:
    +            raise Exception("No setup section in configuration")
    +        if name == "index_link":
    +            return config["setup"].get("index_link", "/liquer/web/gui")
    +        if name == "recipe_folders":
    +            return config["setup"].get("recipe_folders", ["data", "reports"])
    +        return config["setup"].get(name, default)
    +
    +

    Ancestors

    + +

    Class variables

    +
    +
    var modules
    +
    +
    +
    +
    +

    Inherited members

    + +
    @@ -1117,6 +1330,12 @@

    Presetstart_server +
  • +

    RichPreset

    + +
  • diff --git a/site/apidocs/liquer/context.html b/site/apidocs/liquer/context.html index 46b26d4..b06f4b8 100644 --- a/site/apidocs/liquer/context.html +++ b/site/apidocs/liquer/context.html @@ -920,10 +920,19 @@

    Module liquer.context

    traceback.print_exc() return state - def create_initial_state(self): - state = State() - state.query = "" - return state + def create_initial_state(self, input_value=None): + """Create an initial state. + This is used to create the input state for a resource query. + The initial value can be specified as an argument. + """ + if input_value is None: + state = State() + state.query = "" + return state + else: + state = State().with_data(input_value) + state.query = "" + return state @classmethod def to_query(cls, query): @@ -991,6 +1000,7 @@

    Module liquer.context

    store_key=None, store_to=None, extra_parameters=None, + input_value=None, ): """Evaluate query, returns a State. This method can be used in a command to evaluate a subquery, @@ -1006,6 +1016,8 @@

    Module liquer.context

    If extra_parameters are specified, these parameters are appended to the parameters of the last action. This effectively renders the evaluation volatile. Note that the action needs correct amount of parameters. + + The input_value parameter can be used to specify the input value for the first action. """ self.enable_store_metadata = False # Prevents overwriting cache with metadata self.status = Status.EVALUATION @@ -1049,7 +1061,7 @@

    Module liquer.context

    self.debug(f"Using cache {repr(cache)}") self.debug(f"Try cache {query}") - if extra_parameters is None or len(extra_parameters)==0: + if (extra_parameters is None or len(extra_parameters)==0) and input_value is None: state = cache.get(query.encode()) if state is not None: self.debug(f"Cache hit {query}") @@ -1058,8 +1070,12 @@

    Module liquer.context

    return state else: state=None - print("Extra parameters specified, cache disabled", extra_parameters) - self.debug("Extra parameters specified, cache disabled") + if input_value is not None: + print("Input value specified, cache disabled") + self.debug("Input value specified, cache disabled") + else: + print("Extra parameters specified, cache disabled", extra_parameters) + self.debug("Extra parameters specified, cache disabled") self.enable_store_metadata = ( True # Metadata can be only written after trying to read from cache, ) @@ -1078,7 +1094,7 @@

    Module liquer.context

    self.debug(f"PROCESS Predecessor:{p} Action: {r}") if p is None or p.is_empty(): self.parent_query = "" - state = self.create_initial_state() + state = self.create_initial_state(input_value=input_value) state.metadata["created"] = self.now() self.debug(f"INITIAL STATE") else: @@ -1088,7 +1104,7 @@

    Module liquer.context

    c=self.child_context() c.evaluated_key = self.evaluated_key c.cwd_key = self.cwd_key - state = c.evaluate(p, cache=cache) + state = c.evaluate(p, cache=cache, input_value=input_value) if state.is_error: self.status = Status.ERROR self.store_metadata() @@ -1145,6 +1161,25 @@

    Module liquer.context

    self.warning("Indexer failed", traceback=traceback.format_exc()) return state + def evaluate_on(self, value, query): + """Evaluate query on a given value. + This is a convenience method, which creates a context, evaluates the query and returns the result. + """ + print(f"*** Evaluate on {value} query {query} started") + return self.child_context().evaluate(query, input_value=value) + + def create_evaluate_on_state_function(self, query): + """Create a function, which evaluates query on a given value. + This is a convenience method, which creates a context, evaluates the query and returns the result. + """ + return lambda value=None, q=query, c=self: c.evaluate_on(value, query) + + def create_evaluate_on_function(self, query): + """Create a function, which evaluates query on a given value. + This is a convenience method, which creates a context, evaluates the query and returns the result. + """ + return lambda value=None, q=query, c=self: c.evaluate_on(value, query).get() + def evaluate_and_save( self, query, @@ -1243,14 +1278,20 @@

    Module liquer.context

    if path is not None: q = parse(q).to_absolute(path, resource_segment_name=resource_segment_name).encode() if q in local_cache: + self.info(f"Using cached {q}") result += local_cache[q] else: - state = self.evaluate(q, description=f"template expansion of {q}") - if state.is_error: - self.error(f"Template failed to expand {q}") + self.info(f"Evaluating {q}") + try: + state = self.evaluate(q, description=f"template expansion of {q}") + if state.is_error: + self.error(f"Template failed to expand {q}") + qr = f"ERROR({q})" + else: + qr = str(state.get()) + except: + self.error(f"Template crashed on expanding {q}", traceback=traceback.format_exc()) qr = f"ERROR({q})" - else: - qr = str(state.get()) local_cache[q] = qr result += qr return result

    @@ -1971,10 +2012,19 @@

    Classes

    traceback.print_exc() return state - def create_initial_state(self): - state = State() - state.query = "" - return state + def create_initial_state(self, input_value=None): + """Create an initial state. + This is used to create the input state for a resource query. + The initial value can be specified as an argument. + """ + if input_value is None: + state = State() + state.query = "" + return state + else: + state = State().with_data(input_value) + state.query = "" + return state @classmethod def to_query(cls, query): @@ -2042,6 +2092,7 @@

    Classes

    store_key=None, store_to=None, extra_parameters=None, + input_value=None, ): """Evaluate query, returns a State. This method can be used in a command to evaluate a subquery, @@ -2057,6 +2108,8 @@

    Classes

    If extra_parameters are specified, these parameters are appended to the parameters of the last action. This effectively renders the evaluation volatile. Note that the action needs correct amount of parameters. + + The input_value parameter can be used to specify the input value for the first action. """ self.enable_store_metadata = False # Prevents overwriting cache with metadata self.status = Status.EVALUATION @@ -2100,7 +2153,7 @@

    Classes

    self.debug(f"Using cache {repr(cache)}") self.debug(f"Try cache {query}") - if extra_parameters is None or len(extra_parameters)==0: + if (extra_parameters is None or len(extra_parameters)==0) and input_value is None: state = cache.get(query.encode()) if state is not None: self.debug(f"Cache hit {query}") @@ -2109,8 +2162,12 @@

    Classes

    return state else: state=None - print("Extra parameters specified, cache disabled", extra_parameters) - self.debug("Extra parameters specified, cache disabled") + if input_value is not None: + print("Input value specified, cache disabled") + self.debug("Input value specified, cache disabled") + else: + print("Extra parameters specified, cache disabled", extra_parameters) + self.debug("Extra parameters specified, cache disabled") self.enable_store_metadata = ( True # Metadata can be only written after trying to read from cache, ) @@ -2129,7 +2186,7 @@

    Classes

    self.debug(f"PROCESS Predecessor:{p} Action: {r}") if p is None or p.is_empty(): self.parent_query = "" - state = self.create_initial_state() + state = self.create_initial_state(input_value=input_value) state.metadata["created"] = self.now() self.debug(f"INITIAL STATE") else: @@ -2139,7 +2196,7 @@

    Classes

    c=self.child_context() c.evaluated_key = self.evaluated_key c.cwd_key = self.cwd_key - state = c.evaluate(p, cache=cache) + state = c.evaluate(p, cache=cache, input_value=input_value) if state.is_error: self.status = Status.ERROR self.store_metadata() @@ -2196,6 +2253,25 @@

    Classes

    self.warning("Indexer failed", traceback=traceback.format_exc()) return state + def evaluate_on(self, value, query): + """Evaluate query on a given value. + This is a convenience method, which creates a context, evaluates the query and returns the result. + """ + print(f"*** Evaluate on {value} query {query} started") + return self.child_context().evaluate(query, input_value=value) + + def create_evaluate_on_state_function(self, query): + """Create a function, which evaluates query on a given value. + This is a convenience method, which creates a context, evaluates the query and returns the result. + """ + return lambda value=None, q=query, c=self: c.evaluate_on(value, query) + + def create_evaluate_on_function(self, query): + """Create a function, which evaluates query on a given value. + This is a convenience method, which creates a context, evaluates the query and returns the result. + """ + return lambda value=None, q=query, c=self: c.evaluate_on(value, query).get() + def evaluate_and_save( self, query, @@ -2294,14 +2370,20 @@

    Classes

    if path is not None: q = parse(q).to_absolute(path, resource_segment_name=resource_segment_name).encode() if q in local_cache: + self.info(f"Using cached {q}") result += local_cache[q] else: - state = self.evaluate(q, description=f"template expansion of {q}") - if state.is_error: - self.error(f"Template failed to expand {q}") + self.info(f"Evaluating {q}") + try: + state = self.evaluate(q, description=f"template expansion of {q}") + if state.is_error: + self.error(f"Template failed to expand {q}") + qr = f"ERROR({q})" + else: + qr = str(state.get()) + except: + self.error(f"Template crashed on expanding {q}", traceback=traceback.format_exc()) qr = f"ERROR({q})" - else: - qr = str(state.get()) local_cache[q] = qr result += qr return result
    @@ -2425,19 +2507,64 @@

    Methods

    return command_registry() +
    +def create_evaluate_on_function(self, query) +
    +
    +

    Create a function, which evaluates query on a given value. +This is a convenience method, which creates a context, evaluates the query and returns the result.

    +
    + +Expand source code + +
    def create_evaluate_on_function(self, query):
    +    """Create a function, which evaluates query on a given value.
    +    This is a convenience method, which creates a context, evaluates the query and returns the result.
    +    """
    +    return lambda value=None, q=query, c=self: c.evaluate_on(value, query).get()
    +
    +
    +
    +def create_evaluate_on_state_function(self, query) +
    +
    +

    Create a function, which evaluates query on a given value. +This is a convenience method, which creates a context, evaluates the query and returns the result.

    +
    + +Expand source code + +
    def create_evaluate_on_state_function(self, query):
    +    """Create a function, which evaluates query on a given value.
    +    This is a convenience method, which creates a context, evaluates the query and returns the result.
    +    """
    +    return lambda value=None, q=query, c=self: c.evaluate_on(value, query)
    +
    +
    -def create_initial_state(self) +def create_initial_state(self, input_value=None)
    -
    +

    Create an initial state. +This is used to create the input state for a resource query. +The initial value can be specified as an argument.

    Expand source code -
    def create_initial_state(self):
    -    state = State()
    -    state.query = ""
    -    return state
    +
    def create_initial_state(self, input_value=None):
    +    """Create an initial state.
    +    This is used to create the input state for a resource query.
    +    The initial value can be specified as an argument.
    +    """
    +    if input_value is None:
    +        state = State()
    +        state.query = ""
    +        return state
    +    else:
    +        state = State().with_data(input_value)
    +        state.query = ""
    +        return state
    @@ -2482,7 +2609,7 @@

    Methods

    -def evaluate(self, query, cache=None, description=None, store_key=None, store_to=None, extra_parameters=None) +def evaluate(self, query, cache=None, description=None, store_key=None, store_to=None, extra_parameters=None, input_value=None)

    Evaluate query, returns a State. @@ -2495,7 +2622,8 @@

    Methods

    Evaluation can be (besides cache) stored in the store under the key specified by the store_key (if not None). A store can be specified too via the store_to option. If None (default), the default store (from the store method) is used.

    If extra_parameters are specified, these parameters are appended to the parameters of the last action. -This effectively renders the evaluation volatile. Note that the action needs correct amount of parameters.

    +This effectively renders the evaluation volatile. Note that the action needs correct amount of parameters.

    +

    The input_value parameter can be used to specify the input value for the first action.

    Expand source code @@ -2508,6 +2636,7 @@

    Methods

    store_key=None, store_to=None, extra_parameters=None, + input_value=None, ): """Evaluate query, returns a State. This method can be used in a command to evaluate a subquery, @@ -2523,6 +2652,8 @@

    Methods

    If extra_parameters are specified, these parameters are appended to the parameters of the last action. This effectively renders the evaluation volatile. Note that the action needs correct amount of parameters. + + The input_value parameter can be used to specify the input value for the first action. """ self.enable_store_metadata = False # Prevents overwriting cache with metadata self.status = Status.EVALUATION @@ -2566,7 +2697,7 @@

    Methods

    self.debug(f"Using cache {repr(cache)}") self.debug(f"Try cache {query}") - if extra_parameters is None or len(extra_parameters)==0: + if (extra_parameters is None or len(extra_parameters)==0) and input_value is None: state = cache.get(query.encode()) if state is not None: self.debug(f"Cache hit {query}") @@ -2575,8 +2706,12 @@

    Methods

    return state else: state=None - print("Extra parameters specified, cache disabled", extra_parameters) - self.debug("Extra parameters specified, cache disabled") + if input_value is not None: + print("Input value specified, cache disabled") + self.debug("Input value specified, cache disabled") + else: + print("Extra parameters specified, cache disabled", extra_parameters) + self.debug("Extra parameters specified, cache disabled") self.enable_store_metadata = ( True # Metadata can be only written after trying to read from cache, ) @@ -2595,7 +2730,7 @@

    Methods

    self.debug(f"PROCESS Predecessor:{p} Action: {r}") if p is None or p.is_empty(): self.parent_query = "" - state = self.create_initial_state() + state = self.create_initial_state(input_value=input_value) state.metadata["created"] = self.now() self.debug(f"INITIAL STATE") else: @@ -2605,7 +2740,7 @@

    Methods

    c=self.child_context() c.evaluated_key = self.evaluated_key c.cwd_key = self.cwd_key - state = c.evaluate(p, cache=cache) + state = c.evaluate(p, cache=cache, input_value=input_value) if state.is_error: self.status = Status.ERROR self.store_metadata() @@ -2930,6 +3065,24 @@

    Methods

    return state
    +
    +def evaluate_on(self, value, query) +
    +
    +

    Evaluate query on a given value. +This is a convenience method, which creates a context, evaluates the query and returns the result.

    +
    + +Expand source code + +
    def evaluate_on(self, value, query):
    +    """Evaluate query on a given value.
    +    This is a convenience method, which creates a context, evaluates the query and returns the result.
    +    """
    +    print(f"*** Evaluate on {value} query {query} started")
    +    return self.child_context().evaluate(query, input_value=value)
    +
    +
    def evaluate_parameter(self, p, action)
    @@ -3115,14 +3268,20 @@

    Methods

    if path is not None: q = parse(q).to_absolute(path, resource_segment_name=resource_segment_name).encode() if q in local_cache: + self.info(f"Using cached {q}") result += local_cache[q] else: - state = self.evaluate(q, description=f"template expansion of {q}") - if state.is_error: - self.error(f"Template failed to expand {q}") + self.info(f"Evaluating {q}") + try: + state = self.evaluate(q, description=f"template expansion of {q}") + if state.is_error: + self.error(f"Template failed to expand {q}") + qr = f"ERROR({q})" + else: + qr = str(state.get()) + except: + self.error(f"Template crashed on expanding {q}", traceback=traceback.format_exc()) qr = f"ERROR({q})" - else: - qr = str(state.get()) local_cache[q] = qr result += qr return result @@ -4060,6 +4219,8 @@

    Conte
  • can_report
  • child_context
  • command_registry
  • +
  • create_evaluate_on_function
  • +
  • create_evaluate_on_state_function
  • create_initial_state
  • create_state
  • disable_cache
  • @@ -4067,6 +4228,7 @@

    Conte
  • evaluate
  • evaluate_action
  • evaluate_and_save
  • +
  • evaluate_on
  • evaluate_parameter
  • evaluate_resource
  • evaluate_template
  • diff --git a/site/apidocs/liquer/ext/meta.html b/site/apidocs/liquer/ext/meta.html index b5ade0e..3e48888 100644 --- a/site/apidocs/liquer/ext/meta.html +++ b/site/apidocs/liquer/ext/meta.html @@ -246,7 +246,11 @@

    Module liquer.ext.meta

    data = [] if store.is_dir(key): for name in store.listdir(key): - metadata = store.get_metadata(store.join_key(key, name)) + try: + metadata = store.get_metadata(store.join_key(key, name)) + except: + context.warning(f"Can't read metadata for {store.join_key(key, name)}", traceback=traceback.format_exc()) + continue fileinfo = metadata.get("fileinfo", {}) data.append( dict( @@ -676,7 +680,11 @@

    Functions

    data = [] if store.is_dir(key): for name in store.listdir(key): - metadata = store.get_metadata(store.join_key(key, name)) + try: + metadata = store.get_metadata(store.join_key(key, name)) + except: + context.warning(f"Can't read metadata for {store.join_key(key, name)}", traceback=traceback.format_exc()) + continue fileinfo = metadata.get("fileinfo", {}) data.append( dict( diff --git a/site/apidocs/liquer/index.html b/site/apidocs/liquer/index.html index 1701a1d..dfc460a 100644 --- a/site/apidocs/liquer/index.html +++ b/site/apidocs/liquer/index.html @@ -39,7 +39,7 @@

    Package liquer

    from liquer.query import evaluate, evaluate_and_save, evaluate_template from liquer.context import get_context -__version__ = "0.8.0"
    +__version__ = "0.9.4"
    diff --git a/site/apidocs/liquer/recipes.html b/site/apidocs/liquer/recipes.html index 251b1df..62f0c23 100644 --- a/site/apidocs/liquer/recipes.html +++ b/site/apidocs/liquer/recipes.html @@ -571,7 +571,7 @@

    Module liquer.recipes

    if self.substore.is_dir(dir_key): for d in self.listdir(dir_key): key = f"{dir_key}/{d}" if len(dir_key) else d - if d == self.STATUS_FILE: + if d == self.STATUS_FILE and self.STATUS_FILE is not None: continue if not self.is_dir(key): metadata = self.get_metadata(key) @@ -629,6 +629,8 @@

    Module liquer.recipes

    return txt def create_status(self, key): + if self.STATUS_FILE is None: + return if self.key_name(key) != self.STATUS_FILE: if not self.is_dir(key): key = self.parent_key(key) @@ -2037,7 +2039,7 @@

    Inherited members

    if self.substore.is_dir(dir_key): for d in self.listdir(dir_key): key = f"{dir_key}/{d}" if len(dir_key) else d - if d == self.STATUS_FILE: + if d == self.STATUS_FILE and self.STATUS_FILE is not None: continue if not self.is_dir(key): metadata = self.get_metadata(key) @@ -2095,6 +2097,8 @@

    Inherited members

    return txt def create_status(self, key): + if self.STATUS_FILE is None: + return if self.key_name(key) != self.STATUS_FILE: if not self.is_dir(key): key = self.parent_key(key) @@ -2168,6 +2172,8 @@

    Methods

    Expand source code
    def create_status(self, key):
    +    if self.STATUS_FILE is None:
    +        return
         if self.key_name(key) != self.STATUS_FILE:
             if not self.is_dir(key):
                 key = self.parent_key(key)
    @@ -2199,7 +2205,7 @@ 

    Methods

    if self.substore.is_dir(dir_key): for d in self.listdir(dir_key): key = f"{dir_key}/{d}" if len(dir_key) else d - if d == self.STATUS_FILE: + if d == self.STATUS_FILE and self.STATUS_FILE is not None: continue if not self.is_dir(key): metadata = self.get_metadata(key) diff --git a/site/apidocs/liquer/server/index.html b/site/apidocs/liquer/server/index.html index 22fa438..1e3a4d2 100644 --- a/site/apidocs/liquer/server/index.html +++ b/site/apidocs/liquer/server/index.html @@ -37,6 +37,10 @@

    Sub-modules

    Flask blueprint for LiQuer server

    +
    liquer.server.fastapi
    +
    +

    FastAPI router for LiQuer server

    +
    liquer.server.handlers

    Handlers for LiQuer server @@ -69,6 +73,7 @@

    Index

  • Sub-modules

    diff --git a/site/guide/index.html b/site/guide/index.html index 479a85c..2ab71d3 100644 --- a/site/guide/index.html +++ b/site/guide/index.html @@ -101,7 +101,7 @@

    Instalation

    -

    LiQuer requires at least python 3.6 with flask. It can be installed by

    +

    LiQuer requires (at minimum) python 3.6 with flask. It can be installed by

    python3 -m pip install liquer-framework
     
    @@ -115,7 +115,7 @@

    Instalation

  • Getting started

    -

    The good tradition is to start with a Hello, world! example:

    +

    A good tradition is to start with a Hello, world! example:

    from liquer import *
     
     @first_command
    @@ -170,7 +170,7 @@ 

    Getting started

    In this example we just evaluate them in the script by the evaluate function.

    What did we actually gain?

    -

    Link query syntax allows to represent pipelines are relatively short strings. +

    Link query syntax allows to represent pipelines as short strings. More importantly, link query can be used as a path part of the URL. Unlike the more conventional web services typically a separate request @@ -180,12 +180,7 @@

    What did we actually gain?

    LiQuer has a well-defined web service API

    A server version of the same example:

    from liquer import *
    -
    -### Create Flask app and register LiQuer blueprint
    -from flask import Flask
    -import liquer.blueprint as bp
    -app = Flask(__name__)
    -app.register_blueprint(bp.app, url_prefix='/liquer')
    +from liquer.app import quickstart
     
     @first_command
     def hello():
    @@ -196,12 +191,15 @@ 

    What did we actually gain?

    return f"{greeting}, {who}!" if __name__ == '__main__': - app.run() + quickstart(index_link="/liquer/q/hello/greet/readme.txt")
    -

    This is a normal flask server, registering LiQuer -blueprint which makes all the LiQuer functionality available -in the web service.

    +

    This is a quick way to start a LiQuer server. It should automatically open the link /liquer/q/hello/greet/readme.txt, +which executes the query hello/greet. The result is exposed as readme.txt. The name (readme) is arbitrary, but the file extension (txt) +is significant, since it determines the output format. +The /liquer/q is an endpoint for executing a query (see web service API).

    +

    The quickstart is one of the simplest methods to start LiQuer in server mode. +The LiQuer framework offers, however, many ways to configure and adapt the solution.

    Working with pandas

    Pandas example:

    from liquer import *
    diff --git a/site/index.html b/site/index.html
    index f3a4246..9451e52 100644
    --- a/site/index.html
    +++ b/site/index.html
    @@ -193,5 +193,5 @@ 

    Hello, world!

    diff --git a/site/search/search_index.json b/site/search/search_index.json index e62fe2f..a5f246c 100644 --- a/site/search/search_index.json +++ b/site/search/search_index.json @@ -1 +1 @@ -{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Welcome to LiQuer LiQuer is a leightweighted open-source framework (see repo ) covering a large number of usecases associated with machine learning, data science and other computational experimentation tasks requiring flexible analysis. LiQuer is a versatile tool - it helps to create interactive dashboards and web applications, working with tables, creating charts, images, reports - but as well non-interactive batch processes. The core of Liquer is a minimalistic query language, that represents a sequence of actions as a compact (but still human readable) string, or as URL \"link\". (Hence the name Link Query.) The second pillar of LiQuer is metadata: LiQuer always keeps track of metadata associated with the data. LiQuer queries can * execute interactively in a browser, * execute non-interactively in a batch, * referenced in reports, * efficiently cache the final and intermediate results, * improve the transparency, traceability and discoverability by the use of metadata - and more. Design of LiQuer is guided by the following principles: Simplicity and flexibility - Make simple things simple, complex things possible. Batteries included - Provide useful features and integration of 3rd party libraries out of the box in a modular way. Don't stand in the way - collaborate! - Do not force one way of doing things. Be technology neutral and integrate well with other libraries and frameworks. LiQuer-enabled code should run as well without the framework and thus using LiQuer should be low risk in terms of dependencies. Make all parts modular, replaceable and customizable. 
LiQuer is extremely easy to use - just decorate ordinary python functions with a simple decorator. LiQuer provides integration of essential data-science tools like Pandas, Scikit-Learn and Keras without having a hard dependency on these frameworks - you need them only when you are going to use them. LiQuer's main web-framework is Flask (because of its simplicity), but other frameworks can easily be supported (there is a basic Tornado support available, others will follow as needed). LiQuer enabled code can be used (in most cases) exactly the same way as if LiQuer would not be there - so no LiQuer knowledge is needed to use your code. That makes it easy for newcommers to use the existing code, but as well start quickly contributing to a LiQuer-enabled code base. Hello, world! Let's start with a Hello, world! example: from liquer import * @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" print ( evaluate ( \"hello/greet\" ) . get ()) print ( evaluate ( \"hello/greet-everybody\" ) . get ()) To learn more, please continue to the user guide .","title":"Home"},{"location":"#welcome-to-liquer","text":"LiQuer is a leightweighted open-source framework (see repo ) covering a large number of usecases associated with machine learning, data science and other computational experimentation tasks requiring flexible analysis. LiQuer is a versatile tool - it helps to create interactive dashboards and web applications, working with tables, creating charts, images, reports - but as well non-interactive batch processes. The core of Liquer is a minimalistic query language, that represents a sequence of actions as a compact (but still human readable) string, or as URL \"link\". (Hence the name Link Query.) The second pillar of LiQuer is metadata: LiQuer always keeps track of metadata associated with the data. 
LiQuer queries can * execute interactively in a browser, * execute non-interactively in a batch, * referenced in reports, * efficiently cache the final and intermediate results, * improve the transparency, traceability and discoverability by the use of metadata - and more. Design of LiQuer is guided by the following principles: Simplicity and flexibility - Make simple things simple, complex things possible. Batteries included - Provide useful features and integration of 3rd party libraries out of the box in a modular way. Don't stand in the way - collaborate! - Do not force one way of doing things. Be technology neutral and integrate well with other libraries and frameworks. LiQuer-enabled code should run as well without the framework and thus using LiQuer should be low risk in terms of dependencies. Make all parts modular, replaceable and customizable. LiQuer is extremely easy to use - just decorate ordinary python functions with a simple decorator. LiQuer provides integration of essential data-science tools like Pandas, Scikit-Learn and Keras without having a hard dependency on these frameworks - you need them only when you are going to use them. LiQuer's main web-framework is Flask (because of its simplicity), but other frameworks can easily be supported (there is a basic Tornado support available, others will follow as needed). LiQuer enabled code can be used (in most cases) exactly the same way as if LiQuer would not be there - so no LiQuer knowledge is needed to use your code. That makes it easy for newcommers to use the existing code, but as well start quickly contributing to a LiQuer-enabled code base.","title":"Welcome to LiQuer"},{"location":"#hello-world","text":"Let's start with a Hello, world! example: from liquer import * @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" print ( evaluate ( \"hello/greet\" ) . get ()) print ( evaluate ( \"hello/greet-everybody\" ) . 
get ()) To learn more, please continue to the user guide .","title":"Hello, world!"},{"location":"commands/","text":"Commands By decorating a function with @command or @first_command , the function is registered in a command registry. Function is only registered, not modified or wrapped in any way - therefore it can be used as if it would not be decorated at all. Commands (command functions) typically need to be enabled in a LiQuer application simply by importing a module with command-decorated functions. Built-in modules need to be imported as well - this gives control about enabled features and as well allows to limit dependencies (e.g. in principle LiQuer application only requires pandas when liquer.ext.lq_pandas is imported.) When a command function is registered, metadata are extracted based on available informations and conventions: Function name becomes a name of the command. Modules can not be distinguished inside the query, therefore command (and hence functions) should have unique names even when they are defined in multiple modules. When decorated with @command , the first argument of the function will always be a state. If the first argument is called state , command function will receive the state as an instance of State , otherwise it will be just plain data. Plain data can be obtained from state by state.get() . When decorated with @first_command , command will not receive a state at all. Command registration tries to identify all the arguments and their types. The types are guessed either from type annotations (if available) or from default values. Default values and *args are suported, the **kwargs are not supported in commands. Parsed string arguments are converted to estimated types before they are passed to the command. This is done with help of argument parsers (see liquer.commands.ArgumentParser ). Command function may return any data type. 
If it does not return an instance of State , the returned data is automatically wrapped as a State when evaluated. The main purpose of State instance is to add metadata to the data (e.g. the query executed sofar, data sources used, type of the data, file name). It as well provides a logging functionality, which can record messages and errors during the execution of the query. See liquer.state for more info.","title":"Commands"},{"location":"commands/#commands","text":"By decorating a function with @command or @first_command , the function is registered in a command registry. Function is only registered, not modified or wrapped in any way - therefore it can be used as if it would not be decorated at all. Commands (command functions) typically need to be enabled in a LiQuer application simply by importing a module with command-decorated functions. Built-in modules need to be imported as well - this gives control about enabled features and as well allows to limit dependencies (e.g. in principle LiQuer application only requires pandas when liquer.ext.lq_pandas is imported.) When a command function is registered, metadata are extracted based on available informations and conventions: Function name becomes a name of the command. Modules can not be distinguished inside the query, therefore command (and hence functions) should have unique names even when they are defined in multiple modules. When decorated with @command , the first argument of the function will always be a state. If the first argument is called state , command function will receive the state as an instance of State , otherwise it will be just plain data. Plain data can be obtained from state by state.get() . When decorated with @first_command , command will not receive a state at all. Command registration tries to identify all the arguments and their types. The types are guessed either from type annotations (if available) or from default values. 
Default values and *args are suported, the **kwargs are not supported in commands. Parsed string arguments are converted to estimated types before they are passed to the command. This is done with help of argument parsers (see liquer.commands.ArgumentParser ). Command function may return any data type. If it does not return an instance of State , the returned data is automatically wrapped as a State when evaluated. The main purpose of State instance is to add metadata to the data (e.g. the query executed sofar, data sources used, type of the data, file name). It as well provides a logging functionality, which can record messages and errors during the execution of the query. See liquer.state for more info.","title":"Commands"},{"location":"examples/","text":"HDX disaggregation wizard LiQuer is a small server-side framework that can be quite helpful when building data-oriented web applications. One such example is HDX disaggregation wizard. It is a tool solving a simple task: splitting (disaggregating) a single data sheet (csv or xlsx) into multiple sheets. Sheet is split by values in the specified column (or multiple columns). Since this is a quite generic task (related to group by ), this functionality is built into liquer.ext.lq_pandas . The core of this feature is the eq command, filtering dataframe by specific values in a column, e.g. eq-a-123 keeps in the dataframe only rows where column a is 123. Command eq (equal) accepts multiple column-value pairs, e.g. eq-a-123-b-234 . In HDX the convention is to use the first row for tags. To support this comvention, teq command (tag equal) always keeps the first row of the dataframe. The disaggregation service supports both tagged and untagged data, using either eq or teq for filtering, depending on the user input. 
The complete flow is simple: * fetch data (command df_from ) * find unique values in a column (or multiple columns) and use them to create a list (table) of queries (command split_df ) * the queries use eq (or teq ) to filter dataframe by value(s). So, use a query like df_from-URL/split_df-COLUMN and you will get a table with queries like df_from-URL/eq-COLUMN-VALUE1 , df_from-URL/eq-COLUMN-VALUE2 . A little detail regarding the split function: There are actually four versions of this function - depending whether it is used for tagged or untagged document and whether it is quick (or query ) splits or full splits. The quick version only provides raw LiQuer queries (not the complete URL), The full split ( split_df for untagged and tsplit_df for tagged data) execute all the split queries, which might be slow. As a side effect, the results are cached (depending on the configuration, the example is using FileCache('cache') ). The complete user interface is in a single html file hdx_wizard.html , served by the flask server. Inside the user interface, LiQuer service is called multiple times e.g. to get previews or metadata: * Data previews uses the ability of LiQuer lq_pandas to convert dataframes to json, which can easily be read into javascript on the browser. First preview uses head_df command to display only a restricted part of the dataframe ( head ) * columns_info command is used to get lit of columns and eventual tags * /api/build service is used to build valid queries from javascript lists. This could be implemented directly in javascript, build service is a way to remotely call liquer.parser.encode . Integration of libhxl (example of a custom state type) Pandas is great, but there are other good libraries too e.g. tabulate . If you want to to use other data type (tabular or other), it will typically require (besides some useful commands) defining how that data can be serialized. This is done by implementing a state type . 
State type does several things associated with state type handling, but the most important role is handling serialization and deserialization. One excelent library used for working with humanitarian data is libhxl . Libhxl plays somewhat similar role as pandas: it reads, writes and manipulates tabular data - but it does as well understand HXL , which pandas doesn't - hence the liquer.ext.lq_hxl module. In order to allow libhxl objects to be used in liquer, we need to define a state type: HxlStateType . import hxl from liquer.state_types import StateType , register_state_type , mimetype_from_extension class HxlStateType ( StateType ): def identifier ( self ): \"Define an unique string identifier for the state type\" return \"hxl_dataset\" The identifier is important e.g. for caching, where it is stored as a part of metadata and it tells what StateType should be used for deserialization. def default_extension ( self ): \"Default file extension for the state type\" return \"csv\" def is_type_of ( self , data ): \"Check if data is of this state type\" return isinstance ( data , hxl . model . Dataset ) Default extension is used when the extension is not specified otherwise - for example if query does not end with a filename. The as_bytes and from_bytes are two most important methods, which take care of the serialization and deserialization. A state data can be serialized into multiple formats (e.g. csv, html, json...), therefore as_bytes optionally accepts a file extension and returns (besides the bytes) as well the mimetype. Th mimetype (when queried through the liquer server) becomes a part of the web service response. Note that serialization and deserialization do not necessarily need to support the same formats. E.g. html is quite nice to support in serialization, but it is too unspecific for a deserialization. def as_bytes ( self , data , extension = None ): \"\"\"Serialize data as bytes File extension may be provided and influence the serialization format. 
\"\"\" if extension is None : extension = self . default_extension () assert self . is_type_of ( data ) mimetype = mimetype_from_extension ( extension ) if extension == \"csv\" : output = \"\" . join ( data . gen_csv ( show_headers = True , show_tags = True )) return output . encode ( \"utf-8\" ), mimetype elif extension == \"json\" : output = \"\" . join ( data . gen_json ( show_headers = True , show_tags = True )) return output . encode ( \"utf-8\" ), mimetype else : raise Exception ( f \"Serialization: file extension { extension } is not supported by HXL dataset type.\" ) def from_bytes ( self , b : bytes , extension = None ): \"\"\"De-serialize data from bytes File extension may be provided and influence the serialization format. \"\"\" if extension is None : extension = self . default_extension () f = BytesIO () f . write ( b ) f . seek ( 0 ) if extension == \"csv\" : return hxl . data ( f ) raise Exception ( f \"Deserialization: file extension { extension } is not supported by HXL dataset type.\" ) Sometimes a deep copy of state data is needed - e.g. to assure that the data in the cache will not become unintentionally modified. That's why the state type should define copy method. Since libhxl dataset is immutable (?), it is OK to return just the data without making a copy. def copy ( self , data ): \"\"\"Make a deep copy of the data\"\"\" return data Once the state type class is defined, a state type instance is created and registered HXL_DATASET_STATE_TYPE = HxlStateType () register_state_type ( hxl . Dataset , HXL_DATASET_STATE_TYPE ) register_state_type ( hxl . io . HXLReader , HXL_DATASET_STATE_TYPE ) This is (currently) done for all relevant types. State types are registered in a global StateTypesRegistry object, which is responsible for registering and finding a state type instance for any state data. For more details see liquer.ext.lq_hxl module. Actually, the state type may not define a serialization and/or deserialization. 
There are objects that either can't be reliably serialized (e.g. matplotlib figure - as of time of writing) or serialization is otherwise undesirable. Such state types would be perfectly legal - they just could be neither cached nor served by the liquer web server. However, they could be inside the query, e.g. if matplotlib figure would be followed by image creation command, the image could be both served and cached. Reports and visualizations With the help of LiQuer, it is very easy to create both resuable visualizations with multiple views as well as documents viewable offline or suitable for printing. There are multiple markups suitable for creating reports and visualisations, but probably the easiest and most flexible are HTML documents. In LiQuer html can be easily created by returning a html text from a command. Creation of text is simplified by evaluate_template function, which processes a string ( template ) containing LiQuer queries and replaces those queries by their results. Report example is processing data from Global Food Prices Database (WFP) . It contains monthly prices for various commodities. To adapt the data to our needs we need a cople of extra commands: Month and year are in two separate columns mp_year and mp_month . For charts we need dates in YYYY-MM-DD format, which we achieve with the following command: @command def datemy ( df , y = \"mp_year\" , m = \"mp_month\" , target = \"date\" ): df . loc [:, target ] = [ \" %04d - %02d -01\" % ( int ( year ), int ( month )) for year , month in zip ( df [ y ], df [ m ])] return df To make statistics, it's handy to use pandas groupby. As an example we show count of groups, which used in the report to show number of observed prices in various markets: @command def count ( df , * groupby_columns ): df . loc [:, \"count\" ] = 1 return df . groupby ( groupby_columns ) . count () . reset_index () . 
loc [:, list ( groupby_columns ) + [ \"count\" ]] An example of a custom filter is a greater or equal command geq , used in the report to cut away years before a start year: @command def geq ( df , column , value : float ): index = df . loc [:, column ] >= value return df . loc [ index ,:] This is somewhat similar to eq command from the pandas support module liquer.ext.lq_pandas , but only supports numerical values, while the eq command is somewhat more general. Pandas dataframe supports quite flexible method to_html for converting dataframes to html format. Report uses for styling the popular css framework bootstrap and to display the tables nicely we just need to add some bootstrap css classes . Command as well prepends a link to the dataframe itself by the link command. This tends to be very useful in practice, allowing to conviniently import underlying raw data into a spreadsheet. @command def table ( state ): df = state . get () html = evaluate_template ( f \"\"\"(data) \"\"\" ) return html + df . to_html ( index = False , classes = \"table table-striped\" ) The core of the report is a report command. It can be applied on any dataframe containing suitable fields. This allows a large degree of flexibility - arbitrary filters can be inserted into a command chain before the report. For example, the current report can be restricted to specific markets, time periods or commodities without any additional code, just by modifying the URL. Report embeds a possibility to remove data pefore a from_year . This in principle could be done by inserting a geq command before the report (which would work fine). Passing from_year as an argument has an advantage, that the start year can become a part of the report (e.g. it can be used as a part of the title). Main part of the report is a single template, evaluated with evaluate_template . Note that LiQuer template uses as well string interpolation by python f string (PEP 498) , which is a very powerful combination. 
@command def report ( state , from_year = 2017 , linktype = None ): state = state . with_caching ( False ) def makelink ( url ): if linktype is None : return url extension = url . split ( \".\" )[ - 1 ] return evaluate ( f \"fetch- { encode_token ( url ) } /link- { linktype } - { extension } \" ) . get () try : source = state . sources [ 0 ] except : source = \"???\" LiQuer = ' LiQuer ' df = state . get () try : title = \",\" . join ( sorted ( df . adm0_name . unique ())) + f \" since { from_year } \" except : title = \"report\" return state . with_filename ( \"report.html\" ) . with_data ( evaluate_template ( f \"\"\" { title }
    \"HDX\"
    Generated with { LiQuer } © 2019 Orest Dubay




    { title }

    Data originate from   { source }   were processed via a { LiQuer } service. Only data after { from_year } are shown (data), complete data are  here.
    $ { state . query } /datemy/geq-mp_year- { from_year } /groupby_mean-mp_price-date-cm_name/plotly_chart-xys-date-mp_price-cm_name$
    $ { state . query } /datemy/geq-mp_year- { from_year } /count-adm1_name/plotly_chart-piexs-count-adm1_name$

    Average prices

    $ { state . query } /datemy/geq-mp_year- { from_year } /groupby_mean-mp_price-cm_name/table$

    Observations

    $ { state . query } /datemy/geq-mp_year- { from_year } /count-adm1_name/table$
    \"\"\" )) Inside the report command some more magic is used to handle links and external resources. Links are created by a nested function makelink . The main purpose is to allow three different regimes of working with links: * links to original sources (default), * serving (proxying) resources through LiQuer service and * dataurls. Links to original sources are useful if the report is used from a web service: the report size is then relatively small and thus the loading time is faster than for dataurls. Proxying resources through LiQuer service allows to cache resources by LiQuer. This may be useful on slower internet connections, when running the service without internet or behind a firewall. Dataurl link type allows saving the report as a single html file. Such a report can be used e.g. for offline browsing, archiving or sending by e-mail. All the assets are embedded inside the html file, so the report will work even when the LiQuer service is not available. Note: The embedded LiQuer queries will of course not work offline, but if necessary, the data comming out from LiQuer can be turned to a dataurl with link command; type of the link can be controlled by linktype state variable. Assuming linktype is not hardcoded (as in table command) all query links in the report could be turned to dataurls like this: filter - params / let - linktype - dataurl / report This of course could lead to extremply large report files, so it should be used carefully.","title":"Examples"},{"location":"examples/#hdx-disaggregation-wizard","text":"LiQuer is a small server-side framework that can be quite helpful when building data-oriented web applications. One such example is HDX disaggregation wizard. It is a tool solving a simple task: splitting (disaggregating) a single data sheet (csv or xlsx) into multiple sheets. Sheet is split by values in the specified column (or multiple columns). 
Since this is a quite generic task (related to group by ), this functionality is built into liquer.ext.lq_pandas . The core of this feature is the eq command, filtering dataframe by specific values in a column, e.g. eq-a-123 keeps in the dataframe only rows where column a is 123. Command eq (equal) accepts multiple column-value pairs, e.g. eq-a-123-b-234 . In HDX the convention is to use the first row for tags. To support this comvention, teq command (tag equal) always keeps the first row of the dataframe. The disaggregation service supports both tagged and untagged data, using either eq or teq for filtering, depending on the user input. The complete flow is simple: * fetch data (command df_from ) * find unique values in a column (or multiple columns) and use them to create a list (table) of queries (command split_df ) * the queries use eq (or teq ) to filter dataframe by value(s). So, use a query like df_from-URL/split_df-COLUMN and you will get a table with queries like df_from-URL/eq-COLUMN-VALUE1 , df_from-URL/eq-COLUMN-VALUE2 . A little detail regarding the split function: There are actually four versions of this function - depending whether it is used for tagged or untagged document and whether it is quick (or query ) splits or full splits. The quick version only provides raw LiQuer queries (not the complete URL), The full split ( split_df for untagged and tsplit_df for tagged data) execute all the split queries, which might be slow. As a side effect, the results are cached (depending on the configuration, the example is using FileCache('cache') ). The complete user interface is in a single html file hdx_wizard.html , served by the flask server. Inside the user interface, LiQuer service is called multiple times e.g. to get previews or metadata: * Data previews uses the ability of LiQuer lq_pandas to convert dataframes to json, which can easily be read into javascript on the browser. 
First preview uses head_df command to display only a restricted part of the dataframe ( head ) * columns_info command is used to get lit of columns and eventual tags * /api/build service is used to build valid queries from javascript lists. This could be implemented directly in javascript, build service is a way to remotely call liquer.parser.encode .","title":"HDX disaggregation wizard"},{"location":"examples/#integration-of-libhxl-example-of-a-custom-state-type","text":"Pandas is great, but there are other good libraries too e.g. tabulate . If you want to to use other data type (tabular or other), it will typically require (besides some useful commands) defining how that data can be serialized. This is done by implementing a state type . State type does several things associated with state type handling, but the most important role is handling serialization and deserialization. One excelent library used for working with humanitarian data is libhxl . Libhxl plays somewhat similar role as pandas: it reads, writes and manipulates tabular data - but it does as well understand HXL , which pandas doesn't - hence the liquer.ext.lq_hxl module. In order to allow libhxl objects to be used in liquer, we need to define a state type: HxlStateType . import hxl from liquer.state_types import StateType , register_state_type , mimetype_from_extension class HxlStateType ( StateType ): def identifier ( self ): \"Define an unique string identifier for the state type\" return \"hxl_dataset\" The identifier is important e.g. for caching, where it is stored as a part of metadata and it tells what StateType should be used for deserialization. def default_extension ( self ): \"Default file extension for the state type\" return \"csv\" def is_type_of ( self , data ): \"Check if data is of this state type\" return isinstance ( data , hxl . model . Dataset ) Default extension is used when the extension is not specified otherwise - for example if query does not end with a filename. 
The as_bytes and from_bytes are two most important methods, which take care of the serialization and deserialization. A state data can be serialized into multiple formats (e.g. csv, html, json...), therefore as_bytes optionally accepts a file extension and returns (besides the bytes) as well the mimetype. Th mimetype (when queried through the liquer server) becomes a part of the web service response. Note that serialization and deserialization do not necessarily need to support the same formats. E.g. html is quite nice to support in serialization, but it is too unspecific for a deserialization. def as_bytes ( self , data , extension = None ): \"\"\"Serialize data as bytes File extension may be provided and influence the serialization format. \"\"\" if extension is None : extension = self . default_extension () assert self . is_type_of ( data ) mimetype = mimetype_from_extension ( extension ) if extension == \"csv\" : output = \"\" . join ( data . gen_csv ( show_headers = True , show_tags = True )) return output . encode ( \"utf-8\" ), mimetype elif extension == \"json\" : output = \"\" . join ( data . gen_json ( show_headers = True , show_tags = True )) return output . encode ( \"utf-8\" ), mimetype else : raise Exception ( f \"Serialization: file extension { extension } is not supported by HXL dataset type.\" ) def from_bytes ( self , b : bytes , extension = None ): \"\"\"De-serialize data from bytes File extension may be provided and influence the serialization format. \"\"\" if extension is None : extension = self . default_extension () f = BytesIO () f . write ( b ) f . seek ( 0 ) if extension == \"csv\" : return hxl . data ( f ) raise Exception ( f \"Deserialization: file extension { extension } is not supported by HXL dataset type.\" ) Sometimes a deep copy of state data is needed - e.g. to assure that the data in the cache will not become unintentionally modified. That's why the state type should define copy method. 
Since libhxl dataset is immutable (?), it is OK to return just the data without making a copy. def copy ( self , data ): \"\"\"Make a deep copy of the data\"\"\" return data Once the state type class is defined, a state type instance is created and registered HXL_DATASET_STATE_TYPE = HxlStateType () register_state_type ( hxl . Dataset , HXL_DATASET_STATE_TYPE ) register_state_type ( hxl . io . HXLReader , HXL_DATASET_STATE_TYPE ) This is (currently) done for all relevant types. State types are registered in a global StateTypesRegistry object, which is responsible for registering and finding a state type instance for any state data. For more details see liquer.ext.lq_hxl module. Actually, the state type may not define a serialization and/or deserialization. There are objects that either can't be reliably serialized (e.g. matplotlib figure - as of time of writing) or serialization is otherwise undesirable. Such state types would be perfectly legal - they just could be neither cached nor served by the liquer web server. However, they could be inside the query, e.g. if matplotlib figure would be followed by image creation command, the image could be both served and cached.","title":"Integration of libhxl (example of a custom state type)"},{"location":"examples/#reports-and-visualizations","text":"With the help of LiQuer, it is very easy to create both resuable visualizations with multiple views as well as documents viewable offline or suitable for printing. There are multiple markups suitable for creating reports and visualisations, but probably the easiest and most flexible are HTML documents. In LiQuer html can be easily created by returning a html text from a command. Creation of text is simplified by evaluate_template function, which processes a string ( template ) containing LiQuer queries and replaces those queries by their results. Report example is processing data from Global Food Prices Database (WFP) . It contains monthly prices for various commodities. 
To adapt the data to our needs we need a cople of extra commands: Month and year are in two separate columns mp_year and mp_month . For charts we need dates in YYYY-MM-DD format, which we achieve with the following command: @command def datemy ( df , y = \"mp_year\" , m = \"mp_month\" , target = \"date\" ): df . loc [:, target ] = [ \" %04d - %02d -01\" % ( int ( year ), int ( month )) for year , month in zip ( df [ y ], df [ m ])] return df To make statistics, it's handy to use pandas groupby. As an example we show count of groups, which used in the report to show number of observed prices in various markets: @command def count ( df , * groupby_columns ): df . loc [:, \"count\" ] = 1 return df . groupby ( groupby_columns ) . count () . reset_index () . loc [:, list ( groupby_columns ) + [ \"count\" ]] An example of a custom filter is a greater or equal command geq , used in the report to cut away years before a start year: @command def geq ( df , column , value : float ): index = df . loc [:, column ] >= value return df . loc [ index ,:] This is somewhat similar to eq command from the pandas support module liquer.ext.lq_pandas , but only supports numerical values, while the eq command is somewhat more general. Pandas dataframe supports quite flexible method to_html for converting dataframes to html format. Report uses for styling the popular css framework bootstrap and to display the tables nicely we just need to add some bootstrap css classes . Command as well prepends a link to the dataframe itself by the link command. This tends to be very useful in practice, allowing to conviniently import underlying raw data into a spreadsheet. @command def table ( state ): df = state . get () html = evaluate_template ( f \"\"\"(data) \"\"\" ) return html + df . to_html ( index = False , classes = \"table table-striped\" ) The core of the report is a report command. It can be applied on any dataframe containing suitable fields. 
This allows a large degree of flexibility - arbitrary filters can be inserted into a command chain before the report. For example, the current report can be restricted to specific markets, time periods or commodities without any additional code, just by modifying the URL. Report embeds a possibility to remove data pefore a from_year . This in principle could be done by inserting a geq command before the report (which would work fine). Passing from_year as an argument has an advantage, that the start year can become a part of the report (e.g. it can be used as a part of the title). Main part of the report is a single template, evaluated with evaluate_template . Note that LiQuer template uses as well string interpolation by python f string (PEP 498) , which is a very powerful combination. @command def report ( state , from_year = 2017 , linktype = None ): state = state . with_caching ( False ) def makelink ( url ): if linktype is None : return url extension = url . split ( \".\" )[ - 1 ] return evaluate ( f \"fetch- { encode_token ( url ) } /link- { linktype } - { extension } \" ) . get () try : source = state . sources [ 0 ] except : source = \"???\" LiQuer = ' LiQuer ' df = state . get () try : title = \",\" . join ( sorted ( df . adm0_name . unique ())) + f \" since { from_year } \" except : title = \"report\" return state . with_filename ( \"report.html\" ) . with_data ( evaluate_template ( f \"\"\" { title }
    \"HDX\"
    Generated with { LiQuer } © 2019 Orest Dubay




    { title }

    Data originate from   { source }   were processed via a { LiQuer } service. Only data after { from_year } are shown (data), complete data are  here.
    $ { state . query } /datemy/geq-mp_year- { from_year } /groupby_mean-mp_price-date-cm_name/plotly_chart-xys-date-mp_price-cm_name$
    $ { state . query } /datemy/geq-mp_year- { from_year } /count-adm1_name/plotly_chart-piexs-count-adm1_name$

    Average prices

    $ { state . query } /datemy/geq-mp_year- { from_year } /groupby_mean-mp_price-cm_name/table$

    Observations

    $ { state . query } /datemy/geq-mp_year- { from_year } /count-adm1_name/table$
    \"\"\" )) Inside the report command some more magic is used to handle links and external resources. Links are created by a nested function makelink . The main purpose is to allow three different regimes of working with links: * links to original sources (default), * serving (proxying) resources through LiQuer service and * dataurls. Links to original sources are useful if the report is used from a web service: the report size is then relatively small and thus the loading time is faster than for dataurls. Proxying resources through LiQuer service allows to cache resources by LiQuer. This may be useful on slower internet connections, when running the service without internet or behind a firewall. Dataurl link type allows saving the report as a single html file. Such a report can be used e.g. for offline browsing, archiving or sending by e-mail. All the assets are embedded inside the html file, so the report will work even when the LiQuer service is not available. Note: The embedded LiQuer queries will of course not work offline, but if necessary, the data comming out from LiQuer can be turned to a dataurl with link command; type of the link can be controlled by linktype state variable. Assuming linktype is not hardcoded (as in table command) all query links in the report could be turned to dataurls like this: filter - params / let - linktype - dataurl / report This of course could lead to extremply large report files, so it should be used carefully.","title":"Reports and visualizations"},{"location":"guide/","text":"Instalation LiQuer requires at least python 3.6 with flask. It can be installed by python3 -m pip install liquer-framework Alternatively you can get LiQuer from repository : git clone https://github.com/orest-d/liquer.git python3 -m venv venv source venv/bin/activate cd liquer python3 setup.py install Getting started The good tradition is to start with a Hello, world! 
example: from liquer import * @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" print ( evaluate ( \"hello/greet\" ) . get ()) print ( evaluate ( \"hello/greet-everybody\" ) . get ()) In this example we first create two commands - hello and greet . Commands are ordinary python functions decorated with either @first_command or @command . A sequence of commands can be written as a link query (the main concept of LiQuer). Simple query is a sequence of actions (commands with parameters) separated by slash (\"/\"). A query is evaluated from left to right, always passing the output as a first argument to the next action. For example the query hello/greet is roughly equivalent to evaluating greet ( hello ()) Query hello/greet-everybody (in the end of the example) is equivalent to greet ( hello (), \"everybody\" ) Here we specify the second argument to the function greet in the query. The arguments are separated by dash (\"-\"). (This choice might look unusual, but it allows using such a query as a part of URL .) Link query syntax requires treating \"/\" and \"-\" as special characters and escaping them when needed - as we will explain in the query chapter. If the actions are always passing the result into the next action, what is passed into the very first action? The very first action in the pipeline will not receive anything as a first argument (more precisely, it will receive None ). To avoid having such a useless argument in commands that are used at the beginning of the query, (in our example the hello function), we can use the decorator @first_command instead of @command . This is more a convenience than necessity though. Commands and actions are explained in this chapter. Queries can be executed in multiple ways in LiQuer (programmatically from scripts or commands, from recipes/batch jobs or interactively from a web API).
In this example we just evaluate them in the script by the evaluate function. What did we actually gain? Link query syntax allows to represent pipelines are relatively short strings. More importantly, link query can be used as a path part of the URL . Unlike the more conventional web services typically a separate request for each action, link query can specify sequences of actions (pipelines) in the URL. This gives LiQuer an incredible amount of flexibility and expressiveness. LiQuer has a well-defined web service API A server version of the same example: from liquer import * ### Create Flask app and register LiQuer blueprint from flask import Flask import liquer.blueprint as bp app = Flask ( __name__ ) app . register_blueprint ( bp . app , url_prefix = '/liquer' ) @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" if __name__ == '__main__' : app . run () This is a normal flask server, registering LiQuer blueprint which makes all the LiQuer functionality available in the web service. Working with pandas Pandas example: from liquer import * import liquer.ext.lq_pandas @first_command def data (): return pd . DataFrame ( dict ( a = [ 1 , 2 , 3 ], b = [ 40 , 50 , 60 ])) @command def sum_columns ( df , column1 = \"a\" , column2 = \"b\" , target = \"c\" ): df . loc [:, target ] = df [ column1 ] + df [ column2 ] return df When queried via a web interface, the data is automatically converted to a most suitable format. If the last element of a query is a command without arguments containing a dot in the name, it is interpreted as a file name and the extension is used to determine the format. The format conversion only happens when query is run over the service, not when using the evaluate function. 
data - default format is used (which is csv) data/data.html - data is converted to html and displayed in the browser data/data.csv - data is converted to csv data/data.xlsx - dataframe is converted to xlsx data/eq-b-50 - built in equality filter selects rows with b==50 data/sum_columns - sum_columns is applied to a dataframe; this is equivalent to sum_columns(data()) data/sum_columns/sum_columns-a-c-d/sum2.html - multiple actions are chained: sum_columns(sum_columns(data()),\"a\",\"c\",\"d\") and result is served as html. df_from-URL - built in command loads a dataframe from URL Charts LiQuer has a rudimentary support for matplotlib and plotly residing in packages liquer.ext.lq_matplotlib and liquer.ext.lq_plotly Examples are in matplotlib_chart.py and plotly_chart.py show how to make simple plots with the commands already built in. This functionality is very basic at the moment and is likely to change.","title":"Introduction"},{"location":"guide/#instalation","text":"LiQuer requires at least python 3.6 with flask. It can be installed by python3 -m pip install liquer-framework Alternatively you can get LiQuer from repository : git clone https://github.com/orest-d/liquer.git python3 -m venv venv source venv/bin/activate cd liquer python3 setup.py install","title":"Instalation"},{"location":"guide/#getting-started","text":"The good tradition is to start with a Hello, world! example: from liquer import * @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" print ( evaluate ( \"hello/greet\" ) . get ()) print ( evaluate ( \"hello/greet-everybody\" ) . get ()) In this example we first create to commands - hello and greet . Commands are ordinary python functions decorated with either @first_command or @command . A sequence of commands can be written as a link query (the main concept of LiQuer). Simple query is a sequence of actions (commands with parameters) separated by slash (\"/\"). 
A query is evaluated from left to right, always passing the output as a first argument to the next action. For example the query hello/greet is roughly equivalent to evaluating greet ( hello ()) Query hello/greet-everybody (in the end of the example) is equivalent to greet ( hello (), \"everybody\" ) Here we specify the second argument to the function greet in the query. The arguments are separated by dash (\"-\"). (This choice might look unusual, but it allows using such a query as a part of URL .) Link query syntax requires treating \"/\" and \"-\" as special characters and escaping them when needed - as we will explain in the query chapter. If the actions are always passing the result into the next action, what is passed into the very first action? The very first action in the pipeline will not receive anything as a first argument (more precisely, it will receive None ). To avoid having such a useless argument in commands that are used at the beginning of the query, (in our example the hello function), we can use the decorator @first_command instead of @command . This is more a convenience than necessity though. Commands and actions are explained in this chapter. Queries can be executed in multiple ways in LiQuer (programmatically from scripts or commands, from recipes/batch jobs or interactively from a web API). In this example we just evaluate them in the script by the evaluate function.","title":"Getting started"},{"location":"guide/#what-did-we-actually-gain","text":"Link query syntax allows pipelines to be represented as relatively short strings. More importantly, link query can be used as a path part of the URL . Unlike the more conventional web services, which typically require a separate request for each action, link query can specify sequences of actions (pipelines) in the URL. This gives LiQuer an incredible amount of flexibility and expressiveness.
LiQuer has a well-defined web service API A server version of the same example: from liquer import * ### Create Flask app and register LiQuer blueprint from flask import Flask import liquer.blueprint as bp app = Flask ( __name__ ) app . register_blueprint ( bp . app , url_prefix = '/liquer' ) @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" if __name__ == '__main__' : app . run () This is a normal flask server, registering LiQuer blueprint which makes all the LiQuer functionality available in the web service.","title":"What did we actually gain?"},{"location":"guide/#working-with-pandas","text":"Pandas example: from liquer import * import liquer.ext.lq_pandas @first_command def data (): return pd . DataFrame ( dict ( a = [ 1 , 2 , 3 ], b = [ 40 , 50 , 60 ])) @command def sum_columns ( df , column1 = \"a\" , column2 = \"b\" , target = \"c\" ): df . loc [:, target ] = df [ column1 ] + df [ column2 ] return df When queried via a web interface, the data is automatically converted to a most suitable format. If the last element of a query is a command without arguments containing a dot in the name, it is interpreted as a file name and the extension is used to determine the format. The format conversion only happens when query is run over the service, not when using the evaluate function. data - default format is used (which is csv) data/data.html - data is converted to html and displayed in the browser data/data.csv - data is converted to csv data/data.xlsx - dataframe is converted to xlsx data/eq-b-50 - built in equality filter selects rows with b==50 data/sum_columns - sum_columns is applied to a dataframe; this is equivalent to sum_columns(data()) data/sum_columns/sum_columns-a-c-d/sum2.html - multiple actions are chained: sum_columns(sum_columns(data()),\"a\",\"c\",\"d\") and result is served as html. 
df_from-URL - built in command loads a dataframe from URL","title":"Working with pandas"},{"location":"guide/#charts","text":"LiQuer has a rudimentary support for matplotlib and plotly residing in packages liquer.ext.lq_matplotlib and liquer.ext.lq_plotly Examples are in matplotlib_chart.py and plotly_chart.py show how to make simple plots with the commands already built in. This functionality is very basic at the moment and is likely to change.","title":"Charts"},{"location":"metadata/","text":"State variables In some situations it is useful to pass some values along the query. For example if we want to specify some value once and use it in multiple commands. from liquer import * from liquer.state import set_var import liquer.ext.basic @command def hello ( state , who = None ): if who is None : who = state . vars . get ( \"greet\" , \"???\" ) return f \"Hello, { who } !\" set_var ( \"greet\" , \"world\" ) print ( evaluate ( \"hello\" ) . get ()) # Hello, world! : uses state variable defined above print ( evaluate ( \"state_variable-greet\" ) . get ()) # world : shows the content of the state variable print ( evaluate ( \"hello-everybody\" ) . get ()) # Hello, everybody! : uses the argument print ( evaluate ( \"let-greet-variable/hello\" ) . get ()) # Hello, variable! : defines the variable in the query print ( evaluate ( \"hello\" ) . get ()) # Hello, world! : let is local to a query There are two variables that are important to set up in some cases: * server should contain the URL of the LiQuer server * api_path should contain the path to the query service So server + api_path + query should become a valid url that would yield a query result. Several commands (e.g. link or split_df) depend on correct definition of these variables, so they should be set together with setting up the flask blueprint - e.g. url_prefix = '/liquer' app . register_blueprint ( bp . 
app , url_prefix = url_prefix ) set_var ( \"api_path\" , url_prefix + \"/q/\" ) set_var ( \"server\" , \"http://localhost:5000\" )","title":"Metadata and state"},{"location":"metadata/#state-variables","text":"In some situations it is useful to pass some values along the query. For example if we want to specify some value once and use it in multiple commands. from liquer import * from liquer.state import set_var import liquer.ext.basic @command def hello ( state , who = None ): if who is None : who = state . vars . get ( \"greet\" , \"???\" ) return f \"Hello, { who } !\" set_var ( \"greet\" , \"world\" ) print ( evaluate ( \"hello\" ) . get ()) # Hello, world! : uses state variable defined above print ( evaluate ( \"state_variable-greet\" ) . get ()) # world : shows the content of the state variable print ( evaluate ( \"hello-everybody\" ) . get ()) # Hello, everybody! : uses the argument print ( evaluate ( \"let-greet-variable/hello\" ) . get ()) # Hello, variable! : defines the variable in the query print ( evaluate ( \"hello\" ) . get ()) # Hello, world! : let is local to a query There are two variables that are important to set up in some cases: * server should contain the URL of the LiQuer server * api_path should contain the path to the query service So server + api_path + query should become a valid url that would yield a query result. Several commands (e.g. link or split_df) depend on correct definition of these variables, so they should be set together with setting up the flask blueprint - e.g. url_prefix = '/liquer' app . register_blueprint ( bp . app , url_prefix = url_prefix ) set_var ( \"api_path\" , url_prefix + \"/q/\" ) set_var ( \"server\" , \"http://localhost:5000\" )","title":"State variables"},{"location":"query/","text":"Query Basic query Query is in the simplest case composed out of a sequence of actions. Action is nothing else than a function (closure) with all arguments specified except the first one. 
All actions consume a single input and produce a single output - so they can be chained into a pipeline. In the Hello, world! example from earlier, the hello/greet-everybody is a sequence of two actions. The first action is hello; it does not have any explicit parameters. (Technically it accepts an input - but that input is ignored, so this function is suitable to be at the beginning of the pipeline.) The greet-everybody is an action calling a command greet with an argument \"everybody\". The general structure of a query is as follows: actions are separated by \"/\", arguments are separated by \"-\": identifier1-arg11-arg12/identifier2-arg21-arg22-arg23/identifier3-arg31... Action starts with an identifier, which is interpreted as a command name. In practice, the command is always defined via a python function with the same name. Therefore identifier must be a valid python identifier plus it can't start with an upper-case letter. Thus \"my_command\" is a valid identifier, but \"MyCommand\", \"1command\" or \"my.command\" is not. Query is optionally terminated by a filename. A valid filename must fulfill the following conditions: Filename is the last element in a query, i.e. there is no \"/\" after the filename. Filename must contain \".\". Filename can not contain \"-\". These rules assure that a filename can be distinguished from a command name or an argument. For example readme.txt is a valid filename, but readme-now.txt would be interpreted as an action composed from a command readme with an argument \"now.txt\". The main role of a filename is to specify the file extension, which determines the format in which the data are serialized when saved or returned via a web interface. The filename before the extension is arbitrary.
Escaping and query entities Query only allows to use the characters allowed in the path part of the URL, and the following characters have a special meaning: slash \"/\" separates the actions, dash \"-\" separates action arguments and query segments, tilde \"~\" is used as an escape character. There are two mechanisms that can be used for escaping: * Percentage encoding used for encoding special characters in URL - see e.g. https://en.wikipedia.org/wiki/Percent-encoding * Query entities are constructs starting with the tilde character \"~\". Query entities have special meaning, e.g. they can be used for encoding of \"-\", \"/\" and \"~\". Though encoding of these characters with the percentage encoding might work as well, it is safer to use query entities (tilde encoding). The following entities are defined: tilde entity \"~~\" expands to \"~\" minus entity \"~_\" expands to \"-\" slash entities \"~I\" and \"~/\" expand to \"/\" https entity \"~H\" expands to \"https://\" http entity \"~h\" expands to \"http://\" file entity \"~f\" expands to \"file://\" protocol entity \"~P\" expands to \"://\" negative number entities \"~0\",\"~1\",\"~2\" ... \"~9\" expand to \"-0\", \"-1\", \"-2\" ... \"-9\". (This is a more convenient alternative syntax for writing negative numbers like \"~123\" instead of \"~_123\". space entity \"~.\" expands to \" \" expand entity \"~X~ query ~E\" evaluates the query and expands to a result end_entity \"~E\" is not a real entity, but can only be part of a complex entity like the expand entity . Expand entity supports two types of queries - absolute starting with \"/\" and relative (not starting with \"/\"). Absolute entities are simply evaluated as they are, but relative entities are pre-pended with the current query before the execution. 
For example hello/greet-~X~/everybody~E is interpreted as greet(hello(), everybody()) , but the relative query in an argument hello/greet-~X~everybody~E is interpreted as greet(hello(), everybody(hello())) .","title":"Query syntax"},{"location":"query/#query","text":"","title":"Query"},{"location":"query/#basic-query","text":"Query is in the simplest case composed out of a sequence of actions. Action is nothing else than a function (closure) with all arguments specified except the first one. All actions consume a single input and produce a single output - so they can be chained into a pipeline. In the Hello, world! example from earlier, the hello/greet-everybody is a sequence of two actions. The first action is hello does not have any explicit parameters. (Technically it accepts an input - but that input is ignored, so this function is suitable to be at the beginning of the pipeline.) The greet-everybody is and action calling a command greet with an argument \"everybody\". The general structure of a query is actions are separated by \"/\", arguments are separated by \"-\": identifier1-arg11-arg12/identifier2-arg21-arg22-arg23/identifier3-arg31... Action starts with an identifier, which is interpreted as a command name. In practice, the command is always defined via a python function with the same name. Therefore identifier must be a valid python identifier plus it can't start with an upper case. Thus \"my_command\" is a valid identifier, but \"MyCommand\", \"1command\" or \"my.command\" is not. Query is optionaly terminated by a filename. A valid filename must fulfill the following conditions: Filename is the last element in a query, i.e. there is no \"/\" after the filename. Filename must contain \".\". Filename can not contain \"-\". These rules assure that a filename can be recognized from a command name or an argument. 
For example readme.txt is a valid filename, but readme-now.txt would be interpreted as an action composed from a command readme with an argument \"now.txt\". The main role of a filename is to specify the file extension, which determines the format in which the data are serialized when saved or returned via a web interface. The filename before the extension is arbitrary.","title":"Basic query"},{"location":"query/#escaping-and-query-entities","text":"Query only allows to use the characters allowed in the path part of the URL, and the following characters have a special meaning: slash \"/\" separates the actions, dash \"-\" separates action arguments and query segments, tilde \"~\" is used as an escape character. There are two mechanisms that can be used for escaping: * Percentage encoding used for encoding special characters in URL - see e.g. https://en.wikipedia.org/wiki/Percent-encoding * Query entities are constructs starting with the tilde character \"~\". Query entities have special meaning, e.g. they can be used for encoding of \"-\", \"/\" and \"~\". Though encoding of these characters with the percentage encoding might work as well, it is safer to use query entities (tilde encoding). The following entities are defined: tilde entity \"~~\" expands to \"~\" minus entity \"~_\" expands to \"-\" slash entities \"~I\" and \"~/\" expand to \"/\" https entity \"~H\" expands to \"https://\" http entity \"~h\" expands to \"http://\" file entity \"~f\" expands to \"file://\" protocol entity \"~P\" expands to \"://\" negative number entities \"~0\",\"~1\",\"~2\" ... \"~9\" expand to \"-0\", \"-1\", \"-2\" ... \"-9\". (This is a more convenient alternative syntax for writing negative numbers like \"~123\" instead of \"~_123\". space entity \"~.\" expands to \" \" expand entity \"~X~ query ~E\" evaluates the query and expands to a result end_entity \"~E\" is not a real entity, but can only be part of a complex entity like the expand entity . 
Expand entity supports two types of queries - absolute starting with \"/\" and relative (not starting with \"/\"). Absolute entities are simply evaluated as they are, but relative entities are pre-pended with the current query before the execution. For example hello/greet-~X~/everybody~E is interpreted as greet(hello(), everybody()) , but the relative query in an argument hello/greet-~X~everybody~E is interpreted as greet(hello(), everybody(hello())) .","title":"Escaping and query entities"},{"location":"recipes/","text":"","title":"Recipes"},{"location":"security/","text":"Security LiQuer was so far only deployed on intranet. More development is needed to make interent deployment of LiQuer safe. LiQuer exposes only services defined in the liquer.blueprint module - and by extension all the registered commands. Only enable commands that do not put your system to risk. A big source of security concerns are DOS attacks: * It is easy to overload LiQuer server with huge queries. To solve this issue, queries need to be validated in some way. * Badly implemented cache may quickly exceed the storage capacity. (Default NoCache is a safe choice in this respect.)","title":"Security"},{"location":"security/#security","text":"LiQuer was so far only deployed on intranet. More development is needed to make interent deployment of LiQuer safe. LiQuer exposes only services defined in the liquer.blueprint module - and by extension all the registered commands. Only enable commands that do not put your system to risk. A big source of security concerns are DOS attacks: * It is easy to overload LiQuer server with huge queries. To solve this issue, queries need to be validated in some way. * Badly implemented cache may quickly exceed the storage capacity. (Default NoCache is a safe choice in this respect.)","title":"Security"},{"location":"store/","text":"Store Store is a configurable virtual file system inside liquer. Store is designed to be able to deal with states. 
One notable extension of the Store compared to a regular file system is the ability to store (and work with) the metadata, which is essential for dealing with data in liquer. Store is basically a key/value store mapping a path to a sequence of bytes. By itself, Store does not define (or care) about serialization of the data. This differenciates it from Cache . Even though the interface to Cache and Store is intentionaly very similar, these two mechanisms are different: Cache keeps State (i.e. data object with metadata). Cache deals with objects and stores State perhaps in a non-serialized form (e.g. MemoryStore). Store keeps resources - i.e. arbitrary binary data ( bytes ) complemented with metadata. One purpose of a Store is to provide an option to serve files into the pipeline. The pipeline may start with a resource path followed by a sequence of actions. Cache By default there is no cache - i.e. the queries are always re-evaluated. There are several cache implementations available in liquer.cache . They are configured by set_cache function, for example set_cache ( FileCache ( \"cache\" )) configures a cache that will store all the (chache-able) results of queries in a dictionary cache . Cache should be configured before the queries are evaluated - and before state variables are set. Currently there are three cache implementations: NoCache is a trivial do-nothing cache, FileCache stores data in files, MemoryCache caches the object in the memory. Custom cache can be created by defining a cache interface, see above mentioned classes. Cache will typically use query as a key and utilize the mechanism of serializing data into a bytes sequence (defined in liquer.state_types ), thus implementing a cache based either on a key-value store or blob-storage in SQL databases should be fairly straightforward (and probably quite similar to FileCache ). Command may optionally decide not to cache its output. This may be useful when command produces volatile data, e.g. time. 
In such a case command (operating on a state) can disable cache by state.with_caching(False) .","title":"Store and Cache"},{"location":"store/#store","text":"Store is a configurable virtual file system inside liquer. Store is designed to be able to deal with states. One notable extension of the Store compared to a regular file system is the ability to store (and work with) the metadata, which is essential for dealing with data in liquer. Store is basically a key/value store mapping a path to a sequence of bytes. By itself, Store does not define (or care) about serialization of the data. This differenciates it from Cache . Even though the interface to Cache and Store is intentionaly very similar, these two mechanisms are different: Cache keeps State (i.e. data object with metadata). Cache deals with objects and stores State perhaps in a non-serialized form (e.g. MemoryStore). Store keeps resources - i.e. arbitrary binary data ( bytes ) complemented with metadata. One purpose of a Store is to provide an option to serve files into the pipeline. The pipeline may start with a resource path followed by a sequence of actions.","title":"Store"},{"location":"store/#cache","text":"By default there is no cache - i.e. the queries are always re-evaluated. There are several cache implementations available in liquer.cache . They are configured by set_cache function, for example set_cache ( FileCache ( \"cache\" )) configures a cache that will store all the (chache-able) results of queries in a dictionary cache . Cache should be configured before the queries are evaluated - and before state variables are set. Currently there are three cache implementations: NoCache is a trivial do-nothing cache, FileCache stores data in files, MemoryCache caches the object in the memory. Custom cache can be created by defining a cache interface, see above mentioned classes. 
Cache will typically use query as a key and utilize the mechanism of serializing data into a bytes sequence (defined in liquer.state_types ), thus implementing a cache based either on a key-value store or blob-storage in SQL databases should be fairly straightforward (and probably quite similar to FileCache ). Command may optionally decide not to cache its output. This may be useful when command produces volatile data, e.g. time. In such a case command (operating on a state) can disable cache by state.with_caching(False) .","title":"Cache"},{"location":"web_service/","text":"Web service Web service is typically installed with an absolute paths starting with /liquer , e.g. /liquer/q/hello . Though this can be changed (and should be fully configurable in the future), some extensions currently (e.g. liquer gui ) currently rely on this absolute location. Core service for query execution Route /q/QUERY (GET, POST) Main service for evaluating queries. Service allows to supply named arguments, that will be passed to the last command in the query. These arguments can be passed as URL query or POSTed as JSON dictionary. Route /submit/QUERY (GET) Main service for evaluating queries. Like /q/QUERY , but the QUERY is executed in the background. Service returns status as a JSON document. Status contains status OK or ERROR message short text message describing the status of the submission query query that was submitted Cache interface Route/api/cache/get/QUERY (GET, POST) FIXME: POST not implemented(?) Get cached data. If the result of the QUERY is stored in cache, it is returned immediately, otherwise the call fails. POST method may be supported, which allows using the service as a remote cache. Route /api/cache/meta/QUERY (GET, POST) Get cached metadata as JSON for QUERY if available, a status JSON otherwise: POST method may be supported, which allows using the service as a remote cache. 
status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted cached boolean, True when cached. Route /api/cache/remove/QUERY (GET) FIXME: Support http DELETE ? Interface to cache remove. Removes the query from cache. Returns status JSON: status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted removed boolean, True when the remove operation was successful. /api/cache/contains/QUERY (GET) Interface to cache contains. Returns whether QUERY is cached in a JSON status document: status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted cached boolean, True when query is in the cache Route /api/cache/keys.json (GET) FIXME Remove .json or unify with /api/store Interface to cache keys. Returns list of all keys in the cache as a JSON list. Route /api/cache/clean (GET) Interface to cache clean. Cleans the whole cache. Returns a JSON document. status OK or ERROR message Short text describing the result of the operation. Miscellaneous services Route /api/commands.json (GET) Returns a list of commands in json format Route /api/debug-json/QUERY (GET) FIXME: Obsolete? Debug query - returns metadata from a state after a query is evaluated Route /api/build (POST) FIXME: Obsolete? Build a query from a posted decoded query (list of lists of strings). Result is a dictionary with encoded query and link. Route /api/register_command/DATA (GET, POST) Remote command registration service. This has to be enabled by liquer.commands.enable_remote_registration() WARNING : Remote command registration allows to deploy arbitrary python code on LiQuer server , therefore it is a HUGE SECURITY RISK and it only should be used if other security measures are taken ( e . g . on localhost or intranet where only trusted users have access ). This is on by default on Jupyter server extension . 
Store interface Route /api/store/data/QUERY, (GET, POST) Get or set data in store. GET method is equivalent to Store.get_bytes. Content type (MIME) is obtained from the metadata. POST method sets data in store. Equivalent to Store.store. Unlike store method, which stores both data and metadata in one call, the api/store/data POST only stores the data. The metadata needs to be set in a separate POST of api/store/metadata either before or after the api/store/data POST. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument Route /api/store/upload/KEY (POST, optional GET) Upload data to store - similar to /api/store/data, but using upload. Equivalent to Store.store. Unlike store method, which stores both data and metadata in one call, the api/store/data POST only stores the data. The metadata needs to be set in a separate POST of api/store/metadata either before or after the api/store/data POST. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument GET method (if supported) may return a basic html interface to facilitate the file upload. Route /api/store/metadata/KEY (GET, POST) FIXME: KEY, not QUERY Getting or setting the metadata for KEY. On successful GET returns the metadata as JSON. Otherwise a status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /web/KEY (GET) FIXME: it should be key, not query in the code Shortcut to the 'web' directory in the store. Similar to /store/data/web, except the index.html is automatically added if query is a directory. The 'web' directory hosts web applications and visualization tools, e.g. liquer-pcv or liquer-gui. 
On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/stored_metadata/QUERY (GET) Get metadata stored in a store or cache. This will not trigger an execution of a query or recipe. FIXME: Make sure that recipes are not executed. Route /api/store/remove/KEY (GET) Remove key from store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/store/removedir/KEY (GET) Remove directory key from store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/store/contains/KEY (GET) Check whether the KEY exists in the store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) contains result of store.contains if operation is successful (true if KEY is in the store) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/store/is_dir/KEY (GET) Check whether the KEY is a directory in the store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) is_dir result of store.is_dir if operation is successful (true if KEY is a directory) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/store/keys (GET) Return the list of keys in the store. 
Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) keys list of keys (on success) Route /api/store/listdir/KEY (GET) Get list of names in a directory KEY . FIXME KEY Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) listdir list of names (on success) FIXME key, query Route /api/store/makedir/KEY (GET) Make a directory specified by KEY . FIXME KEY Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Web service"},{"location":"web_service/#web-service","text":"Web service is typically installed with an absolute paths starting with /liquer , e.g. /liquer/q/hello . Though this can be changed (and should be fully configurable in the future), some extensions currently (e.g. liquer gui ) currently rely on this absolute location.","title":"Web service"},{"location":"web_service/#core-service-for-query-execution","text":"","title":"Core service for query execution"},{"location":"web_service/#route-qquery-get-post","text":"Main service for evaluating queries. Service allows to supply named arguments, that will be passed to the last command in the query. These arguments can be passed as URL query or POSTed as JSON dictionary.","title":"Route /q/QUERY (GET, POST)"},{"location":"web_service/#route-submitquery-get","text":"Main service for evaluating queries. Like /q/QUERY , but the QUERY is executed in the background. Service returns status as a JSON document. 
Status contains status OK or ERROR message short text message describing the status of the submission query query that was submitted","title":"Route /submit/QUERY (GET)"},{"location":"web_service/#cache-interface","text":"","title":"Cache interface"},{"location":"web_service/#routeapicachegetquery-get-post","text":"FIXME: POST not implemented(?) Get cached data. If the result of the QUERY is stored in cache, it is returned immediately, otherwise the call fails. POST method may be supported, which allows using the service as a remote cache.","title":"Route/api/cache/get/QUERY (GET, POST)"},{"location":"web_service/#route-apicachemetaquery-get-post","text":"Get cached metadata as JSON for QUERY if available, a status JSON otherwise: POST method may be supported, which allows using the service as a remote cache. status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted cached boolean, True when cached.","title":"Route /api/cache/meta/QUERY (GET, POST)"},{"location":"web_service/#route-apicacheremovequery-get","text":"FIXME: Support http DELETE ? Interface to cache remove. Removes the query from cache. Returns status JSON: status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted removed boolean, True when the remove operation was successful.","title":"Route /api/cache/remove/QUERY (GET)"},{"location":"web_service/#apicachecontainsquery-get","text":"Interface to cache contains. Returns whether QUERY is cached in a JSON status document: status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted cached boolean, True when query is in the cache","title":"/api/cache/contains/QUERY (GET)"},{"location":"web_service/#route-apicachekeysjson-get","text":"FIXME Remove .json or unify with /api/store Interface to cache keys. 
Returns list of all keys in the cache as a JSON list.","title":"Route /api/cache/keys.json (GET)"},{"location":"web_service/#route-apicacheclean-get","text":"Interface to cache clean. Cleans the whole cache. Returns a JSON document. status OK or ERROR message Short text describing the result of the operation.","title":"Route /api/cache/clean (GET)"},{"location":"web_service/#miscellaneous-services","text":"","title":"Miscellaneous services"},{"location":"web_service/#route-apicommandsjson-get","text":"Returns a list of commands in json format","title":"Route /api/commands.json (GET)"},{"location":"web_service/#route-apidebug-jsonquery-get","text":"FIXME: Obsolete? Debug query - returns metadata from a state after a query is evaluated","title":"Route /api/debug-json/QUERY (GET)"},{"location":"web_service/#route-apibuild-post","text":"FIXME: Obsolete? Build a query from a posted decoded query (list of lists of strings). Result is a dictionary with encoded query and link.","title":"Route /api/build (POST)"},{"location":"web_service/#route-apiregister_commanddata-get-post","text":"Remote command registration service. This has to be enabled by liquer.commands.enable_remote_registration() WARNING : Remote command registration allows to deploy arbitrary python code on LiQuer server , therefore it is a HUGE SECURITY RISK and it only should be used if other security measures are taken ( e . g . on localhost or intranet where only trusted users have access ). This is on by default on Jupyter server extension .","title":"Route /api/register_command/DATA (GET, POST)"},{"location":"web_service/#store-interface","text":"","title":"Store interface"},{"location":"web_service/#route-apistoredataquery-get-post","text":"Get or set data in store. GET method is equivalent to Store.get_bytes. Content type (MIME) is obtained from the metadata. POST method sets data in store. Equivalent to Store.store. 
Unlike store method, which stores both data and metadata in one call, the api/store/data POST only stores the data. The metadata needs to be set in a separate POST of api/store/metadata either before or after the api/store/data POST. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument","title":"Route /api/store/data/QUERY, (GET, POST)"},{"location":"web_service/#route-apistoreuploadkey-post-optional-get","text":"Upload data to store - similar to /api/store/data, but using upload. Equivalent to Store.store. Unlike store method, which stores both data and metadata in one call, the api/store/data POST only stores the data. The metadata needs to be set in a separate POST of api/store/metadata either before or after the api/store/data POST. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument GET method (if supported) may return a basic html interface to facilitate the file upload.","title":"Route /api/store/upload/KEY (POST, optional GET)"},{"location":"web_service/#route-apistoremetadatakey-get-post","text":"FIXME: KEY, not QUERY Getting or setting the metadata for KEY. On successful GET returns the metadata as JSON. Otherwise a status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/metadata/KEY (GET, POST)"},{"location":"web_service/#route-webkey-get","text":"FIXME: it should be key, not query in the code Shortcut to the 'web' directory in the store. Similar to /store/data/web, except the index.html is automatically added if query is a directory. The 'web' directory hosts web applications and visualization tools, e.g. 
liquer-pcv or liquer-gui. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /web/KEY (GET)"},{"location":"web_service/#route-apistored_metadataquery-get","text":"Get metadata stored in a store or cache. This will not trigger an execution of a query or recipe. FIXME: Make sure that recipes are not executed.","title":"Route /api/stored_metadata/QUERY (GET)"},{"location":"web_service/#route-apistoreremovekey-get","text":"Remove key from store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/remove/KEY (GET)"},{"location":"web_service/#route-apistoreremovedirkey-get","text":"Remove directory key from store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/removedir/KEY (GET)"},{"location":"web_service/#route-apistorecontainskey-get","text":"Check whether the KEY exists in the store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) contains result of store.contains if operation is successful (true if KEY is in the store) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/contains/KEY (GET)"},{"location":"web_service/#route-apistoreis_dirkey-get","text":"Check whether the KEY is a directory in the store. 
FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) is_dir result of store.is_dir if operation is successful (true if KEY is a directory) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/is_dir/KEY (GET)"},{"location":"web_service/#route-apistorekeys-get","text":"Return the list of keys in the store. Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) keys list of keys (on success)","title":"Route /api/store/keys (GET)"},{"location":"web_service/#route-apistorelistdirkey-get","text":"Get list of names in a directory KEY . FIXME KEY Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) listdir list of names (on success) FIXME key, query","title":"Route /api/store/listdir/KEY (GET)"},{"location":"web_service/#route-apistoremakedirkey-get","text":"Make a directory specified by KEY . FIXME KEY Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/makedir/KEY (GET)"}]} \ No newline at end of file +{"config":{"indexing":"full","lang":["en"],"min_search_length":3,"prebuild_index":false,"separator":"[\\s\\-]+"},"docs":[{"location":"","text":"Welcome to LiQuer LiQuer is a leightweighted open-source framework (see repo ) covering a large number of usecases associated with machine learning, data science and other computational experimentation tasks requiring flexible analysis. 
LiQuer is a versatile tool - it helps to create interactive dashboards and web applications, working with tables, creating charts, images, reports - but as well non-interactive batch processes. The core of Liquer is a minimalistic query language, that represents a sequence of actions as a compact (but still human readable) string, or as URL \"link\". (Hence the name Link Query.) The second pillar of LiQuer is metadata: LiQuer always keeps track of metadata associated with the data. LiQuer queries can * execute interactively in a browser, * execute non-interactively in a batch, * referenced in reports, * efficiently cache the final and intermediate results, * improve the transparency, traceability and discoverability by the use of metadata - and more. Design of LiQuer is guided by the following principles: Simplicity and flexibility - Make simple things simple, complex things possible. Batteries included - Provide useful features and integration of 3rd party libraries out of the box in a modular way. Don't stand in the way - collaborate! - Do not force one way of doing things. Be technology neutral and integrate well with other libraries and frameworks. LiQuer-enabled code should run as well without the framework and thus using LiQuer should be low risk in terms of dependencies. Make all parts modular, replaceable and customizable. LiQuer is extremely easy to use - just decorate ordinary python functions with a simple decorator. LiQuer provides integration of essential data-science tools like Pandas, Scikit-Learn and Keras without having a hard dependency on these frameworks - you need them only when you are going to use them. LiQuer's main web-framework is Flask (because of its simplicity), but other frameworks can easily be supported (there is a basic Tornado support available, others will follow as needed). LiQuer enabled code can be used (in most cases) exactly the same way as if LiQuer would not be there - so no LiQuer knowledge is needed to use your code. 
That makes it easy for newcommers to use the existing code, but as well start quickly contributing to a LiQuer-enabled code base. Hello, world! Let's start with a Hello, world! example: from liquer import * @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" print ( evaluate ( \"hello/greet\" ) . get ()) print ( evaluate ( \"hello/greet-everybody\" ) . get ()) To learn more, please continue to the user guide .","title":"Home"},{"location":"#welcome-to-liquer","text":"LiQuer is a leightweighted open-source framework (see repo ) covering a large number of usecases associated with machine learning, data science and other computational experimentation tasks requiring flexible analysis. LiQuer is a versatile tool - it helps to create interactive dashboards and web applications, working with tables, creating charts, images, reports - but as well non-interactive batch processes. The core of Liquer is a minimalistic query language, that represents a sequence of actions as a compact (but still human readable) string, or as URL \"link\". (Hence the name Link Query.) The second pillar of LiQuer is metadata: LiQuer always keeps track of metadata associated with the data. LiQuer queries can * execute interactively in a browser, * execute non-interactively in a batch, * referenced in reports, * efficiently cache the final and intermediate results, * improve the transparency, traceability and discoverability by the use of metadata - and more. Design of LiQuer is guided by the following principles: Simplicity and flexibility - Make simple things simple, complex things possible. Batteries included - Provide useful features and integration of 3rd party libraries out of the box in a modular way. Don't stand in the way - collaborate! - Do not force one way of doing things. Be technology neutral and integrate well with other libraries and frameworks. 
LiQuer-enabled code should run as well without the framework and thus using LiQuer should be low risk in terms of dependencies. Make all parts modular, replaceable and customizable. LiQuer is extremely easy to use - just decorate ordinary python functions with a simple decorator. LiQuer provides integration of essential data-science tools like Pandas, Scikit-Learn and Keras without having a hard dependency on these frameworks - you need them only when you are going to use them. LiQuer's main web-framework is Flask (because of its simplicity), but other frameworks can easily be supported (there is a basic Tornado support available, others will follow as needed). LiQuer enabled code can be used (in most cases) exactly the same way as if LiQuer would not be there - so no LiQuer knowledge is needed to use your code. That makes it easy for newcommers to use the existing code, but as well start quickly contributing to a LiQuer-enabled code base.","title":"Welcome to LiQuer"},{"location":"#hello-world","text":"Let's start with a Hello, world! example: from liquer import * @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" print ( evaluate ( \"hello/greet\" ) . get ()) print ( evaluate ( \"hello/greet-everybody\" ) . get ()) To learn more, please continue to the user guide .","title":"Hello, world!"},{"location":"commands/","text":"Commands By decorating a function with @command or @first_command , the function is registered in a command registry. Function is only registered, not modified or wrapped in any way - therefore it can be used as if it would not be decorated at all. Commands (command functions) typically need to be enabled in a LiQuer application simply by importing a module with command-decorated functions. Built-in modules need to be imported as well - this gives control about enabled features and as well allows to limit dependencies (e.g. 
in principle LiQuer application only requires pandas when liquer.ext.lq_pandas is imported.) When a command function is registered, metadata are extracted based on available informations and conventions: Function name becomes a name of the command. Modules can not be distinguished inside the query, therefore command (and hence functions) should have unique names even when they are defined in multiple modules. When decorated with @command , the first argument of the function will always be a state. If the first argument is called state , command function will receive the state as an instance of State , otherwise it will be just plain data. Plain data can be obtained from state by state.get() . When decorated with @first_command , command will not receive a state at all. Command registration tries to identify all the arguments and their types. The types are guessed either from type annotations (if available) or from default values. Default values and *args are suported, the **kwargs are not supported in commands. Parsed string arguments are converted to estimated types before they are passed to the command. This is done with help of argument parsers (see liquer.commands.ArgumentParser ). Command function may return any data type. If it does not return an instance of State , the returned data is automatically wrapped as a State when evaluated. The main purpose of State instance is to add metadata to the data (e.g. the query executed sofar, data sources used, type of the data, file name). It as well provides a logging functionality, which can record messages and errors during the execution of the query. See liquer.state for more info.","title":"Commands"},{"location":"commands/#commands","text":"By decorating a function with @command or @first_command , the function is registered in a command registry. Function is only registered, not modified or wrapped in any way - therefore it can be used as if it would not be decorated at all. 
Commands (command functions) typically need to be enabled in a LiQuer application simply by importing a module with command-decorated functions. Built-in modules need to be imported as well - this gives control about enabled features and as well allows to limit dependencies (e.g. in principle LiQuer application only requires pandas when liquer.ext.lq_pandas is imported.) When a command function is registered, metadata are extracted based on available informations and conventions: Function name becomes a name of the command. Modules can not be distinguished inside the query, therefore command (and hence functions) should have unique names even when they are defined in multiple modules. When decorated with @command , the first argument of the function will always be a state. If the first argument is called state , command function will receive the state as an instance of State , otherwise it will be just plain data. Plain data can be obtained from state by state.get() . When decorated with @first_command , command will not receive a state at all. Command registration tries to identify all the arguments and their types. The types are guessed either from type annotations (if available) or from default values. Default values and *args are suported, the **kwargs are not supported in commands. Parsed string arguments are converted to estimated types before they are passed to the command. This is done with help of argument parsers (see liquer.commands.ArgumentParser ). Command function may return any data type. If it does not return an instance of State , the returned data is automatically wrapped as a State when evaluated. The main purpose of State instance is to add metadata to the data (e.g. the query executed sofar, data sources used, type of the data, file name). It as well provides a logging functionality, which can record messages and errors during the execution of the query. 
See liquer.state for more info.","title":"Commands"},{"location":"examples/","text":"HDX disaggregation wizard LiQuer is a small server-side framework that can be quite helpful when building data-oriented web applications. One such example is HDX disaggregation wizard. It is a tool solving a simple task: splitting (disaggregating) a single data sheet (csv or xlsx) into multiple sheets. Sheet is split by values in the specified column (or multiple columns). Since this is a quite generic task (related to group by ), this functionality is built into liquer.ext.lq_pandas . The core of this feature is the eq command, filtering dataframe by specific values in a column, e.g. eq-a-123 keeps in the dataframe only rows where column a is 123. Command eq (equal) accepts multiple column-value pairs, e.g. eq-a-123-b-234 . In HDX the convention is to use the first row for tags. To support this comvention, teq command (tag equal) always keeps the first row of the dataframe. The disaggregation service supports both tagged and untagged data, using either eq or teq for filtering, depending on the user input. The complete flow is simple: * fetch data (command df_from ) * find unique values in a column (or multiple columns) and use them to create a list (table) of queries (command split_df ) * the queries use eq (or teq ) to filter dataframe by value(s). So, use a query like df_from-URL/split_df-COLUMN and you will get a table with queries like df_from-URL/eq-COLUMN-VALUE1 , df_from-URL/eq-COLUMN-VALUE2 . A little detail regarding the split function: There are actually four versions of this function - depending whether it is used for tagged or untagged document and whether it is quick (or query ) splits or full splits. The quick version only provides raw LiQuer queries (not the complete URL), The full split ( split_df for untagged and tsplit_df for tagged data) execute all the split queries, which might be slow. 
As a side effect, the results are cached (depending on the configuration, the example is using FileCache('cache') ). The complete user interface is in a single html file hdx_wizard.html , served by the flask server. Inside the user interface, LiQuer service is called multiple times e.g. to get previews or metadata: * Data previews uses the ability of LiQuer lq_pandas to convert dataframes to json, which can easily be read into javascript on the browser. First preview uses head_df command to display only a restricted part of the dataframe ( head ) * columns_info command is used to get lit of columns and eventual tags * /api/build service is used to build valid queries from javascript lists. This could be implemented directly in javascript, build service is a way to remotely call liquer.parser.encode . Integration of libhxl (example of a custom state type) Pandas is great, but there are other good libraries too e.g. tabulate . If you want to to use other data type (tabular or other), it will typically require (besides some useful commands) defining how that data can be serialized. This is done by implementing a state type . State type does several things associated with state type handling, but the most important role is handling serialization and deserialization. One excelent library used for working with humanitarian data is libhxl . Libhxl plays somewhat similar role as pandas: it reads, writes and manipulates tabular data - but it does as well understand HXL , which pandas doesn't - hence the liquer.ext.lq_hxl module. In order to allow libhxl objects to be used in liquer, we need to define a state type: HxlStateType . import hxl from liquer.state_types import StateType , register_state_type , mimetype_from_extension class HxlStateType ( StateType ): def identifier ( self ): \"Define an unique string identifier for the state type\" return \"hxl_dataset\" The identifier is important e.g. 
for caching, where it is stored as a part of metadata and it tells what StateType should be used for deserialization. def default_extension ( self ): \"Default file extension for the state type\" return \"csv\" def is_type_of ( self , data ): \"Check if data is of this state type\" return isinstance ( data , hxl . model . Dataset ) Default extension is used when the extension is not specified otherwise - for example if query does not end with a filename. The as_bytes and from_bytes are two most important methods, which take care of the serialization and deserialization. A state data can be serialized into multiple formats (e.g. csv, html, json...), therefore as_bytes optionally accepts a file extension and returns (besides the bytes) as well the mimetype. Th mimetype (when queried through the liquer server) becomes a part of the web service response. Note that serialization and deserialization do not necessarily need to support the same formats. E.g. html is quite nice to support in serialization, but it is too unspecific for a deserialization. def as_bytes ( self , data , extension = None ): \"\"\"Serialize data as bytes File extension may be provided and influence the serialization format. \"\"\" if extension is None : extension = self . default_extension () assert self . is_type_of ( data ) mimetype = mimetype_from_extension ( extension ) if extension == \"csv\" : output = \"\" . join ( data . gen_csv ( show_headers = True , show_tags = True )) return output . encode ( \"utf-8\" ), mimetype elif extension == \"json\" : output = \"\" . join ( data . gen_json ( show_headers = True , show_tags = True )) return output . encode ( \"utf-8\" ), mimetype else : raise Exception ( f \"Serialization: file extension { extension } is not supported by HXL dataset type.\" ) def from_bytes ( self , b : bytes , extension = None ): \"\"\"De-serialize data from bytes File extension may be provided and influence the serialization format. 
\"\"\" if extension is None : extension = self . default_extension () f = BytesIO () f . write ( b ) f . seek ( 0 ) if extension == \"csv\" : return hxl . data ( f ) raise Exception ( f \"Deserialization: file extension { extension } is not supported by HXL dataset type.\" ) Sometimes a deep copy of state data is needed - e.g. to assure that the data in the cache will not become unintentionally modified. That's why the state type should define copy method. Since libhxl dataset is immutable (?), it is OK to return just the data without making a copy. def copy ( self , data ): \"\"\"Make a deep copy of the data\"\"\" return data Once the state type class is defined, a state type instance is created and registered HXL_DATASET_STATE_TYPE = HxlStateType () register_state_type ( hxl . Dataset , HXL_DATASET_STATE_TYPE ) register_state_type ( hxl . io . HXLReader , HXL_DATASET_STATE_TYPE ) This is (currently) done for all relevant types. State types are registered in a global StateTypesRegistry object, which is responsible for registering and finding a state type instance for any state data. For more details see liquer.ext.lq_hxl module. Actually, the state type may not define a serialization and/or deserialization. There are objects that either can't be reliably serialized (e.g. matplotlib figure - as of time of writing) or serialization is otherwise undesirable. Such state types would be perfectly legal - they just could be neither cached nor served by the liquer web server. However, they could be inside the query, e.g. if matplotlib figure would be followed by image creation command, the image could be both served and cached. Reports and visualizations With the help of LiQuer, it is very easy to create both resuable visualizations with multiple views as well as documents viewable offline or suitable for printing. There are multiple markups suitable for creating reports and visualisations, but probably the easiest and most flexible are HTML documents. 
In LiQuer html can be easily created by returning a html text from a command. Creation of text is simplified by evaluate_template function, which processes a string ( template ) containing LiQuer queries and replaces those queries by their results. Report example is processing data from Global Food Prices Database (WFP) . It contains monthly prices for various commodities. To adapt the data to our needs we need a cople of extra commands: Month and year are in two separate columns mp_year and mp_month . For charts we need dates in YYYY-MM-DD format, which we achieve with the following command: @command def datemy ( df , y = \"mp_year\" , m = \"mp_month\" , target = \"date\" ): df . loc [:, target ] = [ \" %04d - %02d -01\" % ( int ( year ), int ( month )) for year , month in zip ( df [ y ], df [ m ])] return df To make statistics, it's handy to use pandas groupby. As an example we show count of groups, which used in the report to show number of observed prices in various markets: @command def count ( df , * groupby_columns ): df . loc [:, \"count\" ] = 1 return df . groupby ( groupby_columns ) . count () . reset_index () . loc [:, list ( groupby_columns ) + [ \"count\" ]] An example of a custom filter is a greater or equal command geq , used in the report to cut away years before a start year: @command def geq ( df , column , value : float ): index = df . loc [:, column ] >= value return df . loc [ index ,:] This is somewhat similar to eq command from the pandas support module liquer.ext.lq_pandas , but only supports numerical values, while the eq command is somewhat more general. Pandas dataframe supports quite flexible method to_html for converting dataframes to html format. Report uses for styling the popular css framework bootstrap and to display the tables nicely we just need to add some bootstrap css classes . Command as well prepends a link to the dataframe itself by the link command. 
This tends to be very useful in practice, allowing to conviniently import underlying raw data into a spreadsheet. @command def table ( state ): df = state . get () html = evaluate_template ( f \"\"\"(data) \"\"\" ) return html + df . to_html ( index = False , classes = \"table table-striped\" ) The core of the report is a report command. It can be applied on any dataframe containing suitable fields. This allows a large degree of flexibility - arbitrary filters can be inserted into a command chain before the report. For example, the current report can be restricted to specific markets, time periods or commodities without any additional code, just by modifying the URL. Report embeds a possibility to remove data pefore a from_year . This in principle could be done by inserting a geq command before the report (which would work fine). Passing from_year as an argument has an advantage, that the start year can become a part of the report (e.g. it can be used as a part of the title). Main part of the report is a single template, evaluated with evaluate_template . Note that LiQuer template uses as well string interpolation by python f string (PEP 498) , which is a very powerful combination. @command def report ( state , from_year = 2017 , linktype = None ): state = state . with_caching ( False ) def makelink ( url ): if linktype is None : return url extension = url . split ( \".\" )[ - 1 ] return evaluate ( f \"fetch- { encode_token ( url ) } /link- { linktype } - { extension } \" ) . get () try : source = state . sources [ 0 ] except : source = \"???\" LiQuer = ' LiQuer ' df = state . get () try : title = \",\" . join ( sorted ( df . adm0_name . unique ())) + f \" since { from_year } \" except : title = \"report\" return state . with_filename ( \"report.html\" ) . with_data ( evaluate_template ( f \"\"\" { title }
    \"HDX\"
    Generated with { LiQuer } © 2019 Orest Dubay




    { title }

    Data originate from   { source }   were processed via a { LiQuer } service. Only data after { from_year } are shown (data), complete data are  here.
    $ { state . query } /datemy/geq-mp_year- { from_year } /groupby_mean-mp_price-date-cm_name/plotly_chart-xys-date-mp_price-cm_name$
    $ { state . query } /datemy/geq-mp_year- { from_year } /count-adm1_name/plotly_chart-piexs-count-adm1_name$

    Average prices

    $ { state . query } /datemy/geq-mp_year- { from_year } /groupby_mean-mp_price-cm_name/table$

    Observations

    $ { state . query } /datemy/geq-mp_year- { from_year } /count-adm1_name/table$
    \"\"\" )) Inside the report command some more magic is used to handle links and external resources. Links are created by a nested function makelink . The main purpose is to allow three different regimes of working with links: * links to original sources (default), * serving (proxying) resources through LiQuer service and * dataurls. Links to original sources are useful if the report is used from a web service: the report size is then relatively small and thus the loading time is faster than for dataurls. Proxying resources through LiQuer service allows to cache resources by LiQuer. This may be useful on slower internet connections, when running the service without internet or behind a firewall. Dataurl link type allows saving the report as a single html file. Such a report can be used e.g. for offline browsing, archiving or sending by e-mail. All the assets are embedded inside the html file, so the report will work even when the LiQuer service is not available. Note: The embedded LiQuer queries will of course not work offline, but if necessary, the data comming out from LiQuer can be turned to a dataurl with link command; type of the link can be controlled by linktype state variable. Assuming linktype is not hardcoded (as in table command) all query links in the report could be turned to dataurls like this: filter - params / let - linktype - dataurl / report This of course could lead to extremply large report files, so it should be used carefully.","title":"Examples"},{"location":"examples/#hdx-disaggregation-wizard","text":"LiQuer is a small server-side framework that can be quite helpful when building data-oriented web applications. One such example is HDX disaggregation wizard. It is a tool solving a simple task: splitting (disaggregating) a single data sheet (csv or xlsx) into multiple sheets. Sheet is split by values in the specified column (or multiple columns). 
Since this is a quite generic task (related to group by ), this functionality is built into liquer.ext.lq_pandas . The core of this feature is the eq command, filtering dataframe by specific values in a column, e.g. eq-a-123 keeps in the dataframe only rows where column a is 123. Command eq (equal) accepts multiple column-value pairs, e.g. eq-a-123-b-234 . In HDX the convention is to use the first row for tags. To support this comvention, teq command (tag equal) always keeps the first row of the dataframe. The disaggregation service supports both tagged and untagged data, using either eq or teq for filtering, depending on the user input. The complete flow is simple: * fetch data (command df_from ) * find unique values in a column (or multiple columns) and use them to create a list (table) of queries (command split_df ) * the queries use eq (or teq ) to filter dataframe by value(s). So, use a query like df_from-URL/split_df-COLUMN and you will get a table with queries like df_from-URL/eq-COLUMN-VALUE1 , df_from-URL/eq-COLUMN-VALUE2 . A little detail regarding the split function: There are actually four versions of this function - depending whether it is used for tagged or untagged document and whether it is quick (or query ) splits or full splits. The quick version only provides raw LiQuer queries (not the complete URL), The full split ( split_df for untagged and tsplit_df for tagged data) execute all the split queries, which might be slow. As a side effect, the results are cached (depending on the configuration, the example is using FileCache('cache') ). The complete user interface is in a single html file hdx_wizard.html , served by the flask server. Inside the user interface, LiQuer service is called multiple times e.g. to get previews or metadata: * Data previews uses the ability of LiQuer lq_pandas to convert dataframes to json, which can easily be read into javascript on the browser. 
First preview uses head_df command to display only a restricted part of the dataframe ( head ) * columns_info command is used to get lit of columns and eventual tags * /api/build service is used to build valid queries from javascript lists. This could be implemented directly in javascript, build service is a way to remotely call liquer.parser.encode .","title":"HDX disaggregation wizard"},{"location":"examples/#integration-of-libhxl-example-of-a-custom-state-type","text":"Pandas is great, but there are other good libraries too e.g. tabulate . If you want to to use other data type (tabular or other), it will typically require (besides some useful commands) defining how that data can be serialized. This is done by implementing a state type . State type does several things associated with state type handling, but the most important role is handling serialization and deserialization. One excelent library used for working with humanitarian data is libhxl . Libhxl plays somewhat similar role as pandas: it reads, writes and manipulates tabular data - but it does as well understand HXL , which pandas doesn't - hence the liquer.ext.lq_hxl module. In order to allow libhxl objects to be used in liquer, we need to define a state type: HxlStateType . import hxl from liquer.state_types import StateType , register_state_type , mimetype_from_extension class HxlStateType ( StateType ): def identifier ( self ): \"Define an unique string identifier for the state type\" return \"hxl_dataset\" The identifier is important e.g. for caching, where it is stored as a part of metadata and it tells what StateType should be used for deserialization. def default_extension ( self ): \"Default file extension for the state type\" return \"csv\" def is_type_of ( self , data ): \"Check if data is of this state type\" return isinstance ( data , hxl . model . Dataset ) Default extension is used when the extension is not specified otherwise - for example if query does not end with a filename. 
The as_bytes and from_bytes are two most important methods, which take care of the serialization and deserialization. A state data can be serialized into multiple formats (e.g. csv, html, json...), therefore as_bytes optionally accepts a file extension and returns (besides the bytes) as well the mimetype. Th mimetype (when queried through the liquer server) becomes a part of the web service response. Note that serialization and deserialization do not necessarily need to support the same formats. E.g. html is quite nice to support in serialization, but it is too unspecific for a deserialization. def as_bytes ( self , data , extension = None ): \"\"\"Serialize data as bytes File extension may be provided and influence the serialization format. \"\"\" if extension is None : extension = self . default_extension () assert self . is_type_of ( data ) mimetype = mimetype_from_extension ( extension ) if extension == \"csv\" : output = \"\" . join ( data . gen_csv ( show_headers = True , show_tags = True )) return output . encode ( \"utf-8\" ), mimetype elif extension == \"json\" : output = \"\" . join ( data . gen_json ( show_headers = True , show_tags = True )) return output . encode ( \"utf-8\" ), mimetype else : raise Exception ( f \"Serialization: file extension { extension } is not supported by HXL dataset type.\" ) def from_bytes ( self , b : bytes , extension = None ): \"\"\"De-serialize data from bytes File extension may be provided and influence the serialization format. \"\"\" if extension is None : extension = self . default_extension () f = BytesIO () f . write ( b ) f . seek ( 0 ) if extension == \"csv\" : return hxl . data ( f ) raise Exception ( f \"Deserialization: file extension { extension } is not supported by HXL dataset type.\" ) Sometimes a deep copy of state data is needed - e.g. to assure that the data in the cache will not become unintentionally modified. That's why the state type should define copy method. 
Since libhxl dataset is immutable (?), it is OK to return just the data without making a copy. def copy ( self , data ): \"\"\"Make a deep copy of the data\"\"\" return data Once the state type class is defined, a state type instance is created and registered HXL_DATASET_STATE_TYPE = HxlStateType () register_state_type ( hxl . Dataset , HXL_DATASET_STATE_TYPE ) register_state_type ( hxl . io . HXLReader , HXL_DATASET_STATE_TYPE ) This is (currently) done for all relevant types. State types are registered in a global StateTypesRegistry object, which is responsible for registering and finding a state type instance for any state data. For more details see liquer.ext.lq_hxl module. Actually, the state type may not define a serialization and/or deserialization. There are objects that either can't be reliably serialized (e.g. matplotlib figure - as of time of writing) or serialization is otherwise undesirable. Such state types would be perfectly legal - they just could be neither cached nor served by the liquer web server. However, they could be inside the query, e.g. if matplotlib figure would be followed by image creation command, the image could be both served and cached.","title":"Integration of libhxl (example of a custom state type)"},{"location":"examples/#reports-and-visualizations","text":"With the help of LiQuer, it is very easy to create both resuable visualizations with multiple views as well as documents viewable offline or suitable for printing. There are multiple markups suitable for creating reports and visualisations, but probably the easiest and most flexible are HTML documents. In LiQuer html can be easily created by returning a html text from a command. Creation of text is simplified by evaluate_template function, which processes a string ( template ) containing LiQuer queries and replaces those queries by their results. Report example is processing data from Global Food Prices Database (WFP) . It contains monthly prices for various commodities. 
To adapt the data to our needs we need a cople of extra commands: Month and year are in two separate columns mp_year and mp_month . For charts we need dates in YYYY-MM-DD format, which we achieve with the following command: @command def datemy ( df , y = \"mp_year\" , m = \"mp_month\" , target = \"date\" ): df . loc [:, target ] = [ \" %04d - %02d -01\" % ( int ( year ), int ( month )) for year , month in zip ( df [ y ], df [ m ])] return df To make statistics, it's handy to use pandas groupby. As an example we show count of groups, which used in the report to show number of observed prices in various markets: @command def count ( df , * groupby_columns ): df . loc [:, \"count\" ] = 1 return df . groupby ( groupby_columns ) . count () . reset_index () . loc [:, list ( groupby_columns ) + [ \"count\" ]] An example of a custom filter is a greater or equal command geq , used in the report to cut away years before a start year: @command def geq ( df , column , value : float ): index = df . loc [:, column ] >= value return df . loc [ index ,:] This is somewhat similar to eq command from the pandas support module liquer.ext.lq_pandas , but only supports numerical values, while the eq command is somewhat more general. Pandas dataframe supports quite flexible method to_html for converting dataframes to html format. Report uses for styling the popular css framework bootstrap and to display the tables nicely we just need to add some bootstrap css classes . Command as well prepends a link to the dataframe itself by the link command. This tends to be very useful in practice, allowing to conviniently import underlying raw data into a spreadsheet. @command def table ( state ): df = state . get () html = evaluate_template ( f \"\"\"(data) \"\"\" ) return html + df . to_html ( index = False , classes = \"table table-striped\" ) The core of the report is a report command. It can be applied on any dataframe containing suitable fields. 
This allows a large degree of flexibility - arbitrary filters can be inserted into a command chain before the report. For example, the current report can be restricted to specific markets, time periods or commodities without any additional code, just by modifying the URL. Report embeds a possibility to remove data pefore a from_year . This in principle could be done by inserting a geq command before the report (which would work fine). Passing from_year as an argument has an advantage, that the start year can become a part of the report (e.g. it can be used as a part of the title). Main part of the report is a single template, evaluated with evaluate_template . Note that LiQuer template uses as well string interpolation by python f string (PEP 498) , which is a very powerful combination. @command def report ( state , from_year = 2017 , linktype = None ): state = state . with_caching ( False ) def makelink ( url ): if linktype is None : return url extension = url . split ( \".\" )[ - 1 ] return evaluate ( f \"fetch- { encode_token ( url ) } /link- { linktype } - { extension } \" ) . get () try : source = state . sources [ 0 ] except : source = \"???\" LiQuer = ' LiQuer ' df = state . get () try : title = \",\" . join ( sorted ( df . adm0_name . unique ())) + f \" since { from_year } \" except : title = \"report\" return state . with_filename ( \"report.html\" ) . with_data ( evaluate_template ( f \"\"\" { title }
    \"HDX\"
    Generated with { LiQuer } © 2019 Orest Dubay




    { title }

    Data originate from   { source }   were processed via a { LiQuer } service. Only data after { from_year } are shown (data), complete data are  here.
    $ { state . query } /datemy/geq-mp_year- { from_year } /groupby_mean-mp_price-date-cm_name/plotly_chart-xys-date-mp_price-cm_name$
    $ { state . query } /datemy/geq-mp_year- { from_year } /count-adm1_name/plotly_chart-piexs-count-adm1_name$

    Average prices

    $ { state . query } /datemy/geq-mp_year- { from_year } /groupby_mean-mp_price-cm_name/table$

    Observations

    $ { state . query } /datemy/geq-mp_year- { from_year } /count-adm1_name/table$
    \"\"\" )) Inside the report command some more magic is used to handle links and external resources. Links are created by a nested function makelink . The main purpose is to allow three different regimes of working with links: * links to original sources (default), * serving (proxying) resources through LiQuer service and * dataurls. Links to original sources are useful if the report is used from a web service: the report size is then relatively small and thus the loading time is faster than for dataurls. Proxying resources through LiQuer service allows to cache resources by LiQuer. This may be useful on slower internet connections, when running the service without internet or behind a firewall. Dataurl link type allows saving the report as a single html file. Such a report can be used e.g. for offline browsing, archiving or sending by e-mail. All the assets are embedded inside the html file, so the report will work even when the LiQuer service is not available. Note: The embedded LiQuer queries will of course not work offline, but if necessary, the data comming out from LiQuer can be turned to a dataurl with link command; type of the link can be controlled by linktype state variable. Assuming linktype is not hardcoded (as in table command) all query links in the report could be turned to dataurls like this: filter - params / let - linktype - dataurl / report This of course could lead to extremply large report files, so it should be used carefully.","title":"Reports and visualizations"},{"location":"guide/","text":"Instalation LiQuer requires (at minimum) python 3.6 with flask. It can be installed by python3 -m pip install liquer-framework Alternatively you can get LiQuer from repository : git clone https://github.com/orest-d/liquer.git python3 -m venv venv source venv/bin/activate cd liquer python3 setup.py install Getting started The good tradition is starting with a Hello, world! 
example: from liquer import * @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" print ( evaluate ( \"hello/greet\" ) . get ()) print ( evaluate ( \"hello/greet-everybody\" ) . get ()) In this example we first create to commands - hello and greet . Commands are ordinary python functions decorated with either @first_command or @command . A sequence of commands can be written as a link query (the main concept of LiQuer). Simple query is a sequence of actions (commands with parameters) separated by slash (\"/\"). A query is evaluated from left to right, always passing the output as a first argument to the next action. For example the query hello/greet is roughly equivalent to evaluating greet ( hello ()) Query hello/greet-everybody (in the end of the example) is equivalent to greet ( hello (), \"everybody\" ) Here we specify the second argument to the function greet in the query. the arguments are separated by dash (\"-\"). (This choice might look unusual, but it allows using such a query as a part of URL . (Link query syntax requires treating \"/\" and \"-\" as special characters and escape them when needed - as we will explain in the query chapter. If the actions are always passing the result into the next action, what is passed into the very first action? The very first action in the pipeline will not receive anything as a first argument (more precisely, it will receive None ). To avoid having such a useless argument in commands that are used at the beginning of the query, (in our example the hello function), we can use the decorator @first_command instead of @command . This is more a convenience than necessity though. Commands and actions are explained in this chapter Queries can be executed in multiple ways in LiQuer (programatically from scripts or commands, from recipes/batch jobs or interactively from a web API). 
In this example we just evaluate them in the script by the evaluate function. What did we actually gain? Link query syntax allows to represent pipelines as short strings. More importantly, link query can be used as a path part of the URL . Unlike the more conventional web services typically a separate request for each action, link query can specify sequences of actions (pipelines) in the URL. This gives LiQuer an incredible amount of flexibility and expressiveness. LiQuer has a well-defined web service API A server version of the same example: from liquer import * from liquer.app import quickstart @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" if __name__ == '__main__' : quickstart ( index_link = \"/liquer/q/hello/greet/readme.txt\" ) This is a quick way how to start a liquer server. It should automatically call the link /liquer/q/hello/greet/readme.txt , which executes the query hello/greet . The result is exposed as readme.txt . The name (readme) is arbitrary, but the file extension (txt) is significant, since it determines the output format. The /liquer/q is an endpoint for executing a query (see web service API ). The quickstart is one of the simplest methods to start the LiQuer in server mode. LiQuer framework offers, however, many ways to configure and adapt the solution. Working with pandas Pandas example: from liquer import * import liquer.ext.lq_pandas @first_command def data (): return pd . DataFrame ( dict ( a = [ 1 , 2 , 3 ], b = [ 40 , 50 , 60 ])) @command def sum_columns ( df , column1 = \"a\" , column2 = \"b\" , target = \"c\" ): df . loc [:, target ] = df [ column1 ] + df [ column2 ] return df When queried via a web interface, the data is automatically converted to a most suitable format. If the last element of a query is a command without arguments containing a dot in the name, it is interpreted as a file name and the extension is used to determine the format. 
The format conversion only happens when query is run over the service, not when using the evaluate function. data - default format is used (which is csv) data/data.html - data is converted to html and displayed in the browser data/data.csv - data is converted to csv data/data.xlsx - dataframe is converted to xlsx data/eq-b-50 - built in equality filter selects rows with b==50 data/sum_columns - sum_columns is applied to a dataframe; this is equivalent to sum_columns(data()) data/sum_columns/sum_columns-a-c-d/sum2.html - multiple actions are chained: sum_columns(sum_columns(data()),\"a\",\"c\",\"d\") and result is served as html. df_from-URL - built in command loads a dataframe from URL Charts LiQuer has a rudimentary support for matplotlib and plotly residing in packages liquer.ext.lq_matplotlib and liquer.ext.lq_plotly Examples are in matplotlib_chart.py and plotly_chart.py show how to make simple plots with the commands already built in. This functionality is very basic at the moment and is likely to change.","title":"Introduction"},{"location":"guide/#instalation","text":"LiQuer requires (at minimum) python 3.6 with flask. It can be installed by python3 -m pip install liquer-framework Alternatively you can get LiQuer from repository : git clone https://github.com/orest-d/liquer.git python3 -m venv venv source venv/bin/activate cd liquer python3 setup.py install","title":"Instalation"},{"location":"guide/#getting-started","text":"The good tradition is starting with a Hello, world! example: from liquer import * @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" print ( evaluate ( \"hello/greet\" ) . get ()) print ( evaluate ( \"hello/greet-everybody\" ) . get ()) In this example we first create to commands - hello and greet . Commands are ordinary python functions decorated with either @first_command or @command . 
A sequence of commands can be written as a link query (the main concept of LiQuer). Simple query is a sequence of actions (commands with parameters) separated by slash (\"/\"). A query is evaluated from left to right, always passing the output as a first argument to the next action. For example the query hello/greet is roughly equivalent to evaluating greet ( hello ()) Query hello/greet-everybody (in the end of the example) is equivalent to greet ( hello (), \"everybody\" ) Here we specify the second argument to the function greet in the query. the arguments are separated by dash (\"-\"). (This choice might look unusual, but it allows using such a query as a part of URL . (Link query syntax requires treating \"/\" and \"-\" as special characters and escape them when needed - as we will explain in the query chapter. If the actions are always passing the result into the next action, what is passed into the very first action? The very first action in the pipeline will not receive anything as a first argument (more precisely, it will receive None ). To avoid having such a useless argument in commands that are used at the beginning of the query, (in our example the hello function), we can use the decorator @first_command instead of @command . This is more a convenience than necessity though. Commands and actions are explained in this chapter Queries can be executed in multiple ways in LiQuer (programatically from scripts or commands, from recipes/batch jobs or interactively from a web API). In this example we just evaluate them in the script by the evaluate function.","title":"Getting started"},{"location":"guide/#what-did-we-actually-gain","text":"Link query syntax allows to represent pipelines as short strings. More importantly, link query can be used as a path part of the URL . Unlike the more conventional web services typically a separate request for each action, link query can specify sequences of actions (pipelines) in the URL. 
This gives LiQuer an incredible amount of flexibility and expressiveness. LiQuer has a well-defined web service API A server version of the same example: from liquer import * from liquer.app import quickstart @first_command def hello (): return \"Hello\" @command def greet ( greeting , who = \"world\" ): return f \" { greeting } , { who } !\" if __name__ == '__main__' : quickstart ( index_link = \"/liquer/q/hello/greet/readme.txt\" ) This is a quick way how to start a liquer server. It should automatically call the link /liquer/q/hello/greet/readme.txt , which executes the query hello/greet . The result is exposed as readme.txt . The name (readme) is arbitrary, but the file extension (txt) is significant, since it determines the output format. The /liquer/q is an endpoint for executing a query (see web service API ). The quickstart is one of the simplest methods to start the LiQuer in server mode. LiQuer framework offers, however, many ways to configure and adapt the solution.","title":"What did we actually gain?"},{"location":"guide/#working-with-pandas","text":"Pandas example: from liquer import * import liquer.ext.lq_pandas @first_command def data (): return pd . DataFrame ( dict ( a = [ 1 , 2 , 3 ], b = [ 40 , 50 , 60 ])) @command def sum_columns ( df , column1 = \"a\" , column2 = \"b\" , target = \"c\" ): df . loc [:, target ] = df [ column1 ] + df [ column2 ] return df When queried via a web interface, the data is automatically converted to a most suitable format. If the last element of a query is a command without arguments containing a dot in the name, it is interpreted as a file name and the extension is used to determine the format. The format conversion only happens when query is run over the service, not when using the evaluate function. 
data - default format is used (which is csv) data/data.html - data is converted to html and displayed in the browser data/data.csv - data is converted to csv data/data.xlsx - dataframe is converted to xlsx data/eq-b-50 - built in equality filter selects rows with b==50 data/sum_columns - sum_columns is applied to a dataframe; this is equivalent to sum_columns(data()) data/sum_columns/sum_columns-a-c-d/sum2.html - multiple actions are chained: sum_columns(sum_columns(data()),\"a\",\"c\",\"d\") and result is served as html. df_from-URL - built in command loads a dataframe from URL","title":"Working with pandas"},{"location":"guide/#charts","text":"LiQuer has a rudimentary support for matplotlib and plotly residing in packages liquer.ext.lq_matplotlib and liquer.ext.lq_plotly Examples are in matplotlib_chart.py and plotly_chart.py show how to make simple plots with the commands already built in. This functionality is very basic at the moment and is likely to change.","title":"Charts"},{"location":"metadata/","text":"State variables In some situations it is useful to pass some values along the query. For example if we want to specify some value once and use it in multiple commands. from liquer import * from liquer.state import set_var import liquer.ext.basic @command def hello ( state , who = None ): if who is None : who = state . vars . get ( \"greet\" , \"???\" ) return f \"Hello, { who } !\" set_var ( \"greet\" , \"world\" ) print ( evaluate ( \"hello\" ) . get ()) # Hello, world! : uses state variable defined above print ( evaluate ( \"state_variable-greet\" ) . get ()) # world : shows the content of the state variable print ( evaluate ( \"hello-everybody\" ) . get ()) # Hello, everybody! : uses the argument print ( evaluate ( \"let-greet-variable/hello\" ) . get ()) # Hello, variable! : defines the variable in the query print ( evaluate ( \"hello\" ) . get ()) # Hello, world! 
: let is local to a query There are two variables that are important to set up in some cases: * server should contain the URL of the LiQuer server * api_path should contain the path to the query service So server + api_path + query should become a valid url that would yield a query result. Several commands (e.g. link or split_df) depend on correct definition of these variables, so they should be set together with setting up the flask blueprint - e.g. url_prefix = '/liquer' app . register_blueprint ( bp . app , url_prefix = url_prefix ) set_var ( \"api_path\" , url_prefix + \"/q/\" ) set_var ( \"server\" , \"http://localhost:5000\" )","title":"Metadata and state"},{"location":"metadata/#state-variables","text":"In some situations it is useful to pass some values along the query. For example if we want to specify some value once and use it in multiple commands. from liquer import * from liquer.state import set_var import liquer.ext.basic @command def hello ( state , who = None ): if who is None : who = state . vars . get ( \"greet\" , \"???\" ) return f \"Hello, { who } !\" set_var ( \"greet\" , \"world\" ) print ( evaluate ( \"hello\" ) . get ()) # Hello, world! : uses state variable defined above print ( evaluate ( \"state_variable-greet\" ) . get ()) # world : shows the content of the state variable print ( evaluate ( \"hello-everybody\" ) . get ()) # Hello, everybody! : uses the argument print ( evaluate ( \"let-greet-variable/hello\" ) . get ()) # Hello, variable! : defines the variable in the query print ( evaluate ( \"hello\" ) . get ()) # Hello, world! : let is local to a query There are two variables that are important to set up in some cases: * server should contain the URL of the LiQuer server * api_path should contain the path to the query service So server + api_path + query should become a valid url that would yield a query result. Several commands (e.g. 
link or split_df) depend on correct definition of these variables, so they should be set together with setting up the flask blueprint - e.g. url_prefix = '/liquer' app . register_blueprint ( bp . app , url_prefix = url_prefix ) set_var ( \"api_path\" , url_prefix + \"/q/\" ) set_var ( \"server\" , \"http://localhost:5000\" )","title":"State variables"},{"location":"query/","text":"Query Basic query Query is in the simplest case composed out of a sequence of actions. Action is nothing else than a function (closure) with all arguments specified except the first one. All actions consume a single input and produce a single output - so they can be chained into a pipeline. In the Hello, world! example from earlier, the hello/greet-everybody is a sequence of two actions. The first action, hello , does not have any explicit parameters. (Technically it accepts an input - but that input is ignored, so this function is suitable to be at the beginning of the pipeline.) The greet-everybody is an action calling a command greet with an argument \"everybody\". The general structure of a query is: actions are separated by \"/\", arguments are separated by \"-\": identifier1-arg11-arg12/identifier2-arg21-arg22-arg23/identifier3-arg31... Action starts with an identifier, which is interpreted as a command name. In practice, the command is always defined via a python function with the same name. Therefore identifier must be a valid python identifier plus it can't start with an upper case. Thus \"my_command\" is a valid identifier, but \"MyCommand\", \"1command\" or \"my.command\" is not. Query is optionally terminated by a filename. A valid filename must fulfill the following conditions: Filename is the last element in a query, i.e. there is no \"/\" after the filename. Filename must contain \".\". Filename can not contain \"-\". These rules assure that a filename can be recognized from a command name or an argument. 
For example readme.txt is a valid filename, but readme-now.txt would be interpreted as an action composed from a command readme with an argument \"now.txt\". The main role of a filename is to specify the file extension, which determines the format in which the data are serialized when saved or returned via a web interface. The filename before the extension is arbitrary. Escaping and query entities Query only allows to use the characters allowed in the path part of the URL, and the following characters have a special meaning: slash \"/\" separates the actions, dash \"-\" separates action arguments and query segments, tilde \"~\" is used as an escape character. There are two mechanisms that can be used for escaping: * Percentage encoding used for encoding special characters in URL - see e.g. https://en.wikipedia.org/wiki/Percent-encoding * Query entities are constructs starting with the tilde character \"~\". Query entities have special meaning, e.g. they can be used for encoding of \"-\", \"/\" and \"~\". Though encoding of these characters with the percentage encoding might work as well, it is safer to use query entities (tilde encoding). The following entities are defined: tilde entity \"~~\" expands to \"~\" minus entity \"~_\" expands to \"-\" slash entities \"~I\" and \"~/\" expand to \"/\" https entity \"~H\" expands to \"https://\" http entity \"~h\" expands to \"http://\" file entity \"~f\" expands to \"file://\" protocol entity \"~P\" expands to \"://\" negative number entities \"~0\",\"~1\",\"~2\" ... \"~9\" expand to \"-0\", \"-1\", \"-2\" ... \"-9\". (This is a more convenient alternative syntax for writing negative numbers like \"~123\" instead of \"~_123\". space entity \"~.\" expands to \" \" expand entity \"~X~ query ~E\" evaluates the query and expands to a result end_entity \"~E\" is not a real entity, but can only be part of a complex entity like the expand entity . 
Expand entity supports two types of queries - absolute starting with \"/\" and relative (not starting with \"/\"). Absolute entities are simply evaluated as they are, but relative entities are pre-pended with the current query before the execution. For example hello/greet-~X~/everybody~E is interpreted as greet(hello(), everybody()) , but the relative query in an argument hello/greet-~X~everybody~E is interpreted as greet(hello(), everybody(hello())) .","title":"Query syntax"},{"location":"query/#query","text":"","title":"Query"},{"location":"query/#basic-query","text":"Query is in the simplest case composed out of a sequence of actions. Action is nothing else than a function (closure) with all arguments specified except the first one. All actions consume a single input and produce a single output - so they can be chained into a pipeline. In the Hello, world! example from earlier, the hello/greet-everybody is a sequence of two actions. The first action is hello does not have any explicit parameters. (Technically it accepts an input - but that input is ignored, so this function is suitable to be at the beginning of the pipeline.) The greet-everybody is and action calling a command greet with an argument \"everybody\". The general structure of a query is actions are separated by \"/\", arguments are separated by \"-\": identifier1-arg11-arg12/identifier2-arg21-arg22-arg23/identifier3-arg31... Action starts with an identifier, which is interpreted as a command name. In practice, the command is always defined via a python function with the same name. Therefore identifier must be a valid python identifier plus it can't start with an upper case. Thus \"my_command\" is a valid identifier, but \"MyCommand\", \"1command\" or \"my.command\" is not. Query is optionaly terminated by a filename. A valid filename must fulfill the following conditions: Filename is the last element in a query, i.e. there is no \"/\" after the filename. Filename must contain \".\". 
Filename can not contain \"-\". These rules assure that a filename can be recognized from a command name or an argument. For example readme.txt is a valid filename, but readme-now.txt would be interpreted as an action composed from a command readme with an argument \"now.txt\". The main role of a filename is to specify the file extension, which determines the format in which the data are serialized when saved or returned via a web interface. The filename before the extension is arbitrary.","title":"Basic query"},{"location":"query/#escaping-and-query-entities","text":"Query only allows to use the characters allowed in the path part of the URL, and the following characters have a special meaning: slash \"/\" separates the actions, dash \"-\" separates action arguments and query segments, tilde \"~\" is used as an escape character. There are two mechanisms that can be used for escaping: * Percentage encoding used for encoding special characters in URL - see e.g. https://en.wikipedia.org/wiki/Percent-encoding * Query entities are constructs starting with the tilde character \"~\". Query entities have special meaning, e.g. they can be used for encoding of \"-\", \"/\" and \"~\". Though encoding of these characters with the percentage encoding might work as well, it is safer to use query entities (tilde encoding). The following entities are defined: tilde entity \"~~\" expands to \"~\" minus entity \"~_\" expands to \"-\" slash entities \"~I\" and \"~/\" expand to \"/\" https entity \"~H\" expands to \"https://\" http entity \"~h\" expands to \"http://\" file entity \"~f\" expands to \"file://\" protocol entity \"~P\" expands to \"://\" negative number entities \"~0\",\"~1\",\"~2\" ... \"~9\" expand to \"-0\", \"-1\", \"-2\" ... \"-9\". (This is a more convenient alternative syntax for writing negative numbers like \"~123\" instead of \"~_123\". 
space entity \"~.\" expands to \" \" expand entity \"~X~ query ~E\" evaluates the query and expands to a result end_entity \"~E\" is not a real entity, but can only be part of a complex entity like the expand entity . Expand entity supports two types of queries - absolute starting with \"/\" and relative (not starting with \"/\"). Absolute entities are simply evaluated as they are, but relative entities are pre-pended with the current query before the execution. For example hello/greet-~X~/everybody~E is interpreted as greet(hello(), everybody()) , but the relative query in an argument hello/greet-~X~everybody~E is interpreted as greet(hello(), everybody(hello())) .","title":"Escaping and query entities"},{"location":"recipes/","text":"","title":"Recipes"},{"location":"security/","text":"Security LiQuer was so far only deployed on intranet. More development is needed to make internet deployment of LiQuer safe. LiQuer exposes only services defined in the liquer.blueprint module - and by extension all the registered commands. Only enable commands that do not put your system at risk. A big source of security concerns are DOS attacks: * It is easy to overload LiQuer server with huge queries. To solve this issue, queries need to be validated in some way. * Badly implemented cache may quickly exceed the storage capacity. (Default NoCache is a safe choice in this respect.)","title":"Security"},{"location":"security/#security","text":"LiQuer was so far only deployed on intranet. More development is needed to make internet deployment of LiQuer safe. LiQuer exposes only services defined in the liquer.blueprint module - and by extension all the registered commands. Only enable commands that do not put your system at risk. A big source of security concerns are DOS attacks: * It is easy to overload LiQuer server with huge queries. To solve this issue, queries need to be validated in some way. * Badly implemented cache may quickly exceed the storage capacity. 
(Default NoCache is a safe choice in this respect.)","title":"Security"},{"location":"store/","text":"Store Store is a configurable virtual file system inside liquer. Store is designed to be able to deal with states. One notable extension of the Store compared to a regular file system is the ability to store (and work with) the metadata, which is essential for dealing with data in liquer. Store is basically a key/value store mapping a path to a sequence of bytes. By itself, Store does not define (or care about) serialization of the data. This differentiates it from Cache . Even though the interface to Cache and Store is intentionally very similar, these two mechanisms are different: Cache keeps State (i.e. data object with metadata). Cache deals with objects and stores State perhaps in a non-serialized form (e.g. MemoryStore). Store keeps resources - i.e. arbitrary binary data ( bytes ) complemented with metadata. One purpose of a Store is to provide an option to serve files into the pipeline. The pipeline may start with a resource path followed by a sequence of actions. Cache By default there is no cache - i.e. the queries are always re-evaluated. There are several cache implementations available in liquer.cache . They are configured by set_cache function, for example set_cache ( FileCache ( \"cache\" )) configures a cache that will store all the (cache-able) results of queries in a directory cache . Cache should be configured before the queries are evaluated - and before state variables are set. Currently there are three cache implementations: NoCache is a trivial do-nothing cache, FileCache stores data in files, MemoryCache caches the object in the memory. Custom cache can be created by defining a cache interface, see above mentioned classes. 
Cache will typically use query as a key and utilize the mechanism of serializing data into a bytes sequence (defined in liquer.state_types ), thus implementing a cache based either on a key-value store or blob-storage in SQL databases should be fairly straightforward (and probably quite similar to FileCache ). Command may optionally decide not to cache its output. This may be useful when command produces volatile data, e.g. time. In such a case command (operating on a state) can disable cache by state.with_caching(False) .","title":"Store and Cache"},{"location":"store/#store","text":"Store is a configurable virtual file system inside liquer. Store is designed to be able to deal with states. One notable extension of the Store compared to a regular file system is the ability to store (and work with) the metadata, which is essential for dealing with data in liquer. Store is basically a key/value store mapping a path to a sequence of bytes. By itself, Store does not define (or care about) serialization of the data. This differentiates it from Cache . Even though the interface to Cache and Store is intentionally very similar, these two mechanisms are different: Cache keeps State (i.e. data object with metadata). Cache deals with objects and stores State perhaps in a non-serialized form (e.g. MemoryStore). Store keeps resources - i.e. arbitrary binary data ( bytes ) complemented with metadata. One purpose of a Store is to provide an option to serve files into the pipeline. The pipeline may start with a resource path followed by a sequence of actions.","title":"Store"},{"location":"store/#cache","text":"By default there is no cache - i.e. the queries are always re-evaluated. There are several cache implementations available in liquer.cache . They are configured by set_cache function, for example set_cache ( FileCache ( \"cache\" )) configures a cache that will store all the (cache-able) results of queries in a directory cache . 
Cache should be configured before the queries are evaluated - and before state variables are set. Currently there are three cache implementations: NoCache is a trivial do-nothing cache, FileCache stores data in files, MemoryCache caches the object in the memory. Custom cache can be created by defining a cache interface, see above mentioned classes. Cache will typically use query as a key and utilize the mechanism of serializing data into a bytes sequence (defined in liquer.state_types ), thus implementing a cache based either on a key-value store or blob-storage in SQL databases should be fairly straightforward (and probably quite similar to FileCache ). Command may optionally decide not to cache its output. This may be useful when command produces volatile data, e.g. time. In such a case command (operating on a state) can disable cache by state.with_caching(False) .","title":"Cache"},{"location":"web_service/","text":"Web service Web service is typically installed with an absolute path starting with /liquer , e.g. /liquer/q/hello . Though this can be changed (and should be fully configurable in the future), some extensions (e.g. liquer gui ) currently rely on this absolute location. Core service for query execution Route /q/QUERY (GET, POST) Main service for evaluating queries. Service allows to supply named arguments, that will be passed to the last command in the query. These arguments can be passed as URL query or POSTed as JSON dictionary. Route /submit/QUERY (GET) Main service for evaluating queries. Like /q/QUERY , but the QUERY is executed in the background. Service returns status as a JSON document. Status contains status OK or ERROR message short text message describing the status of the submission query query that was submitted Cache interface Route /api/cache/get/QUERY (GET, POST) FIXME: POST not implemented(?) Get cached data. If the result of the QUERY is stored in cache, it is returned immediately, otherwise the call fails. 
POST method may be supported, which allows using the service as a remote cache. Route /api/cache/meta/QUERY (GET, POST) Get cached metadata as JSON for QUERY if available, a status JSON otherwise: POST method may be supported, which allows using the service as a remote cache. status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted cached boolean, True when cached. Route /api/cache/remove/QUERY (GET) FIXME: Support http DELETE ? Interface to cache remove. Removes the query from cache. Returns status JSON: status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted removed boolean, True when the remove operation was successful. /api/cache/contains/QUERY (GET) Interface to cache contains. Returns whether QUERY is cached in a JSON status document: status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted cached boolean, True when query is in the cache Route /api/cache/keys.json (GET) FIXME Remove .json or unify with /api/store Interface to cache keys. Returns list of all keys in the cache as a JSON list. Route /api/cache/clean (GET) Interface to cache clean. Cleans the whole cache. Returns a JSON document. status OK or ERROR message Short text describing the result of the operation. Miscellaneous services Route /api/commands.json (GET) Returns a list of commands in json format Route /api/debug-json/QUERY (GET) FIXME: Obsolete? Debug query - returns metadata from a state after a query is evaluated Route /api/build (POST) FIXME: Obsolete? Build a query from a posted decoded query (list of lists of strings). Result is a dictionary with encoded query and link. Route /api/register_command/DATA (GET, POST) Remote command registration service. 
This has to be enabled by liquer.commands.enable_remote_registration() WARNING: Remote command registration allows deploying arbitrary Python code on the LiQuer server, therefore it is a HUGE SECURITY RISK and it should only be used if other security measures are taken (e.g. on localhost or intranet where only trusted users have access). This is on by default in the Jupyter server extension. Store interface Route /api/store/data/QUERY, (GET, POST) Get or set data in store. GET method is equivalent to Store.get_bytes. Content type (MIME) is obtained from the metadata. POST method sets data in store. Equivalent to Store.store. Unlike store method, which stores both data and metadata in one call, the api/store/data POST only stores the data. The metadata needs to be set in a separate POST of api/store/metadata either before or after the api/store/data POST. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument Route /api/store/upload/KEY (POST, optional GET) Upload data to store - similar to /api/store/data, but using upload. Equivalent to Store.store. Unlike store method, which stores both data and metadata in one call, the api/store/data POST only stores the data. The metadata needs to be set in a separate POST of api/store/metadata either before or after the api/store/data POST. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument GET method (if supported) may return a basic html interface to facilitate the file upload. Route /api/store/metadata/KEY (GET, POST) FIXME: KEY, not QUERY Getting or setting the metadata for KEY. On successful GET returns the metadata as JSON. Otherwise a status JSON document is returned: status OK or ERROR message Short text describing the status. 
(typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /web/KEY (GET) FIXME: it should be key, not query in the code Shortcut to the 'web' directory in the store. Similar to /store/data/web, except the index.html is automatically added if query is a directory. The 'web' directory hosts web applications and visualization tools, e.g. liquer-pcv or liquer-gui. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/stored_metadata/QUERY (GET) Get metadata stored in a store or cache. This will not trigger an execution of a query or recipe. FIXME: Make sure that recipes are not executed. Route /api/store/remove/KEY (GET) Remove key from store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/store/removedir/KEY (GET) Remove directory key from store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/store/contains/KEY (GET) Check whether the KEY exists in the store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) contains result of store.contains if operation is successful (true if KEY is in the store) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/store/is_dir/KEY (GET) Check whether the KEY is a directory in the store. 
FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) is_dir result of store.is_dir if operation is successful (true if KEY is a directory) query QUERY passed as an argument key FIXME: KEY passed as an argument Route /api/store/keys (GET) Return the list of keys in the store. Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) keys list of keys (on success) Route /api/store/listdir/KEY (GET) Get list of names in a directory KEY . FIXME KEY Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) listdir list of names (on success) FIXME key, query Route /api/store/makedir/KEY (GET) Make a directory specified by KEY . FIXME KEY Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Web service"},{"location":"web_service/#web-service","text":"Web service is typically installed with an absolute path starting with /liquer , e.g. /liquer/q/hello . Though this can be changed (and should be fully configurable in the future), some extensions (e.g. liquer gui ) currently rely on this absolute location.","title":"Web service"},{"location":"web_service/#core-service-for-query-execution","text":"","title":"Core service for query execution"},{"location":"web_service/#route-qquery-get-post","text":"Main service for evaluating queries. Service allows supplying named arguments that will be passed to the last command in the query. 
These arguments can be passed as a URL query or POSTed as a JSON dictionary.","title":"Route /q/QUERY (GET, POST)"},{"location":"web_service/#route-submitquery-get","text":"Main service for evaluating queries. Like /q/QUERY , but the QUERY is executed in the background. Service returns status as a JSON document. Status contains status OK or ERROR message short text message describing the status of the submission query query that was submitted","title":"Route /submit/QUERY (GET)"},{"location":"web_service/#cache-interface","text":"","title":"Cache interface"},{"location":"web_service/#routeapicachegetquery-get-post","text":"FIXME: POST not implemented(?) Get cached data. If the result of the QUERY is stored in cache, it is returned immediately, otherwise the call fails. POST method may be supported, which allows using the service as a remote cache.","title":"Route /api/cache/get/QUERY (GET, POST)"},{"location":"web_service/#route-apicachemetaquery-get-post","text":"Get cached metadata as JSON for QUERY if available, a status JSON otherwise: POST method may be supported, which allows using the service as a remote cache. status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted cached boolean, True when cached.","title":"Route /api/cache/meta/QUERY (GET, POST)"},{"location":"web_service/#route-apicacheremovequery-get","text":"FIXME: Support http DELETE ? Interface to cache remove. Removes the query from cache. Returns status JSON: status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted removed boolean, True when the remove operation was successful.","title":"Route /api/cache/remove/QUERY (GET)"},{"location":"web_service/#apicachecontainsquery-get","text":"Interface to cache contains. 
Returns whether QUERY is cached in a JSON status document: status FIXME message (FIXME: missing) short text message describing the status of the submission query query that was submitted cached boolean, True when query is in the cache","title":"/api/cache/contains/QUERY (GET)"},{"location":"web_service/#route-apicachekeysjson-get","text":"FIXME Remove .json or unify with /api/store Interface to cache keys. Returns list of all keys in the cache as a JSON list.","title":"Route /api/cache/keys.json (GET)"},{"location":"web_service/#route-apicacheclean-get","text":"Interface to cache clean. Cleans the whole cache. Returns a JSON document. status OK or ERROR message Short text describing the result of the operation.","title":"Route /api/cache/clean (GET)"},{"location":"web_service/#miscellaneous-services","text":"","title":"Miscellaneous services"},{"location":"web_service/#route-apicommandsjson-get","text":"Returns a list of commands in json format","title":"Route /api/commands.json (GET)"},{"location":"web_service/#route-apidebug-jsonquery-get","text":"FIXME: Obsolete? Debug query - returns metadata from a state after a query is evaluated","title":"Route /api/debug-json/QUERY (GET)"},{"location":"web_service/#route-apibuild-post","text":"FIXME: Obsolete? Build a query from a posted decoded query (list of lists of strings). Result is a dictionary with encoded query and link.","title":"Route /api/build (POST)"},{"location":"web_service/#route-apiregister_commanddata-get-post","text":"Remote command registration service. This has to be enabled by liquer.commands.enable_remote_registration() WARNING: Remote command registration allows deploying arbitrary Python code on the LiQuer server, therefore it is a HUGE SECURITY RISK and it should only be used if other security measures are taken (e.g. on localhost or intranet where only trusted users have access). 
This is on by default on Jupyter server extension .","title":"Route /api/register_command/DATA (GET, POST)"},{"location":"web_service/#store-interface","text":"","title":"Store interface"},{"location":"web_service/#route-apistoredataquery-get-post","text":"Get or set data in store. GET method is equivalent to Store.get_bytes. Content type (MIME) is obtained from the metadata. POST method sets data in store. Equivalent to Store.store. Unlike store method, which stores both data and metadata in one call, the api/store/data POST only stores the data. The metadata needs to be set in a separate POST of api/store/metadata either before or after the api/store/data POST. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument","title":"Route /api/store/data/QUERY, (GET, POST)"},{"location":"web_service/#route-apistoreuploadkey-post-optional-get","text":"Upload data to store - similar to /api/store/data, but using upload. Equivalent to Store.store. Unlike store method, which stores both data and metadata in one call, the api/store/data POST only stores the data. The metadata needs to be set in a separate POST of api/store/metadata either before or after the api/store/data POST. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument GET method (if supported) may return a basic html interface to facilitate the file upload.","title":"Route /api/store/upload/KEY (POST, optional GET)"},{"location":"web_service/#route-apistoremetadatakey-get-post","text":"FIXME: KEY, not QUERY Getting or setting the metadata for KEY. On successful GET returns the metadata as JSON. Otherwise a status JSON document is returned: status OK or ERROR message Short text describing the status. 
(typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/metadata/KEY (GET, POST)"},{"location":"web_service/#route-webkey-get","text":"FIXME: it should be key, not query in the code Shortcut to the 'web' directory in the store. Similar to /store/data/web, except the index.html is automatically added if query is a directory. The 'web' directory hosts web applications and visualization tools, e.g. liquer-pcv or liquer-gui. On failure, a 404 error is returned with a JSON in the body: status ERROR message Short text describing the error. (typically a python traceback) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /web/KEY (GET)"},{"location":"web_service/#route-apistored_metadataquery-get","text":"Get metadata stored in a store or cache. This will not trigger an execution of a query or recipe. FIXME: Make sure that recipes are not executed.","title":"Route /api/stored_metadata/QUERY (GET)"},{"location":"web_service/#route-apistoreremovekey-get","text":"Remove key from store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/remove/KEY (GET)"},{"location":"web_service/#route-apistoreremovedirkey-get","text":"Remove directory key from store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/removedir/KEY (GET)"},{"location":"web_service/#route-apistorecontainskey-get","text":"Check whether the KEY exists in the store. 
FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) contains result of store.contains if operation is successful (true if KEY is in the store) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/contains/KEY (GET)"},{"location":"web_service/#route-apistoreis_dirkey-get","text":"Check whether the KEY is a directory in the store. FIXME KEY FIXME support http DELETE Status JSON document is returned: status OK or ERROR message Short text describing the status. (typically a python traceback on error) is_dir result of store.is_dir if operation is successful (true if KEY is a directory) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/is_dir/KEY (GET)"},{"location":"web_service/#route-apistorekeys-get","text":"Return the list of keys in the store. Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) keys list of keys (on success)","title":"Route /api/store/keys (GET)"},{"location":"web_service/#route-apistorelistdirkey-get","text":"Get list of names in a directory KEY . FIXME KEY Returns JSON document with a result: status OK or ERROR message Short text describing the status. (typically a python traceback on error) listdir list of names (on success) FIXME key, query","title":"Route /api/store/listdir/KEY (GET)"},{"location":"web_service/#route-apistoremakedirkey-get","text":"Make a directory specified by KEY . FIXME KEY Returns JSON document with a result: status OK or ERROR message Short text describing the status. 
(typically a python traceback on error) query QUERY passed as an argument key FIXME: KEY passed as an argument","title":"Route /api/store/makedir/KEY (GET)"}]} \ No newline at end of file diff --git a/site/sitemap.xml.gz b/site/sitemap.xml.gz index ad296c29c102f3330a4271565164004aa2209724..58a6a936044f79056f9fe1578959fa691d5bc501 100644 GIT binary patch delta 14 Vcmb=g=aBE_;9!iYPo2n74gejN1QGxM delta 14 Vcmb=g=aBE_;CLGII%Oh9IRGW<1z`XH