Fix execution on UNIX systems and release v0.0.3. (#4)

pytask-dev · Jan 16, 2021 · 1e0e854 · 1e0e854
1 parent 7b797f2
commit 1e0e854
Show file tree

Hide file tree

Showing 17 changed files with 178 additions and 65 deletions.
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -3,12 +3,20 @@ repos:
     rev: v3.4.0
     hooks:
     -   id: check-added-large-files
-        args: ['--maxkb=100']
+        args: ['--maxkb=25']
+    -   id: check-case-conflict
     -   id: check-merge-conflict
+    -   id: check-vcs-permalinks
     -   id: check-yaml
         exclude: meta.yaml
     -   id: debug-statements
     -   id: end-of-file-fixer
+    -   id: fix-byte-order-marker
+    -   id: forbid-new-submodules
+    -   id: mixed-line-ending
+    -   id: no-commit-to-branch
+        args: [--branch, main]
+    -   id: trailing-whitespace
 -   repo: https://github.com/pre-commit/pygrep-hooks
     rev: v1.7.0  # Use the ref you want to point at
     hooks:

diff --git a/CHANGES.rst b/CHANGES.rst
@@ -6,6 +6,11 @@ chronological order. Releases follow `semantic versioning <https://semver.org/>`
 all releases are available on `Anaconda.org <https://anaconda.org/pytask/pytask-stata>`_.
 
 
+0.0.3 - 2021-01-16
+------------------
+
+- :gh:`4` removes log file handling on UNIX and raises an error if run in parallel.
+
 0.0.2 - 2020-10-30
 ------------------
 

diff --git a/README.rst b/README.rst
@@ -10,6 +10,10 @@
 .. image:: https://codecov.io/gh/pytask-dev/pytask-stata/branch/main/graph/badge.svg
     :target: https://codecov.io/gh/pytask-dev/pytask-stata
 
+.. image:: https://results.pre-commit.ci/badge/github/pytask-dev/pytask-stata/main.svg
+    :target: https://results.pre-commit.ci/latest/github/pytask-dev/pytask-stata/main
+    :alt: pre-commit.ci status
+
 .. image:: https://img.shields.io/badge/code%20style-black-000000.svg
     :target: https://github.com/psf/black
 
@@ -57,6 +61,9 @@ Here is an example where you want to run ``script.do``.
     def task_run_do_file():
         pass
 
+When executing a do-file, the current working directory changes to the directory of the
+script which is executed.
+
 
 Multiple dependencies and products
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
@@ -103,8 +110,8 @@ for a ``"source"`` key in the dictionary and, secondly, under the key ``0``.
 Command Line Arguments
 ~~~~~~~~~~~~~~~~~~~~~~
 
-The decorator can be used to pass command line arguments to your Stata executable which
-is not done, by default, but you could pass the path of the product with
+The decorator can be used to pass command line arguments to your Stata executable. For
+example, pass the path of the product with
 
 .. code-block:: python
 
@@ -124,10 +131,12 @@ And in your ``script.do``, you can intercept the value with
     sysuse auto, clear
     save "`produces'"
 
-Note that this solution only works if your current working directory is the same as the
-directory where the task file lives. It is because Stata does not swap directories. To
-make the task independent from the current working directory, pass the full path as an
-command line argument. Here is an example.
+The relative path inside the do-file works only because pytask-stata switches the
+current working directory to the directory of the do-file before the task is executed.
+This is a necessary precaution.
+
+To make the task independent from the current working directory, pass the full path as
+a command line argument. Here is an example.
 
 .. code-block:: python
 
@@ -178,7 +187,7 @@ include the ``@pytask.mark.stata`` decorator in the parametrization just like wi
 Configuration
 -------------
 
-pytask-stata offers new some new configuration values.
+pytask-stata can be configured with the following options.
 
 stata_keep_log
     Use this option to keep the ``.log`` files which are produced for every task. This

diff --git a/environment.yml b/environment.yml
@@ -24,3 +24,4 @@ dependencies:
   - pytest-cov
   - pytest-xdist
   - tox-conda
+  - virtualenv=20.0.33
diff --git a/setup.cfg b/setup.cfg
@@ -1,7 +1,7 @@
 [bumpversion]
-current_version = 0.0.2
+current_version = 0.0.3
 parse = (?P<major>\d+)\.(?P<minor>\d+)(\.(?P<patch>\d+))(\-?((dev)?(?P<dev>\d+))?)
-serialize = 
+serialize =
 	{major}.{minor}.{patch}dev{dev}
 	{major}.{minor}.{patch}
 

diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 
 setup(
     name="pytask-stata",
-    version="0.0.2",
+    version="0.0.3",
     packages=find_packages(where="src"),
     package_dir={"": "src"},
     entry_points={"pytask": ["pytask_stata = pytask_stata.plugin"]},

diff --git a/src/pytask_stata/__init__.py b/src/pytask_stata/__init__.py
@@ -1 +1 @@
-__version__ = "0.0.2"
+__version__ = "0.0.3"
diff --git a/src/pytask_stata/collect.py b/src/pytask_stata/collect.py
@@ -2,9 +2,9 @@
 import copy
 import functools
 import subprocess
-from pathlib import Path
 from typing import Iterable
 from typing import Optional
+from typing import Sequence
 from typing import Union
 
 from _pytask.config import hookimpl
@@ -14,6 +14,7 @@
 from _pytask.nodes import PythonFunctionTask
 from _pytask.parametrize import _copy_func
 from pytask_stata.shared import convert_task_id_to_name_of_log_file
+from pytask_stata.shared import get_node_from_dictionary
 
 
 def stata(options: Optional[Union[str, Iterable[str]]] = None):
@@ -25,16 +26,15 @@ def stata(options: Optional[Union[str, Iterable[str]]] = None):
         One or multiple command line options passed to Stata.
 
     """
-    if options is None:
-        options = []
-    elif isinstance(options, str):
-        options = [options]
+    options = _to_list(options) if options is not None else []
+    options = [str(i) for i in options]
     return options
 
 
-def run_stata_script(stata):
+def run_stata_script(stata, cwd):
     """Run an R script."""
-    subprocess.run(stata, check=True)
+    print("Executing " + " ".join(stata) + ".")  # noqa: T001
+    subprocess.run(stata, cwd=cwd, check=True)
 
 
 @hookimpl
@@ -58,7 +58,7 @@ def pytask_collect_task(session, path, name, obj):
 def pytask_collect_task_teardown(session, task):
     """Perform some checks and prepare the task function."""
     if get_specific_markers_from_task(task, "stata"):
-        source = _get_node_from_dictionary(
+        source = get_node_from_dictionary(
             task.depends_on, session.config["stata_source_key"]
         )
         if not (isinstance(source, FilePathNode) and source.value.suffix == ".do"):
@@ -72,19 +72,13 @@ def pytask_collect_task_teardown(session, task):
         merged_marks = _merge_all_markers(task)
         args = stata(*merged_marks.args, **merged_marks.kwargs)
         options = _prepare_cmd_options(session, task, args)
-        stata_function = functools.partial(stata_function, stata=options)
+        stata_function = functools.partial(
+            stata_function, stata=options, cwd=task.path.parent
+        )
 
         task.function = stata_function
 
 
-def _get_node_from_dictionary(obj, key, fallback=0):
-    if isinstance(obj, Path):
-        pass
-    elif isinstance(obj, dict):
-        obj = obj.get(key) or obj.get(fallback)
-    return obj
-
-
 def _merge_all_markers(task):
     """Combine all information from markers for the Stata function."""
     stata_marks = get_specific_markers_from_task(task, "stata")
@@ -101,15 +95,45 @@ def _prepare_cmd_options(session, task, args):
     is unique and does not cause any errors when parallelizing the execution.
 
     """
-    source = _get_node_from_dictionary(
+    source = get_node_from_dictionary(
         task.depends_on, session.config["stata_source_key"]
     )
-    log_name = convert_task_id_to_name_of_log_file(task.name)
-    return [
+
+    cmd_options = [
         session.config["stata"],
         "-e",
         "do",
-        source.value.as_posix(),
+        source.path.as_posix(),
         *args,
-        f"-{log_name}",
     ]
+    if session.config["platform"] == "win32":
+        log_name = convert_task_id_to_name_of_log_file(task.name)
+        cmd_options.append(f"-{log_name}")
+
+    return cmd_options
+
+
+def _to_list(scalar_or_iter):
+    """Convert scalars and iterables to list.
+
+    Parameters
+    ----------
+    scalar_or_iter : str or list
+
+    Returns
+    -------
+    list
+
+    Examples
+    --------
+    >>> _to_list("a")
+    ['a']
+    >>> _to_list(["b"])
+    ['b']
+
+    """
+    return (
+        [scalar_or_iter]
+        if isinstance(scalar_or_iter, str) or not isinstance(scalar_or_iter, Sequence)
+        else list(scalar_or_iter)
+    )
diff --git a/src/pytask_stata/config.py b/src/pytask_stata/config.py
@@ -1,5 +1,6 @@
 """Configure pytask."""
 import shutil
+import sys
 
 from _pytask.config import hookimpl
 from _pytask.shared import convert_truthy_or_falsy_to_bool
@@ -11,6 +12,7 @@
 def pytask_parse_config(config, config_from_cli, config_from_file):
     """Register the r marker."""
     config["markers"]["stata"] = "Tasks which are executed with Stata."
+    config["platform"] = sys.platform
 
     if config_from_file.get("stata"):
         config["stata"] = config_from_file["stata"]

diff --git a/src/pytask_stata/execute.py b/src/pytask_stata/execute.py
@@ -4,6 +4,7 @@
 from _pytask.config import hookimpl
 from _pytask.mark import get_specific_markers_from_task
 from pytask_stata.shared import convert_task_id_to_name_of_log_file
+from pytask_stata.shared import get_node_from_dictionary
 from pytask_stata.shared import STATA_COMMANDS
 
 
@@ -34,8 +35,14 @@ def pytask_execute_task_teardown(session, task):
 
     """
     if get_specific_markers_from_task(task, "stata"):
-        log_name = convert_task_id_to_name_of_log_file(task.name)
-        path_to_log = task.path.with_name(log_name).with_suffix(".log")
+        if session.config["platform"] == "win32":
+            log_name = convert_task_id_to_name_of_log_file(task.name)
+            path_to_log = task.path.with_name(log_name).with_suffix(".log")
+        else:
+            source = get_node_from_dictionary(
+                task.depends_on, session.config["stata_source_key"]
+            )
+            path_to_log = source.path.with_suffix(".log")
 
         n_lines = session.config["stata_check_log_lines"]
 

diff --git a/src/pytask_stata/parametrize.py b/src/pytask_stata/parametrize.py
@@ -8,4 +8,4 @@ def pytask_parametrize_kwarg_to_marker(obj, kwargs):
     """Attach parametrized stata arguments to the function with a marker."""
     if callable(obj):
         if "stata" in kwargs:
-            mark.stata(*kwargs.pop("stata"))(obj)
+            mark.stata(kwargs.pop("stata"))(obj)
diff --git a/src/pytask_stata/shared.py b/src/pytask_stata/shared.py
@@ -1,6 +1,7 @@
 """Shared functions and variables."""
 import sys
 
+
 if sys.platform == "darwin":
     STATA_COMMANDS = [
         "Stata64MP",
@@ -54,3 +55,9 @@ def convert_task_id_to_name_of_log_file(id_):
     id_without_parent_directories = id_.rsplit("/")[-1]
     converted_id = id_without_parent_directories.replace(".", "_").replace("::", "_")
     return converted_id
+
+
+def get_node_from_dictionary(obj, key, fallback=0):
+    if isinstance(obj, dict):
+        obj = obj.get(key) or obj.get(fallback)
+    return obj
diff --git a/tests/test_collect.py b/tests/test_collect.py
@@ -4,12 +4,12 @@
 import pytest
 from _pytask.mark import Mark
 from _pytask.nodes import FilePathNode
-from pytask_stata.collect import _get_node_from_dictionary
 from pytask_stata.collect import _merge_all_markers
 from pytask_stata.collect import _prepare_cmd_options
 from pytask_stata.collect import pytask_collect_task
 from pytask_stata.collect import pytask_collect_task_teardown
 from pytask_stata.collect import stata
+from pytask_stata.shared import get_node_from_dictionary
 
 
 class DummyClass:
@@ -65,26 +65,34 @@ def test_merge_all_markers(marks, expected):
     ],
 )
 @pytest.mark.parametrize("stata_source_key", ["source", "do"])
-def test_prepare_cmd_options(args, stata_source_key):
+@pytest.mark.parametrize("platform", ["win32", "linux", "darwin"])
+def test_prepare_cmd_options(args, stata_source_key, platform):
     session = DummyClass()
-    session.config = {"stata": "stata", "stata_source_key": stata_source_key}
+    session.config = {
+        "stata": "stata",
+        "stata_source_key": stata_source_key,
+        "platform": platform,
+    }
 
     node = DummyClass()
-    node.value = Path("script.do")
+    node.path = Path("script.do")
     task = DummyClass()
     task.depends_on = {stata_source_key: node}
     task.name = "task"
 
     result = _prepare_cmd_options(session, task, args)
 
-    assert result == [
+    expected = [
         "stata",
         "-e",
         "do",
         "script.do",
         *args,
-        "-task",
     ]
+    if platform == "win32":
+        expected.append("-task")
+
+    assert result == expected
 
 
 @pytest.mark.unit
@@ -115,9 +123,14 @@ def test_pytask_collect_task(name, expected):
         (["input.dta", "script.do"], ["any_out.dta"], pytest.raises(ValueError)),
     ],
 )
-def test_pytask_collect_task_teardown(depends_on, produces, expectation):
+@pytest.mark.parametrize("platform", ["win32", "darwin", "linux"])
+def test_pytask_collect_task_teardown(depends_on, produces, platform, expectation):
     session = DummyClass()
-    session.config = {"stata": "stata", "stata_source_key": "source"}
+    session.config = {
+        "stata": "stata",
+        "stata_source_key": "source",
+        "platform": platform,
+    }
 
     task = DummyClass()
     task.depends_on = {
@@ -126,6 +139,7 @@ def test_pytask_collect_task_teardown(depends_on, produces, expectation):
     task.produces = {i: FilePathNode.from_path(Path(n)) for i, n in enumerate(produces)}
     task.function = task_dummy
     task.name = "task_dummy"
+    task.path = Path()
 
     markers = [Mark("stata", (), {})]
     task.markers = markers
@@ -146,5 +160,5 @@ def test_pytask_collect_task_teardown(depends_on, produces, expectation):
     ],
 )
 def test_get_node_from_dictionary(obj, key, expected):
-    result = _get_node_from_dictionary(obj, key)
+    result = get_node_from_dictionary(obj, key)
     assert result == expected