Skip to content

Commit

Permalink
Merge pull request #27 from UCSD-E4E/21-staging-files-does-not-work-with-absolute-path
Browse files Browse the repository at this point in the history

21 staging files does not work with absolute path
  • Loading branch information
ntlhui authored Mar 26, 2023
2 parents 412d223 + 3557b7b commit d5ee9f3
Show file tree
Hide file tree
Showing 6 changed files with 154 additions and 55 deletions.
2 changes: 1 addition & 1 deletion e4e_data_management/__init__.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
'''E4E Data Management Tools
'''
__version__ = '0.0.0.11'
__version__ = '0.0.0.12'
18 changes: 13 additions & 5 deletions e4e_data_management/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,15 +146,22 @@ def list_datasets_cmd(self) -> None:
print(dataset)

def add_files_cmd(self,
paths: List[str], readme: bool,
paths: List[str],
readme: bool,
start: Optional[dt.datetime] = None,
end: Optional[dt.datetime] = None):
end: Optional[dt.datetime] = None,
destination: Optional[Path] = None):
"""Add files parsing
Args:
app (DataManager): App
paths (List[str]): Paths
paths (List[str]): Paths to add
readme (bool): Readme flag
start (Optional[dt.datetime], optional): Earliest timestamp to stage. Defaults to None.
end (Optional[dt.datetime], optional): Latest timestamp to stage. Defaults to None.
destination (Optional[Path], optional): Destination directory within the dataset.
Defaults to None.
"""
# pylint: disable=too-many-arguments
resolved_paths: List[Path] = []
for path in paths:
resolved_paths.extend(Path(file) for file in glob(path))
Expand All @@ -166,7 +173,7 @@ def add_files_cmd(self,
resolved_paths = [path
for path in resolved_paths
if dt.datetime.fromtimestamp(path.stat().st_mtime) <= end]
self.app.add(paths=resolved_paths, readme=readme)
self.app.add(paths=resolved_paths, readme=readme, destination=destination)

def status_cmd(self):
"""Handles status cmd
Expand Down Expand Up @@ -245,6 +252,7 @@ def __configure_add_parser(self, parser: argparse.ArgumentParser):
parser.add_argument('--readme', action='store_true')
parser.add_argument('--start', default=None, type=dt.datetime.fromisoformat)
parser.add_argument('--end', default=None, type=dt.datetime.fromisoformat)
parser.add_argument('--destination', default=None, type=Path)
parser.set_defaults(func=self.add_files_cmd)

def __configure_list_parser(self, parser: argparse.ArgumentParser):
Expand Down
20 changes: 16 additions & 4 deletions e4e_data_management/core.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,8 +145,11 @@ def status(self) -> str:
output += '\n'
if len(self.active_mission.staged_files) > 0:
output += f'{len(self.active_mission.staged_files)} staged files:\n\t'
output += '\n\t'.join(file.relative_to(Path('.')).as_posix()
for file in sorted(self.active_mission.staged_files))
staged_files = ((f"{file.origin_path.as_posix()} -> "
f"{file.target_path.relative_to(self.active_mission.path).as_posix()}")
for file in self.active_mission.staged_files)

output += '\n\t'.join(staged_files)
return output

def activate(self,
Expand Down Expand Up @@ -187,11 +190,20 @@ def activate(self,
else:
self.active_mission = None

def add(self, paths: Iterable[Path], readme: bool = False) -> None:
def add(self, paths: Iterable[Path],
readme: bool = False,
destination: Optional[Path] = None) -> None:
"""This adds a file or directory to the staging area.
Args:
paths (Iterable[Path]): List of paths to add
readme (bool, optional): Readme flag. Defaults to False.
destination (Optional[Path], optional): Directory in the dataset to add paths to.
Defaults to None.
Raises:
RuntimeError: Dataset not active
RuntimeError: Mission not active
"""
if self.active_dataset is None:
raise RuntimeError('Dataset not active')
Expand All @@ -202,7 +214,7 @@ def add(self, paths: Iterable[Path], readme: bool = False) -> None:
return
if self.active_mission is None:
raise RuntimeError('Mission not active')
self.active_mission.stage(paths)
self.active_mission.stage(paths, destination=destination)
self.save()

def commit(self, readme: bool = False) -> None:
Expand Down
110 changes: 71 additions & 39 deletions e4e_data_management/data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import datetime as dt
import json
import pickle
from dataclasses import dataclass
from hashlib import sha256
from pathlib import Path
from shutil import copy2
Expand All @@ -14,6 +15,18 @@
from e4e_data_management.metadata import Metadata


@dataclass
class StagedFile:
    """Record describing one file that has been staged.

    Instances are hashable so that they can be stored in a set. The hash is derived from all
    three fields, so it only stays stable as long as none of the fields is reassigned.
    """
    # Path the file is read from
    origin_path: Path
    # Path the file is written to
    target_path: Path
    # Hash string recorded for the file
    hash: str

    def __hash__(self) -> int:
        # Hash the same tuple of fields that the generated __eq__ compares
        identity = (self.origin_path, self.target_path, self.hash)
        return hash(identity)

class Manifest:
"""Manifest of files
"""
Expand Down Expand Up @@ -49,7 +62,7 @@ def validate(self,
if file_key not in manifest:
return False
if method == 'hash':
computed_hash = self.__hash(file)
computed_hash = self.compute_file_hash(file)
if computed_hash != manifest[file_key]['sha256sum']:
return False
elif method == 'size':
Expand Down Expand Up @@ -128,7 +141,7 @@ def compute_hashes(self,
Dict[str, Dict[str, Union[str, int]]]: Hash results
"""
if not hash_fn:
hash_fn = self.__hash
hash_fn = self.compute_file_hash
data: Dict[str, Dict[str, Union[str, int]]] = {}
for file in files:
rel_path = file.relative_to(root).as_posix()
Expand All @@ -142,7 +155,15 @@ def compute_hashes(self,
return data

@classmethod
def __hash(cls, file: Path):
def compute_file_hash(cls, file: Path) -> str:
"""Computes a file hash
Args:
file (Path): Path to file
Returns:
str: Hash digest
"""
cksum = sha256()
with open(file, 'rb') as handle:
for byte_block in iter(lambda: handle.read(4096), b''):
Expand All @@ -158,7 +179,7 @@ def __init__(self, path: Path, mission_metadata: Metadata) -> None:
self.path = path
self.metadata = mission_metadata
self.committed_files: List[Path] = []
self.staged_files: List[Path] = []
self.staged_files: Set[StagedFile] = set()
self.manifest = Manifest(self.path.joinpath(self.__MANIFEST_NAME))

def create(self) -> None:
Expand Down Expand Up @@ -197,13 +218,46 @@ def load(cls, path: Path) -> Mission:
metadata = Metadata.load(path)
return Mission(path=path, mission_metadata=metadata)

def stage(self, paths: Iterable[Path]):
"""Add paths to the staging area
def stage(self, paths: Iterable[Path], destination: Optional[Path] = None):
    """Record files for a later commit into this mission.

    Each regular file in ``paths`` is staged under its own file name. Each directory in
    ``paths`` is walked recursively and every non-directory entry below it is staged with its
    location relative to that directory preserved. A staged entry stores the resolved source
    path, the resolved target path, and the hash of the source file as returned by
    ``Manifest.compute_file_hash`` at staging time.

    Args:
        paths (Iterable[Path]): Files and/or directories to stage
        destination (Optional[Path], optional): Directory joined onto the mission path under
            which the staged files are placed. Defaults to None, which selects the mission
            path itself.

    Raises:
        RuntimeWarning: A path is neither a regular file nor a directory
    """
    target_root = self.path.joinpath(destination if destination else Path('.'))
    for entry in paths:
        if entry.is_file():
            # A single file lands directly in the target root under its own name
            staged = StagedFile(
                origin_path=entry.resolve(),
                target_path=target_root.joinpath(entry.name).resolve(),
                hash=Manifest.compute_file_hash(entry.absolute())
            )
            self.staged_files.add(staged)
            continue

        if not entry.is_dir():
            raise RuntimeWarning('Not a normal file')

        # A directory contributes every non-directory entry found beneath it
        for child in entry.rglob('*'):
            if not child.is_dir():
                staged = StagedFile(
                    origin_path=child.resolve(),
                    target_path=target_root.joinpath(child.relative_to(entry)).resolve(),
                    hash=Manifest.compute_file_hash(child.resolve())
                )
                self.staged_files.add(staged)

@property
def name(self) -> str:
Expand All @@ -220,38 +274,16 @@ def commit(self) -> List[Path]:
Raises:
RuntimeError: Copy fail
"""
# Discover files
committed_files: List[Path] = []
for path in self.staged_files:
added_files: List[Path] = []
if path.is_file():
# this goes into the root
added_files.append(path)
root = path.parent
elif path.is_dir():
# This should get recursively copied in
for file in path.rglob('*'):
if file.is_dir():
continue
added_files.append(file)
root = path
original_manifest = self.manifest.compute_hashes(
root=root,
files=added_files
)
new_files: List[Path] = []
for file in added_files:
src = file
dest = self.path.joinpath(file.relative_to(root)).absolute()
dest.parent.mkdir(parents=True, exist_ok=True)
copy2(src=src, dst=dest)
new_files.append(dest)
if not self.manifest.validate(manifest=original_manifest, files=new_files):
raise RuntimeError(f'Failed to copy {path.as_posix()}')
self.manifest.update(new_files)
self.committed_files.extend(new_files)
committed_files.extend(new_files)
self.staged_files = []
for staged_file in self.staged_files:
staged_file.target_path.parent.mkdir(parents=True, exist_ok=True)
copy2(src=staged_file.origin_path, dst=staged_file.target_path)
if Manifest.compute_file_hash(staged_file.target_path) != staged_file.hash:
raise RuntimeError(f'Failed to copy {staged_file.origin_path.as_posix()}')
committed_files.append(staged_file.target_path)
self.manifest.update(committed_files)
self.committed_files.extend([file.relative_to(self.path) for file in committed_files])
self.staged_files = set()
return committed_files

class Dataset:
Expand Down
15 changes: 9 additions & 6 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,7 @@ def test_add_files(single_mission: Tuple[Mock, DataManager, Path],
args = split(f'e4edm add {bin_files[0].as_posix()} {bin_files[1].as_posix()}')
with patch('sys.argv', args):
main()
mock.add.assert_called_once_with(paths=bin_files, readme=False)
mock.add.assert_called_once_with(paths=bin_files, readme=False, destination=None)

def test_add_files_start(single_mission: Tuple[Mock, DataManager, Path],
test_data: Tuple[Path, int, int]):
Expand All @@ -122,7 +122,7 @@ def test_add_files_start(single_mission: Tuple[Mock, DataManager, Path],
f'--start {start_time.isoformat()}')
with patch('sys.argv', args):
main()
mock.add.assert_called_once_with(paths=[], readme=False)
mock.add.assert_called_once_with(paths=[], readme=False, destination=None)

def test_add_files_end(single_mission: Tuple[Mock, DataManager, Path],
test_data: Tuple[Path, int, int]):
Expand All @@ -145,7 +145,8 @@ def test_add_files_end(single_mission: Tuple[Mock, DataManager, Path],
f'--end {start_time.isoformat()}')
with patch('sys.argv', args):
main()
mock.add.assert_called_once_with(paths=bin_files, readme=False)
mock.add.assert_called_once_with(paths=bin_files, readme=False, destination=None)

def test_add_glob(single_mission: Tuple[Mock, DataManager, Path],
test_data: Tuple[Path, int, int]):
"""Tests adding files
Expand All @@ -160,7 +161,9 @@ def test_add_glob(single_mission: Tuple[Mock, DataManager, Path],
args = split(f'e4edm add {data_dir.as_posix()}/*.bin')
with patch('sys.argv', args):
main()
mock.add.assert_called_once_with(paths=list(data_dir.glob('*.bin')), readme=False)
mock.add.assert_called_once_with(paths=list(data_dir.glob('*.bin')),
readme=False,
destination=None)

def test_add_multifile(single_mission: Tuple[Mock, DataManager, Path]):
"""Tests adding multiple files at the same time
Expand All @@ -179,7 +182,7 @@ def test_add_multifile(single_mission: Tuple[Mock, DataManager, Path]):
args = split(f'e4edm add {file1.as_posix()} {file2.as_posix()}')
with patch('sys.argv', args):
main()
mock.add.assert_called_once_with(paths=[file1, file2], readme=False)
mock.add.assert_called_once_with(paths=[file1, file2], readme=False, destination=None)

def test_commit_files(single_mission: Tuple[Mock, DataManager, Path],
test_data: Tuple[Path, int, int]):
Expand Down Expand Up @@ -230,7 +233,7 @@ def test_add_readme(single_mission: Tuple[Mock, DataManager, Path], test_readme:
args = split(f'e4edm add --readme {test_readme.as_posix()}')
with patch('sys.argv', args):
main()
mock.add.assert_called_once_with(paths=[test_readme], readme=True)
mock.add.assert_called_once_with(paths=[test_readme], readme=True, destination=None)

def test_commit_readme(single_mission: Tuple[Mock, DataManager, Path], test_readme: Path):
"""Tests pushing readmes
Expand Down
44 changes: 44 additions & 0 deletions tests/test_staging.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
'''Data staging tests
'''
import datetime as dt
import os
from pathlib import Path
from typing import Tuple
from unittest.mock import Mock
Expand Down Expand Up @@ -109,3 +110,46 @@ def test_stage_commit_readme(test_app: Tuple[Mock, DataManager, Path],
assert current_files == sorted(expected_files)

assert app.validate()

def test_relative_path(test_app: Tuple[Mock, DataManager, Path],
                       test_data: Tuple[Path, int, int]):
    """Tests that a file added by relative path is committed from its original location

    The file is staged while the working directory is the data directory, and committed after
    the working directory has been switched back. The commit must not raise and the file must
    appear in the mission directory.

    Args:
        test_app (Tuple[Mock, DataManager, Path]): Test application
        test_data (Tuple[Path, int, int]): Test data
    """
    _, app, root_dir = test_app
    data_dir, _, _ = test_data

    app.initialize_dataset(
        date=dt.date.fromisoformat('2023-03-25'),
        project='Test Relative Path',
        location='San Diego',
        directory=root_dir
    )
    app.initialize_mission(
        metadata=Metadata(
            timestamp=dt.datetime.fromisoformat('2023-03-25T15:10-07:00'),
            device='DUT',
            country='USA',
            region='Southern California',
            site='e4edm',
            mission='test_relative_path'
        )
    )

    original_working_dir = Path.cwd()

    os.chdir(data_dir)
    try:
        app.add([Path('0000.bin')])
    finally:
        # Always restore the working directory so that a failure while staging does not leave
        # the rest of the test session running from the data directory
        os.chdir(original_working_dir)

    app.commit()

    assert root_dir.joinpath('2023.03.Test Relative Path.San Diego',
                             'ED-00',
                             'test_relative_path',
                             '0000.bin').exists()

0 comments on commit d5ee9f3

Please sign in to comment.