Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

JP-3768: Fix intermediate issues with non-resampled outlier methods #8853

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions changes/8853.outlier_detection.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Avoid modifying the input models and saving duplicate intermediate files when ``resample_data=False``.
87 changes: 42 additions & 45 deletions jwst/outlier_detection/tests/test_outlier_detection.py
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,7 @@ def we_three_sci():
def test_outlier_step_no_outliers(we_three_sci, do_resample, tmp_cwd):
"""Test whole step, no outliers"""
container = ModelContainer(list(we_three_sci))
container[0].var_rnoise[10, 10] = 1E9
pristine = ModelContainer([m.copy() for m in container])
OutlierDetectionStep.call(container, in_memory=True, resample_data=do_resample)

Expand Down Expand Up @@ -261,7 +262,9 @@ def test_outlier_step_base(we_three_sci, tmp_cwd):
assert len(median_files) != 0


def test_outlier_step_spec(tmp_cwd, tmp_path):
@pytest.mark.parametrize('resample', [True, False])
@pytest.mark.parametrize('save_intermediate', [True, False])
def test_outlier_step_spec(tmp_cwd, tmp_path, resample, save_intermediate):
"""Test outlier step for spec data including saving intermediate results."""
output_dir = tmp_path / 'output'
output_dir.mkdir(exist_ok=True)
Expand All @@ -275,50 +278,33 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
miri_cal.meta.exposure.type = "MIR_LRS-FIXEDSLIT"

# Build a container of three models, each with a unique filename
container = ModelContainer([miri_cal, miri_cal.copy(), miri_cal.copy()])
container = ModelContainer([miri_cal.copy(), miri_cal.copy(), miri_cal.copy()])
for i, model in enumerate(container):
model.meta.filename = f'test_{i}_cal.fits'

# Drop a CR on the science array in the first image
container[0].data[209, 37] += 1

# Verify that intermediate files are removed when not saved
# (s2d files are expected, i2d files are not, but we'll check
# for them to make sure the imaging extension didn't creep back in)
OutlierDetectionStep.call(container, output_dir=output_dir, save_results=True)
for dirname in [output_dir, tmp_cwd]:
result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
i2d_files = glob(os.path.join(dirname, '*i2d*.fits'))
s2d_files = glob(os.path.join(dirname, '*outlier_s2d.fits'))
median_files = glob(os.path.join(dirname, '*median.fits'))
blot_files = glob(os.path.join(dirname, '*blot.fits'))

# intermediate files are removed
assert len(i2d_files) == 0
assert len(s2d_files) == 0
assert len(median_files) == 0
assert len(blot_files) == 0

# result files are written to the output directory
if dirname == output_dir:
assert len(result_files) == len(container)
else:
assert len(result_files) == 0

# Call again, but save intermediate to the output path
# Call outlier detection
result = OutlierDetectionStep.call(
container, save_results=True, save_intermediate_results=True,
output_dir=output_dir
)
container, resample_data=resample,
output_dir=output_dir, save_results=True,
save_intermediate_results=save_intermediate)

# Make sure nothing changed in SCI array
for image, corrected in zip(container, result):
np.testing.assert_allclose(image.data, corrected.data)
for image in result:
nn = ~np.isnan(image.data)
np.testing.assert_allclose(image.data[nn], miri_cal.data[nn])

# Verify CR is flagged
assert np.isnan(result[0].data[209, 37])
assert result[0].dq[209, 37] == OUTLIER_DO_NOT_USE

# Verify that intermediate files are saved at the specified location
if save_intermediate:
expected_intermediate = len(container)
else:
expected_intermediate = 0
for dirname in [output_dir, tmp_cwd]:
all_files = glob(os.path.join(dirname, '*.fits'))
result_files = glob(os.path.join(dirname, '*outlierdetectionstep.fits'))
Expand All @@ -327,24 +313,35 @@ def test_outlier_step_spec(tmp_cwd, tmp_path):
median_files = glob(os.path.join(dirname, '*median.fits'))
blot_files = glob(os.path.join(dirname, '*blot.fits'))
if dirname == output_dir:
# result files are written to the output directory
# Result files are always written to the output directory
assert len(result_files) == len(container)

# s2d, median, and blot files are written to the output directory
assert len(s2d_files) == len(container)
assert len(blot_files) == len(container)
assert len(median_files) == 1

# i2d files not written
# s2d and blot files are written to the output directory
# if save_intermediate is True and resampling is set
if resample:
assert len(s2d_files) == expected_intermediate
assert len(blot_files) == expected_intermediate
else:
assert len(s2d_files) == 0
assert len(blot_files) == 0

# Only one median file is saved if save_intermediate is True,
# no matter how many input files there are
if save_intermediate:
assert len(median_files) == 1
else:
assert len(median_files) == 0

# i2d files are never written
assert len(i2d_files) == 0

# nothing else was written
assert len(all_files) == len(s2d_files) + \
len(median_files) + \
len(result_files) + \
len(blot_files)
# Nothing else was written
assert len(all_files) == (len(s2d_files)
+ len(median_files)
+ len(result_files)
+ len(blot_files))
else:
# nothing should be written to the current directory
# Nothing should be written to the current directory
assert len(result_files) == 0
assert len(s2d_files) == 0
assert len(median_files) == 0
Expand Down Expand Up @@ -674,4 +671,4 @@ def make_resamp(input_models):
asn_id="test",
allowed_memory=None,
)
return resamp
return resamp
29 changes: 13 additions & 16 deletions jwst/outlier_detection/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,24 +79,21 @@ def median_without_resampling(input_models,
for i in range(len(input_models)):

drizzled_model = input_models.borrow(i)
drizzled_model.wht = build_driz_weight(drizzled_model,
weight_type=weight_type,
good_bits=good_bits)
median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
input_models.shelve(drizzled_model, i, modify=True)

if save_intermediate_results:
# write the drizzled model to file
_fileio.save_drizzled(drizzled_model, make_output_path)

drizzled_data = drizzled_model.data.copy()
weight = build_driz_weight(drizzled_model,
weight_type=weight_type,
good_bits=good_bits)
if i == 0:
input_shape = (ngroups,)+drizzled_model.data.shape
dtype = drizzled_model.data.dtype
median_wcs = copy.deepcopy(drizzled_model.meta.wcs)
input_shape = (ngroups,) + drizzled_data.shape
dtype = drizzled_data.dtype
computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)

weight_threshold = compute_weight_threshold(drizzled_model.wht, maskpt)
drizzled_model.data[drizzled_model.wht < weight_threshold] = np.nan
computer.append(drizzled_model.data, i)
weight_threshold = compute_weight_threshold(weight, maskpt)
drizzled_data[weight < weight_threshold] = np.nan
computer.append(drizzled_data, i)

input_models.shelve(drizzled_model, i, modify=False)

# Perform median combination on set of drizzled mosaics
median_data = computer.evaluate()
Expand Down Expand Up @@ -154,14 +151,14 @@ def median_with_resampling(input_models,
with input_models:
for i, indices in enumerate(indices_by_group):

median_wcs = resamp.output_wcs
drizzled_model = resamp.resample_group(input_models, indices)

if save_intermediate_results:
# write the drizzled model to file
_fileio.save_drizzled(drizzled_model, make_output_path)

if i == 0:
median_wcs = resamp.output_wcs
input_shape = (ngroups,)+drizzled_model.data.shape
dtype = drizzled_model.data.dtype
computer = MedianComputer(input_shape, in_memory, buffer_size, dtype)
Expand Down
Loading