Skip to content

Commit

Permalink
Merge pull request #2258 from opensafely-core/evansd/dummy-data-timeout
Browse files (browse the repository at this point in the history)
Make the dummy data timeout configurable
  • Loading branch information
evansd authored Nov 28, 2024
2 parents 95197de + ccea2e5 commit 1552455
Show file tree
Hide file tree
Showing 9 changed files with 56 additions and 11 deletions.
10 changes: 8 additions & 2 deletions docs/includes/generated_docs/language__dataset.md
Original file line number Diff line number Diff line change
Expand Up @@ -61,18 +61,24 @@ over a dictionary. For more details see the guide on
</div>

<div class="attr-heading" id="Dataset.configure_dummy_data">
<tt><strong>configure_dummy_data</strong>(<em>population_size</em>, <em>legacy=False</em>)</tt>
<tt><strong>configure_dummy_data</strong>(<em>population_size=10</em>, <em>legacy=False</em>, <em>timeout=60</em>)</tt>
<a class="headerlink" href="#Dataset.configure_dummy_data" title="Permanent link">🔗</a>
</div>
<div markdown="block" class="indent">
Configure the dummy data to be generated.

_population_size_<br>
Number of patients to generate (default 10).
Maximum number of patients to generate.

Note that you may get fewer patients than this if the generator runs out of time
– see `timeout` below.

_legacy_<br>
Use legacy dummy data.

_timeout_<br>
Maximum time in seconds to spend generating dummy data.

```py
dataset.configure_dummy_data(population_size=10000)
```
Expand Down
10 changes: 8 additions & 2 deletions docs/includes/generated_docs/language__measures.md
Original file line number Diff line number Diff line change
Expand Up @@ -80,18 +80,24 @@ this method more than once is an error.
</div>

<div class="attr-heading" id="Measures.configure_dummy_data">
<tt><strong>configure_dummy_data</strong>(<em>population_size</em>, <em>legacy=False</em>)</tt>
<tt><strong>configure_dummy_data</strong>(<em>population_size=10</em>, <em>legacy=False</em>, <em>timeout=60</em>)</tt>
<a class="headerlink" href="#Measures.configure_dummy_data" title="Permanent link">🔗</a>
</div>
<div markdown="block" class="indent">
Configure the dummy data to be generated.

_population_size_<br>
Number of patients to generate (default 10).
Maximum number of patients to generate.

Note that you may get fewer patients than this if the generator runs out of time
– see `timeout` below.

_legacy_<br>
Use legacy dummy data.

_timeout_<br>
Maximum time in seconds to spend generating dummy data.

```py
measures.configure_dummy_data(population_size=10000)
```
Expand Down
1 change: 1 addition & 0 deletions ehrql/dummy_data/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def __init__(self, measures, dummy_data_config, **kwargs):
self.generator = DummyDataGenerator(
get_dataset_variables(combined),
population_size=get_population_size(dummy_data_config, combined),
timeout=dummy_data_config.timeout,
**kwargs,
)

Expand Down
1 change: 1 addition & 0 deletions ehrql/dummy_data_nextgen/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ def __init__(self, measures, dummy_data_config, **kwargs):
self.generator = DummyDataGenerator(
get_dataset_variables(combined),
population_size=get_population_size(dummy_data_config, combined),
timeout=dummy_data_config.timeout,
**kwargs,
)

Expand Down
2 changes: 2 additions & 0 deletions ehrql/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def generate_dataset_with_dummy_data(
generator = get_dummy_data_class(dummy_data_config)(
variable_definitions,
population_size=dummy_data_config.population_size,
timeout=dummy_data_config.timeout,
)
results = generator.get_results()

Expand All @@ -152,6 +153,7 @@ def create_dummy_tables(definition_file, dummy_tables_path, user_args, environ):
generator = get_dummy_data_class(dummy_data_config)(
variable_definitions,
population_size=dummy_data_config.population_size,
timeout=dummy_data_config.timeout,
)
table_data = generator.get_data()

Expand Down
17 changes: 15 additions & 2 deletions ehrql/measures/measures.py
Original file line number Diff line number Diff line change
Expand Up @@ -286,22 +286,35 @@ def _validate_group_by(self, group_by):
if disallowed:
raise Error(f"disallowed `group_by` column name: {', '.join(disallowed)}")

def configure_dummy_data(self, *, population_size, legacy=False):
def configure_dummy_data(
self,
*,
population_size=DummyDataConfig.population_size,
legacy=DummyDataConfig.legacy,
timeout=DummyDataConfig.timeout,
):
"""
Configure the dummy data to be generated.
_population_size_<br>
Number of patients to generate (default 10).
Maximum number of patients to generate.
Note that you may get fewer patients than this if the generator runs out of time
– see `timeout` below.
_legacy_<br>
Use legacy dummy data.
_timeout_<br>
Maximum time in seconds to spend generating dummy data.
```py
measures.configure_dummy_data(population_size=10000)
```
"""
self.dummy_data_config.population_size = population_size
self.dummy_data_config.legacy = legacy
self.dummy_data_config.timeout = timeout

def configure_disclosure_control(self, *, enabled=True):
"""
Expand Down
18 changes: 16 additions & 2 deletions ehrql/query_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class Error(Exception):
class DummyDataConfig:
population_size: int = 10
legacy: bool = False
timeout: int = 60


class Dataset:
Expand Down Expand Up @@ -103,22 +104,35 @@ def add_column(self, column_name: str, ehrql_query):
"""
setattr(self, column_name, ehrql_query)

def configure_dummy_data(self, *, population_size, legacy=False):
def configure_dummy_data(
self,
*,
population_size=DummyDataConfig.population_size,
legacy=DummyDataConfig.legacy,
timeout=DummyDataConfig.timeout,
):
"""
Configure the dummy data to be generated.
_population_size_<br>
Number of patients to generate (default 10).
Maximum number of patients to generate.
Note that you may get fewer patients than this if the generator runs out of time
– see `timeout` below.
_legacy_<br>
Use legacy dummy data.
_timeout_<br>
Maximum time in seconds to spend generating dummy data.
```py
dataset.configure_dummy_data(population_size=10000)
```
"""
self.dummy_data_config.population_size = population_size
self.dummy_data_config.legacy = legacy
self.dummy_data_config.timeout = timeout

def __setattr__(self, name, value):
if name == "population":
Expand Down
5 changes: 3 additions & 2 deletions tests/unit/measures/test_dummy_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,8 @@ def test_configured_population_size(legacy):
intervals=years(1).starting_on("2020-01-01"),
)

measures.configure_dummy_data(population_size=10, legacy=legacy)
measures.configure_dummy_data(population_size=99, legacy=legacy, timeout=123)

generator = DummyMeasuresDataGenerator(measures, measures.dummy_data_config)
assert generator.generator.population_size == 10
assert generator.generator.population_size == 99
assert generator.generator.timeout == 123
3 changes: 2 additions & 1 deletion tests/unit/test_query_language.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,11 +127,12 @@ def test_dataset_configure_dummy_data(legacy):
dataset = Dataset()
dataset.define_population(year_of_birth <= 2000)
dataset.year_of_birth = year_of_birth
dataset.configure_dummy_data(population_size=234, legacy=legacy)
dataset.configure_dummy_data(population_size=234, legacy=legacy, timeout=123)

assert dataset.year_of_birth is year_of_birth
assert dataset.dummy_data_config.population_size == 234
assert dataset.dummy_data_config.legacy == legacy
assert dataset.dummy_data_config.timeout == 123


def test_dataset_dummy_data_configured_twice():
Expand Down

0 comments on commit 1552455

Please sign in to comment.