From af10c0dd9bb7e0bb8d65139bb0a2a4d327cbf6c8 Mon Sep 17 00:00:00 2001 From: Anna Gavrilman Date: Mon, 1 Jul 2024 23:51:01 +0200 Subject: [PATCH] Add an option to process only last rows (#159) * Add an option to process only last rows * docs * format * changelog * fixes * adding more logging * format * adjusting for index by 1 * format --- src/mitol/google_sheets/README.md | 1 + ...4_153353_annagav_process_only_last_rows.md | 43 +++++++++++++++++++ .../google_sheets/settings/google_sheets.py | 7 +++ src/mitol/google_sheets/sheet_handler_api.py | 35 +++++++++++++-- src/mitol/google_sheets/utils.py | 2 + 5 files changed, 84 insertions(+), 4 deletions(-) create mode 100644 src/mitol/google_sheets/changelog.d/20240624_153353_annagav_process_only_last_rows.md diff --git a/src/mitol/google_sheets/README.md b/src/mitol/google_sheets/README.md index 95024cf0..398111f1 100644 --- a/src/mitol/google_sheets/README.md +++ b/src/mitol/google_sheets/README.md @@ -39,6 +39,7 @@ MITOL_GOOGLE_SHEETS_DRIVE_CLIENT_SECRET= MITOL_GOOGLE_SHEETS_DRIVE_API_PROJECT_ID= MITOL_GOOGLE_SHEETS_PROCESSOR_APP_NAME= MITOL_GOOGLE_SHEETS_ENROLLMENT_CHANGE_SHEET_ID= +MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM= ``` diff --git a/src/mitol/google_sheets/changelog.d/20240624_153353_annagav_process_only_last_rows.md b/src/mitol/google_sheets/changelog.d/20240624_153353_annagav_process_only_last_rows.md new file mode 100644 index 00000000..52d8f5fe --- /dev/null +++ b/src/mitol/google_sheets/changelog.d/20240624_153353_annagav_process_only_last_rows.md @@ -0,0 +1,43 @@ + + + + + + + + diff --git a/src/mitol/google_sheets/settings/google_sheets.py b/src/mitol/google_sheets/settings/google_sheets.py index 1bc981a4..cde53e62 100644 --- a/src/mitol/google_sheets/settings/google_sheets.py +++ b/src/mitol/google_sheets/settings/google_sheets.py @@ -47,6 +47,13 @@ "ID of the Google Sheet that contains the enrollment change request worksheets (refunds, transfers, etc)" ), ) 
+MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM = get_string( + name="MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM", + default=30, + description=( + "Process only the last N rows of data. If set to 0 then process all rows. " + ), +) MITOL_GOOGLE_SHEETS_DATE_FORMAT = get_string( name="MITOL_GOOGLE_SHEETS_DATE_FORMAT", default="%m/%d/%Y", diff --git a/src/mitol/google_sheets/sheet_handler_api.py b/src/mitol/google_sheets/sheet_handler_api.py index c5782f8e..5007d39a 100644 --- a/src/mitol/google_sheets/sheet_handler_api.py +++ b/src/mitol/google_sheets/sheet_handler_api.py @@ -100,7 +100,7 @@ def worksheet(self): def get_enumerated_rows(self): """ - Yields enumerated data rows of a spreadsheet (excluding header row(s)) + Yields enumerated data rows of a spreadsheet (excluding header row(s)). Yields: Tuple[int, List[str]]: Row index (according to the Google Sheet, NOT zero-indexed) paired with the list @@ -146,9 +146,21 @@ def update_sheet_from_results(self, grouped_row_results): processed_row_results = grouped_row_results.get(ResultType.PROCESSED, []) if processed_row_results: self.update_completed_rows(processed_row_results) + log.warning( + "Successfully processed rows in %s (%s): %s", + self.sheet_metadata.sheet_name, + self.sheet_metadata.worksheet_name, + [row_result.row_index for row_result in processed_row_results], + ) failed_row_results = grouped_row_results.get(ResultType.FAILED, []) if failed_row_results: self.update_row_errors(failed_row_results) + log.warning( + "Processed rows with errors in %s (%s): %s", + self.sheet_metadata.sheet_name, + self.sheet_metadata.worksheet_name, + [row_result.row_index for row_result in failed_row_results], + ) out_of_sync_row_results = grouped_row_results.get(ResultType.OUT_OF_SYNC, []) if out_of_sync_row_results: log.warning( @@ -259,7 +271,7 @@ def process_sheet(self, limit_row_index=None): try: row_result = self.process_row(row_index, row_data) except Exception as exc: - log.exception("Error processing row from 
google sheets") + log.exception("Error processing row %s from google sheets", row_index) row_result = RowResult( row_index=row_index, row_db_record=None, @@ -314,14 +326,29 @@ def worksheet(self): def get_enumerated_rows(self): # Only yield rows in the spreadsheet that come after the legacy rows # (i.e.: the rows of data that were manually entered before we started automating this process) + row_count = len(self.worksheet.get_all_values(include_tailing_empty_rows=False)) + first_row_to_process = self.start_row + if int(settings.MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM) > 0: + # allow to choose to process only last few rows + new_first_row = ( + row_count + - int(settings.MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM) + + 1 + ) + first_row_to_process = ( + new_first_row if new_first_row > self.start_row else self.start_row + ) + logging.warning( + "Going to process the sheet starting with row %s", first_row_to_process + ) return enumerate( get_data_rows_after_start( self.worksheet, - start_row=self.start_row, + start_row=first_row_to_process, start_col=1, end_col=self.sheet_metadata.num_columns, ), - start=self.start_row, + start=first_row_to_process, ) def update_completed_rows(self, success_row_results): diff --git a/src/mitol/google_sheets/utils.py b/src/mitol/google_sheets/utils.py index 4040e098..cbc0266b 100644 --- a/src/mitol/google_sheets/utils.py +++ b/src/mitol/google_sheets/utils.py @@ -184,6 +184,8 @@ def get_data_rows_after_start( """ Yields the data rows of a spreadsheet starting with a given row and spanning a given column range until empty rows are encountered. + This function always starts at the given start_row. Callers may pass a later start_row when + MITOL_GOOGLE_SHEETS_PROCESS_ONLY_LAST_ROWS_NUM > 0 to process only the last N rows (0 means all rows). Args: worksheet (pygsheets.worksheet.Worksheet): Worksheet object