Skip to content

Commit

Permalink
Refactored billable invoice
Browse files Browse the repository at this point in the history
The billable invoice now subclasses the Invoice base class.
The computations involved in preparation, namely filtering out
nonbillable projects and PIs, and validating PI names, have been
moved to `util.py`.

The function for applying the New-PI credit (`apply_credits_new_pi`)
is also moved there, and the I/O needed to read and write out
the PI file has been moved out of this functions for ease of testing.
  • Loading branch information
QuanMPhm committed Jun 14, 2024
1 parent 537ab88 commit 342fb30
Show file tree
Hide file tree
Showing 5 changed files with 252 additions and 207 deletions.
22 changes: 22 additions & 0 deletions process_report/invoices/billable_invoice.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from dataclasses import dataclass

import process_report.invoices.invoice as invoice
import process_report.util as util


@dataclass
class BillableInvoice(invoice.Invoice):
nonbillable_pis: list[str]
nonbillable_projects: list[str]
old_pi_filepath: str

def _prepare(self):
self.data = util.remove_nonbillables(
self.data, self.nonbillable_pis, self.nonbillable_projects
)
self.data = util.validate_pi_names(self.data)

def _process(self):
old_pi_df = util.load_old_pis(self.old_pi_filepath)
self.data, updated_old_pi_df = util.apply_credits_new_pi(self.data, old_pi_df)
util.dump_old_pis(self.old_pi_filepath, updated_old_pi_df)
8 changes: 8 additions & 0 deletions process_report/invoices/invoice.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@
import process_report.util as util


### PI file field names
PI_PI_FIELD = "PI"
PI_FIRST_MONTH = "First Invoice Month"
PI_INITIAL_CREDITS = "Initial Credits"
PI_1ST_USED = "1st Month Used"
PI_2ND_USED = "2nd Month Used"
###

### Invoice field names
INVOICE_DATE_FIELD = "Invoice Month"
PROJECT_FIELD = "Project - Allocation"
Expand Down
185 changes: 23 additions & 162 deletions process_report/process_report.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,11 @@
import boto3
import pyarrow

from process_report.invoices import lenovo_invoice, nonbillable_invoice
from process_report.invoices import (
lenovo_invoice,
nonbillable_invoice,
billable_invoice,
)


### PI file field names
Expand Down Expand Up @@ -62,33 +66,6 @@ def load_institute_map() -> dict:
return institute_map


def load_old_pis(old_pi_file) -> pandas.DataFrame:
try:
old_pi_df = pandas.read_csv(
old_pi_file,
dtype={
PI_INITIAL_CREDITS: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
},
)
except FileNotFoundError:
sys.exit("Applying credit 0002 failed. Old PI file does not exist")

return old_pi_df


def dump_old_pis(old_pi_file, old_pi_df: pandas.DataFrame):
old_pi_df = old_pi_df.astype(
{
PI_INITIAL_CREDITS: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
PI_1ST_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
PI_2ND_USED: pandas.ArrowDtype(pyarrow.decimal128(21, 2)),
},
)
old_pi_df.to_csv(old_pi_file, index=False)


def load_alias(alias_file):
alias_dict = dict()

Expand All @@ -104,31 +81,6 @@ def load_alias(alias_file):
return alias_dict


def get_pi_age(old_pi_df: pandas.DataFrame, pi, invoice_month):
"""Returns time difference between current invoice month and PI's first invoice month
I.e 0 for new PIs
Will raise an error if the PI'a age is negative, which suggests a faulty invoice, or a program bug"""
first_invoice_month = old_pi_df.loc[old_pi_df[PI_PI_FIELD] == pi, PI_FIRST_MONTH]
if first_invoice_month.empty:
return 0

month_diff = get_month_diff(invoice_month, first_invoice_month.iat[0])
if month_diff < 0:
sys.exit(
f"PI {pi} from {first_invoice_month} found in {invoice_month} invoice!"
)
else:
return month_diff


def get_month_diff(month_1, month_2):
"""Returns a positive integer if month_1 is ahead in time of month_2"""
dt1 = datetime.datetime.strptime(month_1, "%Y-%m")
dt2 = datetime.datetime.strptime(month_2, "%Y-%m")
return (dt1.year - dt2.year) * 12 + (dt1.month - dt2.month)


def get_invoice_bucket():
try:
s3_resource = boto3.resource(
Expand Down Expand Up @@ -297,22 +249,29 @@ def main():
bucket = get_invoice_bucket()
invoice.export_s3(bucket)

billable_projects = remove_non_billables(merged_dataframe, pi, projects)
billable_projects = validate_pi_names(billable_projects)

if args.upload_to_s3:
backup_to_s3_old_pi_file(old_pi_file)
credited_projects = apply_credits_new_pi(billable_projects, old_pi_file)

export_billables(credited_projects, args.output_file)
export_pi_billables(credited_projects, args.output_folder, invoice_month)
export_BU_only(billable_projects, args.BU_invoice_file, args.BU_subsidy_amount)
export_HU_BU(credited_projects, args.HU_BU_invoice_file)
billable_inv = billable_invoice.BillableInvoice(
name=args.nonbillable_file,
invoice_month=invoice_month,
data=merged_dataframe.copy(),
nonbillable_pis=pi,
nonbillable_projects=projects,
old_pi_filepath=old_pi_file,
)
billable_inv.process()
billable_inv.export()
if args.upload_to_s3:
bucket = get_invoice_bucket()
billable_inv.export_s3(bucket)

export_pi_billables(billable_inv.data, args.output_folder, invoice_month)
export_BU_only(billable_inv.data, args.BU_invoice_file, args.BU_subsidy_amount)
export_HU_BU(billable_inv.data, args.HU_BU_invoice_file)

if args.upload_to_s3:
invoice_list = [
args.output_file,
]
invoice_list = list()

for pi_invoice in os.listdir(args.output_folder):
invoice_list.append(os.path.join(args.output_folder, pi_invoice))
Expand Down Expand Up @@ -377,23 +336,6 @@ def timed_projects(timed_projects_file, invoice_date):
return dataframe[mask]["Project"].to_list()


def remove_non_billables(dataframe, pi, projects):
"""Removes projects and PIs that should not be billed from the dataframe"""
filtered_dataframe = dataframe[
~dataframe[PI_FIELD].isin(pi) & ~dataframe[PROJECT_FIELD].isin(projects)
]
return filtered_dataframe


def validate_pi_names(dataframe):
invalid_pi_projects = dataframe[pandas.isna(dataframe[PI_FIELD])]
for i, row in invalid_pi_projects.iterrows():
print(f"Warning: Billable project {row[PROJECT_FIELD]} has empty PI field")
dataframe = dataframe[~pandas.isna(dataframe[PI_FIELD])]

return dataframe


def validate_pi_aliases(dataframe: pandas.DataFrame, alias_dict: dict):
for pi, pi_aliases in alias_dict.items():
dataframe.loc[dataframe[PI_FIELD].isin(pi_aliases), PI_FIELD] = pi
Expand All @@ -408,87 +350,6 @@ def fetch_s3_alias_file():
return local_name


def apply_credits_new_pi(dataframe, old_pi_file):
new_pi_credit_code = "0002"
INITIAL_CREDIT_AMOUNT = 1000
EXCLUDE_SU_TYPES = ["OpenShift GPUA100SXM4", "OpenStack GPUA100SXM4"]

dataframe[CREDIT_FIELD] = None
dataframe[CREDIT_CODE_FIELD] = None
dataframe[BALANCE_FIELD] = Decimal(0)

old_pi_df = load_old_pis(old_pi_file)

current_pi_set = set(dataframe[PI_FIELD])
invoice_month = dataframe[INVOICE_DATE_FIELD].iat[0]
invoice_pis = old_pi_df[old_pi_df[PI_FIRST_MONTH] == invoice_month]
if invoice_pis[PI_INITIAL_CREDITS].empty or pandas.isna(
new_pi_credit_amount := invoice_pis[PI_INITIAL_CREDITS].iat[0]
):
new_pi_credit_amount = INITIAL_CREDIT_AMOUNT

print(f"New PI Credit set at {new_pi_credit_amount} for {invoice_month}")

for pi in current_pi_set:
pi_projects = dataframe[dataframe[PI_FIELD] == pi]
pi_age = get_pi_age(old_pi_df, pi, invoice_month)
pi_old_pi_entry = old_pi_df.loc[old_pi_df[PI_PI_FIELD] == pi].squeeze()

if pi_age > 1:
for i, row in pi_projects.iterrows():
dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD]
else:
if pi_age == 0:
if len(pi_old_pi_entry) == 0:
pi_entry = [pi, invoice_month, new_pi_credit_amount, 0, 0]
old_pi_df = pandas.concat(
[
pandas.DataFrame([pi_entry], columns=old_pi_df.columns),
old_pi_df,
],
ignore_index=True,
)
pi_old_pi_entry = old_pi_df.loc[
old_pi_df[PI_PI_FIELD] == pi
].squeeze()

remaining_credit = new_pi_credit_amount
credit_used_field = PI_1ST_USED
elif pi_age == 1:
remaining_credit = (
pi_old_pi_entry[PI_INITIAL_CREDITS] - pi_old_pi_entry[PI_1ST_USED]
)
credit_used_field = PI_2ND_USED

initial_credit = remaining_credit
for i, row in pi_projects.iterrows():
if remaining_credit == 0 or row[SU_TYPE_FIELD] in EXCLUDE_SU_TYPES:
dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD]
else:
project_cost = row[COST_FIELD]
applied_credit = min(project_cost, remaining_credit)

dataframe.at[i, CREDIT_FIELD] = applied_credit
dataframe.at[i, CREDIT_CODE_FIELD] = new_pi_credit_code
dataframe.at[i, BALANCE_FIELD] = row[COST_FIELD] - applied_credit
remaining_credit -= applied_credit

credits_used = initial_credit - remaining_credit
if (pi_old_pi_entry[credit_used_field] != 0) and (
credits_used != pi_old_pi_entry[credit_used_field]
):
print(
f"Warning: PI file overwritten. PI {pi} previously used ${pi_old_pi_entry[credit_used_field]} of New PI credits, now uses ${credits_used}"
)
old_pi_df.loc[
old_pi_df[PI_PI_FIELD] == pi, credit_used_field
] = credits_used

dump_old_pis(old_pi_file, old_pi_df)

return dataframe


def fetch_s3_old_pi_file():
local_name = "PI.csv"
invoice_bucket = get_invoice_bucket()
Expand Down
Loading

0 comments on commit 342fb30

Please sign in to comment.