From 0f89722b4235f409603eb87d538cf7919abf6d8b Mon Sep 17 00:00:00 2001
From: drisspg
Date: Sun, 11 Feb 2024 12:03:42 -0800
Subject: [PATCH 1/4] all the flake8s

---
 .github/workflows/python-app.yml        |  4 ++--
 benchmarks/fp8_sat_cast.py              |  7 +++----
 benchmarks/llama.py                     |  6 +-----
 benchmarks/qlora.py                     | 16 ++++++++------
 pyproject.toml                          | 19 ++++++++++++-----
 test/test_qlora.py                      |  8 +++----
 test/test_utils.py                      | 20 ++++++++----------
 transformer_nuggets/flash/__init__.py   |  2 +-
 transformer_nuggets/fp8/scaled_quant.py |  2 +-
 transformer_nuggets/llama/finetune.py   | 28 +++++++++++--------------
 transformer_nuggets/llama/model.py      |  4 ++--
 transformer_nuggets/quant/qlora.py      |  4 ++--
 transformer_nuggets/utils/benchmark.py  |  1 -
 transformer_nuggets/utils/tracing.py    |  2 +-
 14 files changed, 62 insertions(+), 61 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index f3d4fca..934f599 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -26,8 +26,8 @@ jobs:
     - name: Install dependencies
      run: |
        python -m pip install --upgrade pip
-        pip install flake8 pytest
-        if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
+        pip install -e .
+        pip install -e .'[dev]'
    - name: Lint with flake8
      run: |
        # stop the build if there are Python syntax errors or undefined names
diff --git a/benchmarks/fp8_sat_cast.py b/benchmarks/fp8_sat_cast.py
index cae9cdb..2442515 100644
--- a/benchmarks/fp8_sat_cast.py
+++ b/benchmarks/fp8_sat_cast.py
@@ -1,6 +1,7 @@
 import itertools
 
-from dataclasses import dataclass
+from contextlib import suppress
+from dataclasses import dataclass
 from typing import List
 
 import torch
@@ -80,10 +81,8 @@ def run_experiment(config: ExperimentConfig) -> ExperimentResult:
         high_precision_tensor, eager_abs_max, scale, config.low_precision_dtype, config.saturated
     ).to(config.high_precision_dtype)
     eager_out_hp = eager_out.to(config.high_precision_dtype)
-    try:
+    with suppress(AssertionError):
         torch.testing.assert_close(nuggets_out_hp, eager_out_hp, rtol=1e-3, atol=1e-3)
-    except AssertionError as e:
-        pass
     # investigate why we are seeing small deviations
     # Mismatched elements: 62577 / 2097152 (3.0%)
     # Greatest absolute difference: 2.0 at index (11111,) (up to 0.001 allowed)
diff --git a/benchmarks/llama.py b/benchmarks/llama.py
index 1b72dad..fd865a4 100644
--- a/benchmarks/llama.py
+++ b/benchmarks/llama.py
@@ -82,11 +82,7 @@ def __init__(self, config: LLaMAConfig) -> None:
         self.kv_caches: List[KVCache] = []
 
     def _init_weights(self, module: nn.Module) -> None:
-        if isinstance(module, nn.Linear):
-            torch.nn.init.normal_(
-                module.weight, mean=0.0, std=0.02 / math.sqrt(2 * self.config.n_layer)
-            )
-        elif isinstance(module, nn.Embedding):
+        if isinstance(module, (nn.Linear, nn.Embedding)):
             torch.nn.init.normal_(
                 module.weight, mean=0.0, std=0.02 / math.sqrt(2 * self.config.n_layer)
             )
diff --git a/benchmarks/qlora.py b/benchmarks/qlora.py
index d13e448..2115f2f 100644
--- a/benchmarks/qlora.py
+++ b/benchmarks/qlora.py
@@ -1,13 +1,13 @@
 import argparse
 import csv
-import gc
 import itertools
+
+import logging
 from dataclasses import asdict, dataclass
 from pathlib import Path
-from typing import List, Optional, Tuple
+from typing import List, Optional
 
 import torch
-import torch.nn as nn
 import torch.nn.functional as F
 
 import transformer_nuggets as nugs
@@ -16,12 +16,16 @@
 from transformer_nuggets.quant import NF4Tensor
 
 bnb_available = False
+
+
+logging.getLogger(__name__).addHandler(logging.NullHandler())
+
 try:
-    import bitsandbytes as bnb
+    import bitsandbytes as bnb  # noqa: F401
 
     bnb_available = True
 except ImportError:
-    raise (
+    logging.warning(
         "Could not import bitsandbytes, make sure you have installed it `pip install bitsandbytes` "
     )
 
@@ -137,7 +141,7 @@ def main(output_path: Optional[Path], profile_path: Optional[Path], dynamic: boo
     results = []
     for experiment_config in tqdm(gen_configs()):
         # Since we are changing between dynamic and not
-        import torch._dynamo
+        import torch._dynamo  # noqa: F402
 
         torch._dynamo.reset()
         experiment = experiment_types[experiment_config.op]
diff --git a/pyproject.toml b/pyproject.toml
index 2201bd5..b74c608 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -20,20 +20,21 @@ classifiers = [
 dependencies = [
     "torch >= 2.1.1",
     "scipy >= 1.9.1",
-    "tqdm",
-    "tabulate"
+    "tqdm >= 4.66",
+    "tabulate >= 0.8"
 ]
 
 [project.optional-dependencies]
 dev = [
     "black==23.3.0",
     "usort==1.0.6",
-    "ufmt==2.1.0",
-    "libcst==1.0.1",
+    "ufmt==2.3.0",
+    "libcst==1.1.0",
     "pre-commit==3.6.0",
     "bumpver",
     "pip-tools",
-    "pytest"
+    "pytest",
+    "flake8-pyproject"
 ]
 
 qlora = ['bitsandbytes']
@@ -46,6 +47,14 @@ llama = [
     "float8_experimental",
 ]
 
+# ---------- TOOL CONFIGURATIONS ------------
+[tool.flake8]
+max-line-length = 99
+ignore = ['E231', 'E241', 'E501', 'C408', 'E261', 'E731', 'G004', 'W503', 'E203']
+per-file-ignores = [
+    '__init__.py:F401',
+]
+
 [tool.usort]
 first_party_detection = false
 
diff --git a/test/test_qlora.py b/test/test_qlora.py
index 5c0dbad..cc1b9c4 100644
--- a/test/test_qlora.py
+++ b/test/test_qlora.py
@@ -71,11 +71,11 @@ def test_binning_distribution(embed_dim: int):
 
     bnb_first_elements = (bnb_data >> 4).to(torch.long)
     bnb_second_elements = (bnb_data & 0b1111).to(torch.long)
 
-    bnb_first_counts = torch.unique(bnb_first_elements, return_counts=True)[1]
-    bnb_second_counts = torch.unique(bnb_second_elements, return_counts=True)[1]
+    bnb_first_counts = torch.unique(bnb_first_elements, return_counts=True)[1]  # noqa: F841
+    bnb_second_counts = torch.unique(bnb_second_elements, return_counts=True)[1]  # noqa: F841
 
-    first_counts = torch.unique(first_elements, return_counts=True)[1]
-    second_counts = torch.unique(second_elements, return_counts=True)[1]
+    first_counts = torch.unique(first_elements, return_counts=True)[1]  # noqa: F841
+    second_counts = torch.unique(second_elements, return_counts=True)[1]  # noqa: F841
 
     # Why are these normally distributed and not uniform?
diff --git a/test/test_utils.py b/test/test_utils.py
index d3be4b8..01f8f16 100644
--- a/test/test_utils.py
+++ b/test/test_utils.py
@@ -11,9 +11,8 @@ def test_nan():
             0.0,
         ]
     )
-    with pytest.raises(RuntimeError, match="returned a NaN"):
-        with NanInfDetect():
-            print(torch.div(a, a))
+    with pytest.raises(RuntimeError, match="returned a NaN"), NanInfDetect():
+        print(torch.div(a, a))
 
 
 def test_inf():
@@ -23,9 +22,8 @@ def test_inf():
         ],
         dtype=torch.float16,
     )
-    with pytest.raises(RuntimeError, match="returned an Inf"):
-        with NanInfDetect():
-            print(torch.mul(a, 65537))
+    with pytest.raises(RuntimeError, match="returned an Inf"), NanInfDetect():
+        print(torch.mul(a, 65537))
 
 
 def test_breakpoint():
@@ -34,11 +32,11 @@ def test_breakpoint():
             0.0,
         ]
     )
-    with pytest.raises(RuntimeError, match="returned a NaN"):
-        with mock.patch("builtins.breakpoint") as mock_breakpoint:
-            with NanInfDetect(do_breakpoint=True):
-                print(torch.div(a, a))
-            mock_breakpoint.assert_called_once()
+    with pytest.raises(RuntimeError, match="returned a NaN"), mock.patch(
+        "builtins.breakpoint"
+    ) as mock_breakpoint, NanInfDetect(do_breakpoint=True):
+        print(torch.div(a, a))
+    mock_breakpoint.assert_called_once()
 
 
 if __name__ == "__main__":
diff --git a/transformer_nuggets/flash/__init__.py b/transformer_nuggets/flash/__init__.py
index 1241b75..7714bb9 100644
--- a/transformer_nuggets/flash/__init__.py
+++ b/transformer_nuggets/flash/__init__.py
@@ -1 +1 @@
-from transformer_nuggets.flash.flash_attention import *
+from transformer_nuggets.flash.flash_attention import *  # noqa: F403
diff --git a/transformer_nuggets/fp8/scaled_quant.py b/transformer_nuggets/fp8/scaled_quant.py
index 0f42c74..9ff6e96 100644
--- a/transformer_nuggets/fp8/scaled_quant.py
+++ b/transformer_nuggets/fp8/scaled_quant.py
@@ -90,5 +90,5 @@ def eager_scaled_quant(
         out = torch.where(
             out < -1 * torch.finfo(fp8_dtype).max, -1 * torch.finfo(fp8_dtype).max, out
         )
-    abs_max = torch.max(torch.abs(out))
+    _ = torch.max(torch.abs(out))
     return out.to(fp8_dtype)
diff --git a/transformer_nuggets/llama/finetune.py b/transformer_nuggets/llama/finetune.py
index cdfe31c..11f6b78 100644
--- a/transformer_nuggets/llama/finetune.py
+++ b/transformer_nuggets/llama/finetune.py
@@ -2,16 +2,11 @@
 Used to train a model from scratch on big dense blocks of text data using causal attention.
""" import argparse -import csv import logging -import math import os import random -import time -from contextlib import nullcontext -from dataclasses import dataclass, field +from dataclasses import dataclass from pathlib import Path -from typing import List, Optional import numpy as np import torch @@ -22,7 +17,6 @@ from torch.distributed.fsdp import FullyShardedDataParallel as FSDP from torch.distributed.fsdp.wrap import ModuleWrapPolicy from torch.utils.data import DataLoader, IterableDataset -from tqdm import tqdm from transformer_nuggets.llama.model import ModelArgs, Transformer, TransformerBlock from transformer_nuggets.llama.train import ( calculate_loss, @@ -222,15 +216,17 @@ def train( # TODO(future): fix this condition, eval currently only happens # if eval_interval and batch_size are multiples of each other if not is_accumulating and step_count % training_config.eval_interval == 0: - t0 = time.time() - val_loss = validate( - model, val_data, val_loss_file, training_config, step_count, rank, world_size - ) - t1 = time.time() - t0 - if rank == 0: - logging.info( - f"step {iter_num}: val loss {val_loss:.4f}, val time: {t1 * 1000:.2f}ms" - ) + pass + # TODO: add validation loop + # t0 = time.time() + # val_loss = validate( + # model, val_data, val_loss_file, training_config, step_count, rank, world_size + # ) + # t1 = time.time() - t0 + # if rank == 0: + # logging.info( + # f"step {iter_num}: val loss {val_loss:.4f}, val time: {t1 * 1000:.2f}ms" + # ) if not is_accumulating and step_count % training_config.save_interval == 0: checkpoint_path = training_config.out_dir / f"iter-{iter_num:06d}-ckpt.pth" diff --git a/transformer_nuggets/llama/model.py b/transformer_nuggets/llama/model.py index 2008e21..b54d0aa 100644 --- a/transformer_nuggets/llama/model.py +++ b/transformer_nuggets/llama/model.py @@ -128,7 +128,7 @@ def forward(self, idx: Tensor, input_pos: Optional[Tensor] = None) -> Tensor: freqs_cis = self.freqs_cis[input_pos] x = self.tok_embeddings(idx) - for i, layer in enumerate(self.layers): + for _, layer in enumerate(self.layers): x = layer(x, input_pos, freqs_cis) x = self.norm(x) logits = self.output(x) @@ -202,7 +202,7 @@ def forward(self, x: Tensor, freqs_cis: Tensor) -> Tensor: q = apply_rotary_emb(q, freqs_cis) k = apply_rotary_emb(k, freqs_cis) - q, k, v = map(lambda x: x.transpose(1, 2), (q, k, v)) + q, k, v = (x.transpose(1, 2) for x in (q, k, v)) k = k.repeat_interleave(self.n_head // self.n_local_heads, dim=1) v = v.repeat_interleave(self.n_head // self.n_local_heads, dim=1) diff --git a/transformer_nuggets/quant/qlora.py b/transformer_nuggets/quant/qlora.py index c257b4a..4b14197 100644 --- a/transformer_nuggets/quant/qlora.py +++ b/transformer_nuggets/quant/qlora.py @@ -1,6 +1,6 @@ import logging import math -from dataclasses import dataclass, field +from dataclasses import dataclass from typing import Tuple import torch @@ -343,7 +343,7 @@ def get_sample_inputs( def get_mlp_weights( - embed_dim: int, device: torch.dtype = torch.device("cuda:0") + embed_dim: int, device: torch.dtype = "cuda" ) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]: """These three weights take up 3 * (embed_dim * n_hidden) * 2 bytes of memory diff --git a/transformer_nuggets/utils/benchmark.py b/transformer_nuggets/utils/benchmark.py index e135f9a..6c7571b 100644 --- a/transformer_nuggets/utils/benchmark.py +++ b/transformer_nuggets/utils/benchmark.py @@ -2,7 +2,6 @@ from contextlib import contextmanager, nullcontext from dataclasses import dataclass, field from pathlib import Path 
-from pickle import dump
 from typing import Callable, Optional
 
 import torch
diff --git a/transformer_nuggets/utils/tracing.py b/transformer_nuggets/utils/tracing.py
index f377c01..51c5543 100644
--- a/transformer_nuggets/utils/tracing.py
+++ b/transformer_nuggets/utils/tracing.py
@@ -5,7 +5,7 @@
 import torch
 import torch.overrides
 from torch.utils._python_dispatch import TorchDispatchMode
-from torch.utils._pytree import tree_flatten, tree_map, tree_map_only
+from torch.utils._pytree import tree_map, tree_map_only
 from torch.utils.weak import WeakIdRef
 
 dtype_abbrs = {

From cd9b1c258dd857fc3141e440de47ce645de98210 Mon Sep 17 00:00:00 2001
From: drisspg
Date: Sun, 11 Feb 2024 12:05:10 -0800
Subject: [PATCH 2/4] fix flake8

---
 .github/workflows/python-app.yml | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index 934f599..fe0dc49 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -30,10 +30,7 @@ jobs:
        pip install -e .'[dev]'
    - name: Lint with flake8
      run: |
-        # stop the build if there are Python syntax errors or undefined names
-        flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
-        # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
-        flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
+        flake8
    - name: Test with pytest
      run: |
        pytest

From d5a81440957254e0df859bdbfbaf0091c964d318 Mon Sep 17 00:00:00 2001
From: drisspg
Date: Sun, 11 Feb 2024 12:58:41 -0800
Subject: [PATCH 3/4] add flake8 pre-commit and update tests with more skips

---
 .pre-commit-config.yaml |  6 ++++++
 pyproject.toml          |  3 ++-
 test/test_flash.py      |  1 +
 test/test_fp8.py        |  2 ++
 test/test_qlora.py      | 19 +++++++++++++------
 5 files changed, 24 insertions(+), 7 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index b3757c2..18108f3 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -18,3 +18,9 @@ repos:
      - usort == 1.0.6
      - ufmt == 2.1.0
      - libcst == 1.0.1
+
+- repo: https://github.com/pycqa/flake8
+  rev: 7.0.0
+  hooks:
+  - id: flake8
+    additional_dependencies: [flake8-pyproject]
diff --git a/pyproject.toml b/pyproject.toml
index b74c608..9efa481 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -34,7 +34,8 @@ dev = [
     "bumpver",
     "pip-tools",
     "pytest",
-    "flake8-pyproject"
+    "flake8==6.1.0",
+    "flake8-pyproject",
 ]
 
 qlora = ['bitsandbytes']
diff --git a/test/test_flash.py b/test/test_flash.py
index 60cb699..2fff3d6 100644
--- a/test/test_flash.py
+++ b/test/test_flash.py
@@ -7,6 +7,7 @@
 @pytest.mark.parametrize("causal", [True, False])
 @pytest.mark.parametrize("bias_choice", [BiasMode.rel_pos, BiasMode.none, BiasMode.alibi])
 @pytest.mark.parametrize("sm_scale", [None, 1])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_op(Z, H, N_CTX, D_HEAD, causal, bias_choice, sm_scale, dtype=torch.float16):
     torch.manual_seed(20)
     q = (
diff --git a/test/test_fp8.py b/test/test_fp8.py
index 33f9a27..73ae2b9 100644
--- a/test/test_fp8.py
+++ b/test/test_fp8.py
@@ -5,6 +5,7 @@
 
 
 @pytest.mark.parametrize("fp8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_basic_quant(fp8_dtype):
     torch.manual_seed(0)
     torch.cuda.manual_seed_all(0)
@@ -17,6 +18,7 @@ def test_basic_quant(fp8_dtype):
 
 
 @pytest.mark.parametrize("fp8_dtype", [torch.float8_e4m3fn, torch.float8_e5m2])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_saturated(fp8_dtype):
     torch.manual_seed(0)
     torch.cuda.manual_seed_all(0)
diff --git a/test/test_qlora.py b/test/test_qlora.py
index cc1b9c4..991947f 100644
--- a/test/test_qlora.py
+++ b/test/test_qlora.py
@@ -21,6 +21,7 @@
 @pytest.mark.parametrize(
     "inpt_size, block_size, scaler_block_size", [(16384, 64, 256), (256, 16, 16), (1024, 32, 32)]
 )
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_reconstruction(inpt_size: int, block_size: int, scaler_block_size: int):
     torch.manual_seed(0)
     device = "cuda"
@@ -37,9 +38,10 @@ def test_reconstruction(inpt_size: int, block_size: int, scaler_block_size: int)
 
 @unittest.skipIf(not bnb_available, "Bitsandbytes not available")
 @pytest.mark.parametrize("embed_dim", [256, 4096, 5120, 6656, 8192])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_reconstruction_qlora_vs_bnb(embed_dim: int):
     torch.manual_seed(0)
-    device = "cuda:0"
+    device = "cuda"
     input_weight = qlora.build_input_weight(embed_dim, device)
     nugs_qlora = NF4Tensor.from_tensor(input_weight)
     bnb_linear = qlora.build_bitsandbytes_linear(input_weight, device)
@@ -56,10 +58,11 @@ def test_reconstruction_qlora_vs_bnb(embed_dim: int):
     assert (nugs_diff - bnb_diff).abs() < 2e-1
 
 
-@unittest.skipIf(not bnb_available, "Bitsandbytes not available")
+@pytest.mark.skipIf(not bnb_available, "Bitsandbytes not available")
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 @pytest.mark.parametrize("embed_dim", [256, 4096, 5120, 6656, 8192])
 def test_binning_distribution(embed_dim: int):
-    device = "cuda:0"
+    device = "cuda"
     input_weight = qlora.build_input_weight(embed_dim, device)
     nugs_qlora = NF4Tensor.from_tensor(input_weight)
     first_elements = (nugs_qlora.quantized_data >> 4).to(torch.long)
@@ -83,9 +86,10 @@ def test_binning_distribution(embed_dim: int):
 @pytest.mark.parametrize("embed_dim", [256, 4096, 5120, 6656, 8192])
 @pytest.mark.parametrize("compile", [True, False])
 @pytest.mark.parametrize("requires_grad", [True, False])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_autograd_func_to_eager(embed_dim: int, compile: bool, requires_grad: bool):
     torch.manual_seed(0)
-    device = "cuda:0"
+    device = "cuda"
     input_weight = qlora.build_input_weight(embed_dim, device)
     sample_input = qlora.get_sample_inputs(8, 128, embed_dim, device, requires_grad=requires_grad)
     nugs_qlora = NF4Tensor.from_tensor(input_weight)
@@ -99,9 +103,10 @@ def test_autograd_func_to_eager(embed_dim: int, compile: bool, requires_grad: bo
     out.sum().backward()
 
 
-@unittest.skipIf(not bnb_available, "Bitsandbytes not available")
+@pytest.mark.skipIf(not bnb_available, "Bitsandbytes not available")
 @pytest.mark.parametrize("embed_dim", [256, 4096, 5120, 6656, 8192])
 @pytest.mark.parametrize("compile", [True, False])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_bitsandbytes_linear_parity(embed_dim, compile):
     device = torch.device("cuda:0")
     input_weight = qlora.build_input_weight(embed_dim, device)
@@ -127,9 +132,10 @@ def qlora_linear(
     assert bnb_difference.max() < 0.5 * embed_dim
 
 
-@unittest.skipIf(not bnb_available, "Bitsandbytes not available")
+@pytest.mark.skipIf(not bnb_available, "Bitsandbytes not available")
 @pytest.mark.parametrize("embed_dim", [256, 4096, 5120, 6656, 8192])
 @pytest.mark.parametrize("compile", [True, False])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_bitsandbytes_mlp_parity(embed_dim, compile):
     device = torch.device("cuda:0")
     weights = qlora.get_mlp_weights(embed_dim, device)
@@ -159,6 +165,7 @@ def test_bitsandbytes_mlp_parity(embed_dim, compile):
 @pytest.mark.parametrize("r", [1, 2])
 @pytest.mark.parametrize("dropout", [0.0, 0.2])
 @pytest.mark.parametrize("run_backward", [True, False])
+@pytest.mark.skipif(not torch.cuda.is_available(), reason="CUDA is not available")
 def test_qlora_linear(embed_dim: int, compile: bool, r: int, dropout: float, run_backward: bool):
     torch.manual_seed(0)
     device = "cuda:0"

From 8443777f2df03434fe79e2e6cf86ab2e56845a45 Mon Sep 17 00:00:00 2001
From: drisspg
Date: Sun, 11 Feb 2024 13:00:53 -0800
Subject: [PATCH 4/4] naming

---
 .github/workflows/python-app.yml | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
index fe0dc49..0a7975c 100644
--- a/.github/workflows/python-app.yml
+++ b/.github/workflows/python-app.yml
@@ -1,7 +1,6 @@
-# This workflow will install Python dependencies, run tests and lint with a single version of Python
-# For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
+# Basic flake8 + pytest workflow for Python 3.10
 
-name: Python application
+name: Python Lint and Test
 
 on:
   push: