add lint to oss (pytorch#1427)
Summary:
Pull Request resolved: pytorch#1427

Pull Request resolved: pytorch#945

As title. This diff will be landed after the previous diff has propagated to production. Please feel free to accept; I will monitor the production package and land this later to avoid a BC-breaking issue.

Differential Revision: D40620055

fbshipit-source-id: 367ade5abb86a5e90ccc7f2c6541d008fb3f42c5
airidas-meta authored and facebook-github-bot committed Nov 1, 2022
1 parent 79f2073 commit 84dadc7
Showing 17 changed files with 237 additions and 51 deletions.
45 changes: 45 additions & 0 deletions .github/workflows/pylint.yaml
@@ -0,0 +1,45 @@
name: Lint

on:
  push:
    branches:
      - main

  pull_request:
    branches:
      - main
jobs:
  build:
    runs-on: ubuntu-latest
    strategy:
      matrix:
        python-version: ["3.8"]
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python ${{ matrix.python-version }}
        uses: actions/setup-python@v2
        with:
          python-version: ${{ matrix.python-version }}
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install ufmt
          pip install click
          pip install flake8
      - name: Analyzing the code with flake8
        run: |
          echo "::add-matcher::fbgemm_gpu/test/lint/flake8_problem_matcher.json"
          flake8 --ignore=E501,W503,E203 .
          # E501 = line too long
          # W503 = line break before binary operator (deprecated)
          # E203 = whitespace before ":"
      - name: Analyzing the code with ufmt
        run: |
          ufmt diff fbgemm_gpu/fbgemm_gpu
          ufmt diff fbgemm_gpu/test
          ufmt diff fbgemm_gpu/bench
      - name: Check Meta copyright header
        run: |
          python fbgemm_gpu/test/lint/check_meta_header.py --path=./fbgemm_gpu/fbgemm_gpu --fixit=False
          python fbgemm_gpu/test/lint/check_meta_header.py --path=./fbgemm_gpu/test --fixit=False
          python fbgemm_gpu/test/lint/check_meta_header.py --path=./fbgemm_gpu/bench --fixit=False
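The workflow's last step calls check_meta_header.py, whose contents are not part of this diff. As a rough, hypothetical sketch of what a checker with that CLI might look like (the marker string, the .py file filter, and the --fixit handling are assumptions, not the repository's actual implementation):

#!/usr/bin/env python3
# Hypothetical sketch only -- not the repository's check_meta_header.py.
import argparse
import os

HEADER_MARKER = "Copyright (c) Meta Platforms, Inc. and affiliates."  # assumed

def has_header(path: str) -> bool:
    # Look for the marker within the first few lines of the file.
    with open(path, encoding="utf-8") as f:
        return HEADER_MARKER in "".join(f.readlines()[:10])

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument("--path", required=True)
    parser.add_argument("--fixit", default="False")  # sketch ignores fix-up mode
    args = parser.parse_args()
    missing = [
        os.path.join(root, name)
        for root, _dirs, files in os.walk(args.path)
        for name in files
        if name.endswith(".py") and not has_header(os.path.join(root, name))
    ]
    if missing:
        raise SystemExit("Missing Meta header:\n" + "\n".join(missing))

if __name__ == "__main__":
    main()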
7 changes: 5 additions & 2 deletions fbgemm_gpu/bench/bench_utils.py
@@ -3,7 +3,6 @@
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.

import itertools
import logging
import statistics
import time
@@ -344,6 +343,7 @@ def benchmark_requests_refer(
    check_median: bool = False,
) -> float:
    do_pooling = pooling_mode in ["sum", "mean"]

    if do_pooling:
        nn_embedding_list = [
            torch.nn.EmbeddingBag(E, D, mode=pooling_mode, sparse=True).cuda()
@@ -397,12 +397,15 @@ def benchmark_requests_refer(
                )
            ]
        )

        if do_pooling:
            final_output = torch.cat(
                [f.view(B, -1) for f in nn_embedding_output], dim=1
            )
        else:
            final_output = torch.cat(nn_embedding_output, dim=0).view(-1, D)
            final_output = torch.cat(nn_embedding_output, dim=0).view(  # noqa: F841
                -1, D
            )

        if torch.cuda.is_available():
            end_event.record()
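The `# noqa: F841` added above silences flake8's F841 check ("local variable is assigned to but never used"): in the non-pooling branch, `final_output` is assigned only so the reference benchmark performs the same concatenation work as the pooled branch, so the unused binding is deliberate. A minimal illustration of the rule (hypothetical snippet, not from this diff):

import torch

def timed_section() -> None:
    # Without the trailing comment, flake8 reports:
    #   F841 local variable 'out' is assigned to but never used
    out = torch.cat([torch.ones(2), torch.zeros(2)], dim=0)  # noqa: F841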
19 changes: 9 additions & 10 deletions fbgemm_gpu/bench/merge_embeddings_benchmark.py
@@ -17,6 +17,14 @@
import tabulate
import torch

from fbgemm_gpu.split_table_batched_embeddings_ops import (
    BoundsCheckMode,
    EmbeddingLocation,
    IntNBitTableBatchedEmbeddingBagsCodegen,
    SparseType,
)
from torch.profiler import profile, ProfilerActivity

# pyre-fixme[16]: Module `fbgemm_gpu` has no attribute `open_source`.
open_source: bool = getattr(fbgemm_gpu, "open_source", False)

@@ -32,15 +40,6 @@
)


from fbgemm_gpu.split_table_batched_embeddings_ops import (
    BoundsCheckMode,
    EmbeddingLocation,
    IntNBitTableBatchedEmbeddingBagsCodegen,
    SparseType,
)
from torch.profiler import profile, ProfilerActivity


def get_gpu_device(gpu_num) -> torch.device:
    return torch.device(f"cuda:{gpu_num}")

@@ -72,7 +71,7 @@ def generate_requests(
    E: int,
    # inter-batch indices reuse rate
    reuse: float = 0.0,
) -> List[Tuple[torch.IntTensor, torch.IntTensor,]]:
) -> List[Tuple[torch.IntTensor, torch.IntTensor, None]]:
    rs = []
    for gpu_num in range(num_gpus):
        all_indices = torch.randint(
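Note the return-type fix above: `Tuple[torch.IntTensor, torch.IntTensor,]` is just a 2-tuple annotation with a trailing comma, while the function actually yields 3-tuples whose last element is always `None` (presumably a placeholder for optional per-sample weights), hence `Tuple[torch.IntTensor, torch.IntTensor, None]`. A minimal sketch with assumed shapes and names, not the benchmark's real generator:

from typing import List, Tuple

import torch

def make_requests(n: int) -> List[Tuple[torch.Tensor, torch.Tensor, None]]:
    # Each request is (indices, offsets, None); the trailing None keeps the
    # tuple shape uniform for callers that expect an optional weights slot.
    reqs = []
    for _ in range(n):
        indices = torch.randint(0, 1000, (16,), dtype=torch.int32)
        offsets = torch.tensor([0, 8, 16], dtype=torch.int32)
        reqs.append((indices, offsets, None))
    return reqs

requests = make_requests(2)
assert all(len(r) == 3 and r[2] is None for r in requests)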
27 changes: 13 additions & 14 deletions fbgemm_gpu/bench/split_table_batched_embeddings_benchmark.py
@@ -19,15 +19,6 @@
import fbgemm_gpu
import numpy as np
import torch

haveAIBench = False
try:
    from aibench_observer.utils.observer import emitMetric

    haveAIBench = True
except Exception:
    haveAIBench = False

from fbgemm_gpu.split_table_batched_embeddings_ops import (
    BoundsCheckMode,
    CacheAlgorithm,
@@ -44,6 +35,15 @@
)
from torch import Tensor

haveAIBench = False
try:
    from aibench_observer.utils.observer import emitMetric

    haveAIBench = True
except Exception:
    haveAIBench = False


# pyre-fixme[16]: Module `fbgemm_gpu` has no attribute `open_source`.
open_source: bool = getattr(fbgemm_gpu, "open_source", False)

@@ -1769,11 +1769,10 @@ def nbit_cache(  # noqa C901
    param_size_multiplier = weights_precision.bit_rate() / 8.0
    output_size_multiplier = output_dtype.bit_rate() / 8.0
    read_write_bytes = (
        param_size_multiplier
        * B
        * sum(Ds)
        * L
        # output_size_multiplier * B * sum(Ds) + param_size_multiplier * B * sum(Ds) * L
        param_size_multiplier * B * sum(Ds) * L
        + output_size_multiplier * B * sum(Ds)
        + param_size_multiplier * B * sum(Ds) * L
    )
    logging.info(
        f"{weights_precision} Embedding tables: {E * T} rows, {nparams_byte / param_size_multiplier / 1.0e9: .2f} GParam, "
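The reworked `read_write_bytes` expression (see the restored comment in the hunk above) models the bytes moved per iteration as embedding-row reads plus pooled-output writes. A toy evaluation of that model with assumed values (not the benchmark's defaults):

# Assumed toy values, for illustration only.
B = 128                       # batch size
Ds = [64, 128]                # embedding dims, one per table
L = 20                        # average lookups (pooling factor) per sample
param_size_multiplier = 0.5   # INT4 weights: 4 bits / 8 = 0.5 bytes per element
output_size_multiplier = 2.0  # FP16 output: 2 bytes per element

read_write_bytes = (
    param_size_multiplier * B * sum(Ds) * L   # embedding rows read
    + output_size_multiplier * B * sum(Ds)    # pooled output written
)
print(f"{read_write_bytes / 1.0e6:.2f} MB moved per iteration")  # 0.29 MB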
2 changes: 1 addition & 1 deletion fbgemm_gpu/fbgemm_gpu/__init__.py
@@ -19,4 +19,4 @@
# Use existence to check if fbgemm_gpu_py.so has already been loaded
open_source: bool = True

from . import _fbgemm_gpu_docs
# from . import _fbgemm_gpu_docs
18 changes: 9 additions & 9 deletions fbgemm_gpu/fbgemm_gpu/_fbgemm_gpu_docs.py
@@ -5,7 +5,7 @@

import fbgemm_gpu
import fbgemm_gpu.split_table_batched_embeddings_ops
import torch
import torch # usort:skip

Tensor = torch.Tensor

@@ -85,12 +85,12 @@ def add_docs(method, docstr):
"""
dense_to_jagged(dense, x_offsets, total_L) -> (Tensor, Tensor[])
Converts a dense tensor into a jagged tensor, given the desired offsets of the resulting dense tensor.
Converts a dense tensor into a jagged tensor, given the desired offsets of the resulting dense tensor.
Args:
dense (Tensor): A dense input tensor to be converted
x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
total_L (int, Optional): Total number of values in the resulting jagged tensor.
@@ -119,7 +119,7 @@ def add_docs(method, docstr):
Args:
values (Tensor): Jagged tensor values
offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
max_lengths (int[]): A list with max_length for each jagged dimension.
@@ -147,13 +147,13 @@ def add_docs(method, docstr):
"""
jagged_dense_elementwise_add(x_values, x_offsets, y) -> Tensor
Adds a jagged tensor to a dense tensor, resulting in dense tensor. Jagged
Adds a jagged tensor to a dense tensor, resulting in dense tensor. Jagged
tensor input will be padded with zeros for the purposes of the addition.
Args:
x_values (Tensor): Jagged tensor values
offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
y (Tensor): A dense tensor
@@ -174,7 +174,7 @@ def add_docs(method, docstr):
Args:
x_values (Tensor): Jagged tensor values
x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
y (Tensor): A dense tensor
@@ -195,7 +195,7 @@ def add_docs(method, docstr):
Args:
x_values (Tensor): Jagged tensor values
x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
y_0 (Tensor): A dense tensor
@@ -218,7 +218,7 @@ def add_docs(method, docstr):
Args:
x_values (Tensor): Jagged tensor values
x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
x_offsets (Tensor[]): A list of jagged offset tensors, one for each jagged dimension.
y (Tensor): A dense tensor
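This file only deduplicates docstring lines, so the operator semantics are unchanged. As a usage sketch of the jagged/dense conversions those docstrings describe (assuming a build where the fbgemm operators are loaded; the values are made up):

import torch
import fbgemm_gpu  # noqa: F401  (loads the fbgemm operator library)

# A jagged tensor: 4 value rows split into 2 bags by offsets [0, 1, 4].
values = torch.tensor([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0], [4.0, 4.0]])
offsets = torch.tensor([0, 1, 4])

# Pad every bag to length 3, filling gaps with 0.0 -> shape (2, 3, 2).
padded = torch.ops.fbgemm.jagged_to_padded_dense(values, [offsets], [3], 0.0)

# Round-trip back to jagged values using the same offsets.
jagged_values, _ = torch.ops.fbgemm.dense_to_jagged(padded, [offsets])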
6 changes: 6 additions & 0 deletions fbgemm_gpu/fbgemm_gpu/docs/__init__.py
@@ -0,0 +1,6 @@
#!/usr/bin/env python3

# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
1 change: 1 addition & 0 deletions fbgemm_gpu/fbgemm_gpu/enums.py
Original file line number Diff line number Diff line change
@@ -9,6 +9,7 @@
import typing
from typing import Any, Callable, List, Tuple


# Create enums in given namespace with information from query_op
def create_enums(
    namespace: typing.Dict[str, Any],
3 changes: 2 additions & 1 deletion fbgemm_gpu/fbgemm_gpu/quantize_comm.py
@@ -24,7 +24,8 @@
    hfp8_to_fp32,
)
from fbgemm_gpu.split_embedding_configs import SparseType
from torch.autograd.profiler import record_function
from torch.autograd.profiler import record_function # usort:skip


logger: logging.Logger = logging.getLogger()

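The `# usort:skip` comments introduced throughout this diff are directives for usort, the import sorter that ufmt runs alongside black: a skipped import is left exactly where it is instead of being regrouped. That matters when import order has side effects; an illustrative (not from this diff) example:

import torch  # usort:skip  -- keep first so its C++ extension loads early
import fbgemm_gpu  # registers the fbgemm operators against the loaded torch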
6 changes: 4 additions & 2 deletions fbgemm_gpu/fbgemm_gpu/split_embedding_inference_converter.py
@@ -11,11 +11,13 @@
import math
from typing import Optional, Tuple

import fbgemm_gpu.split_table_batched_embeddings_ops as split_table_batched_embeddings_ops
import fbgemm_gpu.split_table_batched_embeddings_ops as split_table_batched_embeddings_ops # usort:skip
import numpy as np
import torch

from fbgemm_gpu.split_embedding_configs import QuantizationConfig, SparseType
from torch import nn, Tensor
from torch import nn, Tensor # usort:skip


# TODO: add per-feature based converter option (based on embedding_specs during inference)
# TODO: optimize embedding pruning and quantization latency.
4 changes: 2 additions & 2 deletions fbgemm_gpu/fbgemm_gpu/split_table_batched_embeddings_ops.py
@@ -15,9 +15,9 @@
from typing import Dict, List, NamedTuple, Optional, Tuple, Type, Union

import fbgemm_gpu.split_embedding_codegen_lookup_invokers as invokers
import torch
import torch # usort:skip
from fbgemm_gpu.split_embedding_configs import EmbOptimType as OptimType, SparseType
from torch import nn, Tensor
from torch import nn, Tensor # usort:skip

DEFAULT_ASSOC = 32 if torch.version.hip is None else 64
# Maximum number of times prefetch() can be called without
1 change: 1 addition & 0 deletions fbgemm_gpu/fbgemm_gpu/uvm.py
@@ -9,6 +9,7 @@
from typing import Optional

import torch

from fbgemm_gpu.enums import create_enums

try: