VinAIResearch · adidier17 · May 23, 2024
diff --git a/.devcontainer/Dockerfile_U2004_CUDA113 b/.devcontainer/Dockerfile_U2004_CUDA113
@@ -0,0 +1,48 @@
+FROM nvidia/cuda:11.3.1-devel-ubuntu20.04
+
+# Declared before any apt-get call so installs never stop on a prompt; ARG keeps it out of the runtime environment.
+ARG DEBIAN_FRONTEND=noninteractive
+RUN apt-get update && apt-get install -yq build-essential g++ gcc git wget
+# Unsure if openmpi is needed
+# RUN apt-get update && apt-get install libgl1-mesa-glx libglib2.0-0 libxcb-* \
+# openmpi-bin openmpi-common libopenmpi-dev libgtk2.0-dev -y  
+
+# Install Miniconda into $CONDA_DIR and delete the installer in the same layer so it does not stay in the image
+ENV CONDA_DIR=/opt/conda
+
+RUN wget --quiet https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh -O ~/miniconda.sh && \
+    /bin/bash ~/miniconda.sh -b -p "$CONDA_DIR" && rm ~/miniconda.sh
+
+# Put conda in path so we can use conda activate
+ENV PATH=$CONDA_DIR/bin:/usr/local/bin:$PATH
+# General packages; -y answers conda's confirmation prompt explicitly so the build never depends on stdin behaviour
+RUN conda install -y python=3.8
+RUN conda install -y numpy=1.23
+RUN conda install -y -c anaconda jupyter
+RUN echo "numpy==1.23.*" > /opt/conda/conda-meta/pinned
+RUN conda install -y pytorch==1.11.0 torchvision==0.12.0 torchaudio==0.11.0 cudatoolkit=11.3 -c pytorch
+RUN conda install -y conda=22.11
+RUN conda install -y -c conda-forge setuptools=59.5
+
+# Make sure CUDA is visible
+# ENV LD_LIBRARY_PATH /usr/local/cuda/lib64:$LD_LIBRARY_PATH
+ENV NVIDIA_VISIBLE_DEVICES all
+ENV NVIDIA_DRIVER_CAPABILITIES compute,utility
+# ARG TORCH_CUDA_ARCH_LIST="8.9"
+# Install pointgroup_ops (libsparsehash-dev first; the index is refreshed in this layer so a cached one is never reused)
+RUN apt-get update && apt-get install -y libsparsehash-dev
+COPY requirements.txt /tmp/requirements.txt
+RUN pip install -r /tmp/requirements.txt
+COPY lib /lib
+RUN cd /lib/pointgroup_ops && python setup.py develop
+
+# Install spconv
+RUN conda install libboost && pip install pccm
+RUN pip install spconv-cu113
+
+# Install pointnet2
+# RUN cd /lib/pointnet2 && python setup.py install
+
+# Install faiss
+RUN conda install -c conda-forge faiss-gpu
+
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,51 @@
+{
+    "build": {
+        "dockerfile": "Dockerfile_U2004_CUDA113",
+        "context": "..",
+        "args": {
+            "DOCKER_BUILDKIT": "0"
+        }
+    },
+    "mounts": [
+        "source=${localWorkspaceFolder},target=/workspace,type=bind,consistency=cached"
+    ],
+    "runArgs": [
+        "--gpus",
+        "all",
+        "--shm-size",
+        "16gb",
+        "-v",
+        "/tmp/.X11-unix:/tmp/.X11-unix"
+    ],
+    "containerEnv": {
+        "NVIDIA_DRIVER_CAPABILITIES": "all",
+        "DISPLAY": "unix:0"
+    },
+    "forwardPorts": [
+        8887,
+        8888,
+        8886
+    ],
+    "customizations": {
+        "vscode": {
+            "extensions": [
+                "ms-python.python",
+                "ms-python.vscode-pylance",
+                "ms-toolsai.jupyter",
+                "ms-python.black-formatter"
+            ],
+            "settings": { 
+                "python.defaultInterpreterPath": "/opt/conda/bin/python",
+                "python.linting.enabled": true,
+                "python.linting.pylintEnabled": true,
+                "[python]": {
+                    "editor.defaultFormatter": "ms-python.black-formatter",
+                    "editor.formatOnSave": true
+                }
+
+
+            }
+        }
+    },
+    "workspaceFolder": "/workspace"
+}
diff --git a/checkpoint.py b/checkpoint.py
@@ -26,8 +26,12 @@ def align_and_update_state_dicts(model_state_dict, loaded_state_dict):
     loaded_keys = sorted(list(loaded_state_dict.keys()))
     # get a matrix of string matches, where each (i, j) entry correspond to the size of the
     # loaded_key string, if it matches
-    match_matrix = [len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys]
-    match_matrix = torch.as_tensor(match_matrix).view(len(current_keys), len(loaded_keys))
+    match_matrix = [
+        len(j) if i.endswith(j) else 0 for i in current_keys for j in loaded_keys
+    ]
+    match_matrix = torch.as_tensor(match_matrix).view(
+        len(current_keys), len(loaded_keys)
+    )
     max_match_size, idxs = match_matrix.max(1)
     # remove indices that correspond to no-match
     idxs[max_match_size == 0] = -1
@@ -44,15 +48,19 @@ def align_and_update_state_dicts(model_state_dict, loaded_state_dict):
         key = current_keys[idx_new]
         key_old = loaded_keys[idx_old]
         if loaded_state_dict[key_old].shape != model_state_dict[key].shape:
-            # if 'unet' in key or 'input_conv' in key:
-            #     reshaped = loaded_state_dict[key_old].permute(4,0,1,2,3)
-            #     loaded_state_dict[key_old] = reshaped
-            # else:
-            print(
-                "Skip loading parameter {}, required shape{}, "
-                "loaded shape{}.".format(key, model_state_dict[key].shape, loaded_state_dict[key_old].shape)
-            )
-            loaded_state_dict[key_old] = model_state_dict[key]
+            if "unet" in key or "input_conv" in key:
+                reshaped = loaded_state_dict[key_old].permute(4, 0, 1, 2, 3)
+                loaded_state_dict[key_old] = reshaped
+            else:
+                print(
+                    "Skip loading parameter {}, required shape{}, "
+                    "loaded shape{}.".format(
+                        key,
+                        model_state_dict[key].shape,
+                        loaded_state_dict[key_old].shape,
+                    )
+                )
+                loaded_state_dict[key_old] = model_state_dict[key]
 
         model_state_dict[key] = loaded_state_dict[key_old]
         logger.info(
@@ -87,15 +95,28 @@ def mkdir_p(path):
             raise
 
 
-def checkpoint(model, optimizer, epoch, log_dir, best_val=None, best_val_iter=None, postfix=None, last=False):
+def checkpoint(
+    model,
+    optimizer,
+    epoch,
+    log_dir,
+    best_val=None,
+    best_val_iter=None,
+    postfix=None,
+    last=False,
+):
     mkdir_p(log_dir)
 
     if last:
         filename = "checkpoint_last.pth"
     else:
         filename = f"checkpoint_epoch_{epoch}.pth"
     checkpoint_file = log_dir + "/" + filename
-    state = {"epoch": epoch, "state_dict": model.state_dict(), "optimizer": optimizer.state_dict()}
+    state = {
+        "epoch": epoch,
+        "state_dict": model.state_dict(),
+        "optimizer": optimizer.state_dict(),
+    }
 
     torch.save(state, checkpoint_file)
     logging.info(f"Checkpoint saved to {checkpoint_file}")
diff --git a/lib/pointgroup_ops/src/bfs_cluster/bfs_cluster.h b/lib/pointgroup_ops/src/bfs_cluster/bfs_cluster.h
@@ -8,7 +8,7 @@ All Rights Reserved 2020.
 #define BFS_CLUSTER_H
 #include <torch/serialize/tensor.h>
 #include <ATen/cuda/CUDAContext.h>
-#include <THC/THC.h>
+// #include <THC/THC.h>
 
 #include "../datatype/datatype.h"