diff --git a/.github/workflows/test-examples.yml b/.github/workflows/test-examples.yml
index b6a7694..8a770a4 100644
--- a/.github/workflows/test-examples.yml
+++ b/.github/workflows/test-examples.yml
@@ -113,7 +113,8 @@ jobs:
           - {script: "integrations/model-training/pytorch-lightning/pytorch-lightning-optimizer/pytorch-lightning-optimizer.py", arg: ""}
           - {script: "integrations/model-training/pytorch/pytorch-mnist/pytorch-mnist-example.py", arg: ""}
           - {script: "integrations/model-training/pytorch/pytorch-rich-logging/pytorch-rich-logging-example.py", arg: ""}
           - {script: "integrations/model-training/pytorch/pytorch-tensorboard/pytorch-tensorboard-example.py", arg: ""}
+          - {script: "integrations/model-training/ray-train/ray-train-hello-world-transformers/Comet_with_ray_train_huggingface_transformers.py", arg: "run"}
           - {script: "integrations/model-training/scikit-learn/sklearn-classification-example/comet-scikit-classification-example.py", arg: "run"}
           - {script: "integrations/model-training/scikit-learn/sklearn-nlp-example/comet-scikit-nlp-example.py", args: ""}
           - {script: "integrations/model-training/transformers/transformers-distilbert-fine-tuning/transformers-distilbert-fine-tuning.py", arg: ""}
diff --git a/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/README.md b/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/README.md
new file mode 100644
index 0000000..5562d0e
--- /dev/null
+++ b/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/README.md
@@ -0,0 +1,28 @@
+# PyTorch Lightning integration with Comet.ml
+
+[PyTorch Lightning](https://lightning.ai/docs/pytorch/stable/) is the deep learning framework for professional AI researchers and machine learning engineers who need maximal flexibility without sacrificing performance at scale. Lightning evolves with you as your projects go from idea to paper/production.
+
+Instrument PyTorch Lightning with Comet to start managing experiments, create dataset versions and track hyperparameters for faster and easier reproducibility and collaboration.
+
+## Documentation
+
+For more information on using and configuring the PyTorch Lightning integration, see: [https://www.comet.com/docs/v2/integrations/ml-frameworks/pytorch-lightning/](https://www.comet.com/docs/v2/integrations/ml-frameworks/pytorch-lightning/?utm_source=comet-examples&utm_medium=referral&utm_campaign=github_repo_2023&utm_content=pytorch-lightning)
+
+## See it
+
+Take a look at this [public Comet Experiment](https://www.comet.com/examples/comet-example-pytorch-lightning/53ea47db44164a15af3a06a12f112f67).
+
+## Setup
+
+Install dependencies
+
+```bash
+python -m pip install -r requirements.txt
+```
+
+## Run the example
+
+
+```bash
+python pytorch-lightning-hello-world.py
+```
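> Reviewer note: a minimal sketch of the wiring this README describes, condensed from the full script added in the next file of this diff. `Model`, `train_loader`, and `eval_loader` are defined there; only the Comet-specific lines are shown here.

```python
# Minimal Comet <-> Lightning wiring, condensed from the example below.
import comet_ml
from lightning import Trainer
from lightning.pytorch.loggers import CometLogger

comet_ml.login(project_name="comet-example-pytorch-lightning")

# Keyword arguments given to CometLogger are forwarded to comet_ml.Experiment
comet_logger = CometLogger()

trainer = Trainer(max_epochs=3, logger=comet_logger)
# trainer.fit(model, train_loader, eval_loader)  # defined in the full script
```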
diff --git a/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/pytorch-lightning-hello-world.py b/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/pytorch-lightning-hello-world.py
new file mode 100644
index 0000000..ac8f62c
--- /dev/null
+++ b/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/pytorch-lightning-hello-world.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+import os
+
+import comet_ml
+
+import lightning.pytorch as pl
+import torch
+import torch.nn.functional as F
+from lightning import Trainer
+from lightning.pytorch.loggers import CometLogger
+from torch.utils.data import DataLoader
+from torchvision import transforms
+from torchvision.datasets import MNIST
+
+comet_ml.login(project_name="comet-example-pytorch-lightning")
+
+
+# Arguments passed to CometLogger are forwarded to the comet_ml.Experiment class
+comet_logger = CometLogger()
+
+
+class Model(pl.LightningModule):
+    def __init__(self, layer_size=784):
+        super().__init__()
+        self.save_hyperparameters()
+        self.l1 = torch.nn.Linear(layer_size, 10)
+
+    def forward(self, x):
+        return torch.relu(self.l1(x.view(x.size(0), -1)))
+
+    def training_step(self, batch, batch_nb):
+        x, y = batch
+        loss = F.cross_entropy(self(x), y)
+        self.log("train_loss", loss)
+        return loss
+
+    def validation_step(self, batch, batch_nb):
+        x, y = batch
+        y_hat = self.forward(x)
+        loss = F.cross_entropy(y_hat, y)
+        self.log("val_loss", loss)
+        return loss
+
+    def configure_optimizers(self):
+        return torch.optim.Adam(self.parameters(), lr=0.02)
+
+
+PATH_DATASETS = os.environ.get("PATH_DATASETS", ".")
+BATCH_SIZE = 256 if torch.cuda.device_count() else 64
+
+
+# Init our model
+model = Model()
+
+# Init DataLoader from MNIST Dataset
+train_ds = MNIST(
+    PATH_DATASETS, train=True, download=True, transform=transforms.ToTensor()
+)
+train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE)
+
+eval_ds = MNIST(
+    PATH_DATASETS, train=False, download=True, transform=transforms.ToTensor()
+)
+eval_loader = DataLoader(eval_ds, batch_size=BATCH_SIZE)
+
+comet_logger.log_hyperparams({"batch_size": BATCH_SIZE})
+
+# Initialize a trainer
+trainer = Trainer(max_epochs=3, logger=comet_logger)
+
+# Train the model ⚡
+trainer.fit(model, train_loader, eval_loader)
diff --git a/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/requirements.txt b/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/requirements.txt
new file mode 100644
index 0000000..afbe95f
--- /dev/null
+++ b/integrations/model-training/pytorch-lightning/pytorch-lightning-hello-world/requirements.txt
@@ -0,0 +1,5 @@
+comet_ml>=3.44.0
+lightning
+numpy
+torch
+torchvision
diff --git a/integrations/model-training/pytorch-lightning/pytorch-lightning-optimizer/README.md b/integrations/model-training/pytorch-lightning/pytorch-lightning-optimizer/README.md
index 5fa6e81..367e513 100644
--- a/integrations/model-training/pytorch-lightning/pytorch-lightning-optimizer/README.md
+++ b/integrations/model-training/pytorch-lightning/pytorch-lightning-optimizer/README.md
@@ -27,5 +27,5 @@ This example shows how to use PyTorch Lightning and Comet Optimizer together to
 
 
 ```bash
-python pytorch_lightning_optimizer.py_
+python pytorch-lightning-optimizer.py
```
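> Reviewer note: since the workflow above runs this script in CI, a quick smoke-test variant may be worth keeping in mind. `fast_dev_run` is a standard Lightning `Trainer` flag; this sketch reuses `comet_logger`, `model`, and the loaders from the script above rather than redefining them.

```python
# Smoke-test sketch: fast_dev_run runs a single batch of train and
# validation, then exits, which is enough to confirm that the Comet
# logging is wired up correctly without a full training run.
from lightning import Trainer

trainer = Trainer(fast_dev_run=True, logger=comet_logger)
trainer.fit(model, train_loader, eval_loader)
```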
diff --git a/integrations/model-training/pytorch/pytorch-rich-logging/pytorch-rich-logging-example.py b/integrations/model-training/pytorch/pytorch-rich-logging/pytorch-rich-logging-example.py
index e0f1bda..fcca561 100644
--- a/integrations/model-training/pytorch/pytorch-rich-logging/pytorch-rich-logging-example.py
+++ b/integrations/model-training/pytorch/pytorch-rich-logging/pytorch-rich-logging-example.py
@@ -129,7 +129,6 @@ def onehot(i):
 # Train the Model
 total_steps = len(train_dataset) // hyper_params["batch_size"]
 with experiment.train():
-    print("Logging weights as histogram (before training)...")
 
     # Log model weights
     weights = []
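> Reviewer note: the example touched above logs model weights as histograms; removing the stray `print` does not change that. For reference, a sketch of the underlying Comet call, assuming the SDK's `log_histogram_3d` method; the `torch.nn.Linear` layer is a hypothetical stand-in for the example's real network.

```python
# Sketch of the weight-histogram logging pattern the example performs.
import comet_ml
import torch

experiment = comet_ml.Experiment()
model = torch.nn.Linear(784, 10)  # hypothetical stand-in model

# Log one histogram per parameter tensor at a given training step
for name, param in model.named_parameters():
    experiment.log_histogram_3d(param.detach().cpu().numpy(), name=name, step=0)

experiment.end()
```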
diff --git a/integrations/model-training/ray-train/ray-train-hello-world-transformers/Comet_with_ray_train_huggingface_transformers.py b/integrations/model-training/ray-train/ray-train-hello-world-transformers/Comet_with_ray_train_huggingface_transformers.py
new file mode 100644
index 0000000..d242bb5
--- /dev/null
+++ b/integrations/model-training/ray-train/ray-train-hello-world-transformers/Comet_with_ray_train_huggingface_transformers.py
@@ -0,0 +1,134 @@
+#!/usr/bin/env python
+# coding: utf-8
+
+
+import os
+
+import comet_ml
+import comet_ml.integration.ray
+
+import evaluate
+import numpy as np
+import ray.train.huggingface.transformers
+from datasets import load_dataset
+from ray.train import RunConfig, ScalingConfig
+from ray.train.torch import TorchTrainer
+from transformers import (
+    AutoModelForSequenceClassification,
+    AutoTokenizer,
+    Trainer,
+    TrainingArguments,
+    enable_full_determinism,
+)
+
+comet_ml.login()
+
+
+# Models
+PRE_TRAINED_MODEL_NAME = "google-bert/bert-base-cased"
+SEED = 42
+
+enable_full_determinism(SEED)
+
+
+def get_dataset():
+    tokenizer = AutoTokenizer.from_pretrained(PRE_TRAINED_MODEL_NAME)
+
+    def tokenize_function(examples):
+        return tokenizer(examples["text"], padding="max_length", truncation=True)
+
+    dataset = load_dataset("yelp_review_full")
+    dataset["train"] = dataset["train"].shuffle(seed=SEED).select(range(100))
+    dataset["test"] = dataset["test"].shuffle(seed=SEED).select(range(100))
+
+    tokenized_datasets = dataset.map(tokenize_function, batched=True)
+
+    small_train_dataset = tokenized_datasets["train"]
+    small_eval_dataset = tokenized_datasets["test"]
+    return (small_train_dataset, small_eval_dataset)
+
+
+def train_func(config):
+    from comet_ml.integration.ray import comet_worker_logger
+
+    with comet_worker_logger(config):
+        small_train_dataset, small_eval_dataset = get_dataset()
+
+        # Model
+        model = AutoModelForSequenceClassification.from_pretrained(
+            PRE_TRAINED_MODEL_NAME, num_labels=5
+        )
+
+        # Evaluation Metrics
+        metric = evaluate.load("accuracy")
+
+        def compute_metrics(eval_pred):
+            logits, labels = eval_pred
+            predictions = np.argmax(logits, axis=-1)
+
+            experiment = comet_ml.get_running_experiment()
+            if experiment:
+                experiment.log_confusion_matrix(predictions, labels)
+
+            return metric.compute(predictions=predictions, references=labels)
+
+        # Hugging Face Trainer
+        training_args = TrainingArguments(
+            do_eval=True,
+            do_train=True,
+            eval_strategy="epoch",
+            num_train_epochs=config["epochs"],
+            output_dir="./results",
+            overwrite_output_dir=True,
+            per_device_eval_batch_size=4,
+            per_device_train_batch_size=4,
+            report_to=["comet_ml"],
+            seed=SEED,
+        )
+        trainer = Trainer(
+            model=model,
+            args=training_args,
+            train_dataset=small_train_dataset,
+            eval_dataset=small_eval_dataset,
+            compute_metrics=compute_metrics,
+        )
+
+        # Report Metrics and Checkpoints to Ray Train
+        callback = ray.train.huggingface.transformers.RayTrainReportCallback()
+        trainer.add_callback(callback)
+
+        # Prepare Transformers Trainer
+        trainer = ray.train.huggingface.transformers.prepare_trainer(trainer)
+
+        # Start Training
+        trainer.train()
+
+        comet_ml.get_running_experiment().end()
+
+
+def train(num_workers: int = 2, use_gpu: bool = False, epochs=1):
+    scaling_config = ScalingConfig(num_workers=num_workers, use_gpu=use_gpu)
+    config = {"use_gpu": use_gpu, "epochs": epochs}
+
+    callback = comet_ml.integration.ray.CometTrainLoggerCallback(
+        config, project_name="comet-example-ray-train-hugginface-transformers"
+    )
+
+    ray_trainer = TorchTrainer(
+        train_func,
+        scaling_config=scaling_config,
+        train_loop_config=config,
+        run_config=RunConfig(callbacks=[callback]),
+    )
+    return ray_trainer.fit()
+
+
+ideal_num_workers = 2
+
+available_local_cpu_count = os.cpu_count() - 1
+num_workers = min(ideal_num_workers, available_local_cpu_count)
+
+if num_workers < 1:
+    num_workers = 1
+
+train(num_workers, use_gpu=False, epochs=5)
diff --git a/integrations/model-training/ray-train/ray-train-hello-world-transformers/README.md b/integrations/model-training/ray-train/ray-train-hello-world-transformers/README.md
new file mode 100644
index 0000000..97ef3b6
--- /dev/null
+++ b/integrations/model-training/ray-train/ray-train-hello-world-transformers/README.md
@@ -0,0 +1,28 @@
+# Ray Train integration with Comet.ml
+
+[Ray Train](https://docs.ray.io/en/latest/train/train.html) scales model training for popular ML frameworks such as Torch, XGBoost, TensorFlow, and more. It seamlessly integrates with other Ray libraries such as Tune and Predictors.
+
+Comet integrates with Ray Train by letting you easily monitor the resource usage of all of your workers, making sure you are fully using your expensive GPUs and that your CPUs are not the bottleneck in your training.
+
+## Documentation
+
+For more information on using and configuring the Ray Train integration, see: [https://www.comet.com/docs/v2/integrations/ml-frameworks/ray/#ray-train](https://www.comet.com/docs/v2/integrations/ml-frameworks/ray/?utm_source=comet-examples&utm_medium=referral&utm_campaign=github_repo_2023&utm_content=ray-train#ray-train)
+
+## See it
+
+Take a look at this [public Comet Project](https://www.comet.com/examples/comet-example-ray-train-hugginface-transformers/).
+
+## Setup
+
+Install dependencies
+
+```bash
+python -m pip install -r requirements.txt
+```
+
+## Run the example
+
+
+```bash
+python Comet_with_ray_train_huggingface_transformers.py
+```
diff --git a/integrations/model-training/ray-train/ray-train-hello-world-transformers/requirements.txt b/integrations/model-training/ray-train/ray-train-hello-world-transformers/requirements.txt
new file mode 100644
index 0000000..67d6199
--- /dev/null
+++ b/integrations/model-training/ray-train/ray-train-hello-world-transformers/requirements.txt
@@ -0,0 +1,11 @@
+accelerate>=0.12.0
+comet_ml>=3.31.5
+datasets
+evaluate
+protobuf
+ray[air]>=2.1.0
+scikit-learn
+scipy
+sentencepiece
+torch>=1.3
+transformers>=4.43.0
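> Reviewer note: for quick reference, the driver/worker split the new example uses, distilled into a sketch. Names are taken from the script above; the `train_loop` body is elided, and `project_name` is omitted for brevity.

```python
# Distilled Comet + Ray Train pattern from the example above: the driver
# process attaches CometTrainLoggerCallback, and each worker wraps its
# training loop in comet_worker_logger so all worker metrics and system
# stats land in the same Comet experiment.
import comet_ml.integration.ray
from comet_ml.integration.ray import comet_worker_logger
from ray.train import RunConfig, ScalingConfig
from ray.train.torch import TorchTrainer


def train_loop(config):
    with comet_worker_logger(config):
        ...  # per-worker training code, as in train_func above


config = {"epochs": 1}
callback = comet_ml.integration.ray.CometTrainLoggerCallback(config)

trainer = TorchTrainer(
    train_loop,
    scaling_config=ScalingConfig(num_workers=2, use_gpu=False),
    train_loop_config=config,
    run_config=RunConfig(callbacks=[callback]),
)
# trainer.fit()  # starts training and streams results to Comet
```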