diff --git a/README.md b/README.md
index edfdff691..bd28f0a33 100644
--- a/README.md
+++ b/README.md
@@ -169,7 +169,7 @@ Running regression tests against a specific git revision or tag:
 ### Sample single-process running code:
 
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --save-frequency 1 \
     --zeroshot-frequency 1 \
     --report-to tensorboard \
@@ -234,7 +234,7 @@ a job on a node of 4 GPUs:
 
 ```bash
 cd open_clip/src
-torchrun --nproc_per_node 4 -m training.main \
+torchrun --nproc_per_node 4 -m open_clip_train.main \
     --train-data '/data/cc12m/cc12m-train-{0000..2175}.tar' \
     --train-num-samples 10968539 \
     --dataset-type webdataset \
@@ -253,7 +253,7 @@ of nodes and host node.
 cd open_clip/src
 torchrun --nproc_per_node=4 \
     --rdzv_endpoint=$HOSTE_NODE_ADDR \
-    -m training.main \
+    -m open_clip_train.main \
     --train-data '/data/cc12m/cc12m-train-{0000..2175}.tar' \
     --train-num-samples 10968539 \
     --dataset-type webdataset \
@@ -289,7 +289,7 @@ export MASTER_ADDR=$master_addr
 
 cd /shared/open_clip
 export PYTHONPATH="$PYTHONPATH:$PWD/src"
-srun --cpu_bind=v --accel-bind=gn python -u src/training/main.py \
+srun --cpu_bind=v --accel-bind=gn python -u src/open_clip_train/main.py \
     --save-frequency 1 \
     --report-to tensorboard \
     --train-data="/data/LAION-400M/{00000..41455}.tar" \
@@ -307,7 +307,7 @@ srun --cpu_bind=v --accel-bind=gn python -u src/training/main.py \
 ### Resuming from a checkpoint:
 
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --train-data="/path/to/train_data.csv" \
     --val-data="/path/to/validation_data.csv" \
     --resume /path/to/checkpoints/epoch_K.pt
@@ -376,7 +376,7 @@ pd.DataFrame.from_dict(future_df).to_csv(
 ```
 This should create a csv dataset that one can use to fine-tune coca with open_clip
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --dataset-type "csv" \
     --train-data "path/to/data/dir/train2014.csv" \
     --warmup 1000 \
@@ -392,7 +392,7 @@ python -m training.main \
     --log-every-n-steps 100
 ```
 
-This is a general setting, open_clip has very parameters that can be set, ```python -m training.main --help``` should show them. The only relevant change compared to pre-training are the two arguments
+This is a general setting; open_clip has many parameters that can be set, and ```python -m open_clip_train.main --help``` should show them. The only relevant changes compared to pre-training are the two arguments
 
 ```bash
 --coca-contrastive-loss-weight 0
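The two weights above zero out the contrastive objective so that fine-tuning trains only the generative (captioning) side. If you want to keep both objectives instead, one plausible starting point is the CoCa paper's 1:2 contrastive-to-caption weighting; the command below is a sketch under that assumption, not a tuned recipe from this repo:

```bash
# joint contrastive + captioning fine-tuning; the 1:2 weighting follows the
# CoCa paper (lambda_con=1, lambda_cap=2) and is an assumption to tune, not a prescription
python -m open_clip_train.main \
    --dataset-type "csv" \
    --train-data "path/to/data/dir/train2014.csv" \
    --model "coca_ViT-L-14" \
    --coca-contrastive-loss-weight 1 \
    --coca-caption-loss-weight 2
```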
@@ -404,7 +404,7 @@ which make the model only train the generative side.
 If you wish to use different language models as the text encoder for CLIP you can do so by using one of the Hugging Face model configs in ```src/open_clip/model_configs``` and passing in its tokenizer as the ```--model``` and ```--hf-tokenizer-name``` parameters respectively. Currently we only support RoBERTa ("test-roberta" config); however, adding new models should be trivial. You can also determine how many layers, from the end, to leave unfrozen with the ```--lock-text-unlocked-layers``` parameter.
 Here's an example command to train CLIP with the RoBERTa LM that has its last 10 layers unfrozen:
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --train-data="pipe:aws s3 cp s3://s-mas/cc3m/{00000..00329}.tar -" \
     --train-num-samples 3000000 \
     --val-data="pipe:aws s3 cp s3://s-mas/cc3m/{00330..00331}.tar -" \
@@ -453,7 +453,7 @@ We recommend https://github.com/LAION-AI/CLIP_benchmark#how-to-use for systemati
 ### Evaluating local checkpoint:
 
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --val-data="/path/to/validation_data.csv" \
     --model RN101 \
     --pretrained /path/to/checkpoints/epoch_K.pt
@@ -462,7 +462,7 @@ python -m training.main \
 ### Evaluating hosted pretrained checkpoint on ImageNet zero-shot prediction:
 
 ```bash
-python -m training.main \
+python -m open_clip_train.main \
     --imagenet-val /path/to/imagenet/validation \
     --model ViT-B-32-quickgelu \
     --pretrained laion400m_e32
diff --git a/docs/script_examples/clipa/vit_b16/i50_t16_finetune.sh b/docs/script_examples/clipa/vit_b16/i50_t16_finetune.sh
index 7691742b2..b8d446f1d 100644
--- a/docs/script_examples/clipa/vit_b16/i50_t16_finetune.sh
+++ b/docs/script_examples/clipa/vit_b16/i50_t16_finetune.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_b16/i50_t16_pretrain.sh b/docs/script_examples/clipa/vit_b16/i50_t16_pretrain.sh
index 662c192d5..8985f7811 100644
--- a/docs/script_examples/clipa/vit_b16/i50_t16_pretrain.sh
+++ b/docs/script_examples/clipa/vit_b16/i50_t16_pretrain.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_l16/i17_t16_finetune.sh b/docs/script_examples/clipa/vit_l16/i17_t16_finetune.sh
index a4a03e6d4..b4a024c5f 100644
--- a/docs/script_examples/clipa/vit_l16/i17_t16_finetune.sh
+++ b/docs/script_examples/clipa/vit_l16/i17_t16_finetune.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_l16/i17_t16_pretrain.sh b/docs/script_examples/clipa/vit_l16/i17_t16_pretrain.sh
index ecf0c1f7c..cf729876a 100644
--- a/docs/script_examples/clipa/vit_l16/i17_t16_pretrain.sh
+++ b/docs/script_examples/clipa/vit_l16/i17_t16_pretrain.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipa/vit_l16/i37_t8_finetune.sh b/docs/script_examples/clipa/vit_l16/i37_t8_finetune.sh
index 5437a3435..5d82e18dc 100644
--- a/docs/script_examples/clipa/vit_l16/i37_t8_finetune.sh
+++ b/docs/script_examples/clipa/vit_l16/i37_t8_finetune.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
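All of the CLIPA example scripts above assume `torchrun --nproc_per_node 8`. If fewer GPUs are available, one way to approximate the same global batch size is gradient accumulation via open_clip's `--accum-freq` flag; the GPU count and batch values below are illustrative assumptions, not taken from the scripts being patched:

```bash
# sketch: 2 GPUs x batch 256 x accum 4 matches the global batch of 8 GPUs x batch 256
torchrun --nproc_per_node 2 -m open_clip_train.main \
    --batch-size 256 \
    --accum-freq 4 \
    --save-frequency 1 \
    --save-most-recent \
    --zeroshot-frequency 1
```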
diff --git a/docs/script_examples/clipa/vit_l16/i37_t8_pretrain.sh b/docs/script_examples/clipa/vit_l16/i37_t8_pretrain.sh
index 5dcd5c8c2..83b1a2f08 100644
--- a/docs/script_examples/clipa/vit_l16/i37_t8_pretrain.sh
+++ b/docs/script_examples/clipa/vit_l16/i37_t8_pretrain.sh
@@ -1,4 +1,4 @@
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipav2/vit_h14/i257_t32_finetunex4.sh b/docs/script_examples/clipav2/vit_h14/i257_t32_finetunex4.sh
index 40b9c4a9f..fad0b134a 100644
--- a/docs/script_examples/clipav2/vit_h14/i257_t32_finetunex4.sh
+++ b/docs/script_examples/clipav2/vit_h14/i257_t32_finetunex4.sh
@@ -1,7 +1,7 @@
 # have not been tested. use it at your own discretion
 # the original experiment was run on tpu v3-256.
 # this example script assumes 8 gpus, each with huge memory. Tune batchsize, warmup, and lr accordingly if you have different machine setups.
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/clipav2/vit_h14/i50_t8_pretrain.sh b/docs/script_examples/clipav2/vit_h14/i50_t8_pretrain.sh
index f98ea9c25..f7e9a5fe1 100644
--- a/docs/script_examples/clipav2/vit_h14/i50_t8_pretrain.sh
+++ b/docs/script_examples/clipav2/vit_h14/i50_t8_pretrain.sh
@@ -1,7 +1,7 @@
 # have not been tested. use it at your own discretion
 # the original experiment was run on tpu v3-256.
 # this example script assumes 8 gpus, each with huge memory. Tune batchsize, warmup, and lr accordingly if you have different machine setups.
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
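The clipav2 scripts above and below repeat the warning to tune batch size, warmup, and lr for your machine. A common heuristic, assumed here rather than prescribed by these scripts, is to scale the learning rate linearly with the global batch size from a known reference point (the `scripts/h14_*` files further down note 64k batch size for lr 2.048e-3):

```bash
# illustrative linear lr scaling; NPROC and PER_GPU_BS are hypothetical values
REF_GLOBAL_BS=65536      # reference recipe: 64k global batch
REF_LR=2.048e-3          # reference recipe: lr at that batch size
NPROC=8
PER_GPU_BS=512
GLOBAL_BS=$((NPROC * PER_GPU_BS))
LR=$(python3 -c "print(${REF_LR} * ${GLOBAL_BS} / ${REF_GLOBAL_BS})")
echo "global batch ${GLOBAL_BS} -> lr ${LR}"
```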
diff --git a/docs/script_examples/clipav2/vit_h14/i577_t32_finetunex1.sh b/docs/script_examples/clipav2/vit_h14/i577_t32_finetunex1.sh
index 232bb8fcf..1473c6c2b 100644
--- a/docs/script_examples/clipav2/vit_h14/i577_t32_finetunex1.sh
+++ b/docs/script_examples/clipav2/vit_h14/i577_t32_finetunex1.sh
@@ -1,7 +1,7 @@
 # have not been tested. use it at your own discretion
 # the original experiment was run on tpu v3-256.
 # this example script assumes 8 gpus, each with huge memory. Tune batchsize, warmup, and lr accordingly if you have different machine setups.
-torchrun --nproc_per_node 8 -m training.main \
+torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/docs/script_examples/stability_example.sh b/docs/script_examples/stability_example.sh
index f2801ee08..7eaa952be 100644
--- a/docs/script_examples/stability_example.sh
+++ b/docs/script_examples/stability_example.sh
@@ -34,7 +34,7 @@ export PYTHONPATH="$PYTHONPATH:/admin/home-mitchellw/open_clip/src"
 
 EXP_NAME="test-B-32-laion5b-lr1e-3-bs90k"
 
-srun --comment laion --cpu_bind=v --accel-bind=gn python -m training.main \
+srun --comment laion --cpu_bind=v --accel-bind=gn python -m open_clip_train.main \
     --save-frequency 1 \
     --train-data="pipe:aws s3 cp s3://s-datasets/laion5b/{laion2B-data/{000000..231349}.tar,laion2B-multi-data/{000000..226687}.tar,laion1B-nolang-data/{000000..127231}.tar} -" \
     --train-num-samples 135646078 \
diff --git a/scripts/clipav1_vit_l16_i37_t8.sh b/scripts/clipav1_vit_l16_i37_t8.sh
index d3ff0901e..b84528a9b 100644
--- a/scripts/clipav1_vit_l16_i37_t8.sh
+++ b/scripts/clipav1_vit_l16_i37_t8.sh
@@ -1,5 +1,5 @@
 # eval on a single gpu
-CUDA_VISIBLE_DEVICES=2 TORCH_CUDNN_V8_API_ENABLED=1 TFDS_PREFETCH_SIZE=8192 python3 -m training.main \
+CUDA_VISIBLE_DEVICES=2 TORCH_CUDNN_V8_API_ENABLED=1 TFDS_PREFETCH_SIZE=8192 python3 -m open_clip_train.main \
     --model ViT-L-16-CL32-GAP \
     --pretrained "/path/to/clipa_vit_l16_i37_t8.pt" \
     --seed 0 \
diff --git a/scripts/clipav2_vit_h14_i84_224_336_cl32_gap_datacomp1b.sh b/scripts/clipav2_vit_h14_i84_224_336_cl32_gap_datacomp1b.sh
index 7f22386c3..434398b1f 100644
--- a/scripts/clipav2_vit_h14_i84_224_336_cl32_gap_datacomp1b.sh
+++ b/scripts/clipav2_vit_h14_i84_224_336_cl32_gap_datacomp1b.sh
@@ -1,4 +1,4 @@
-CUDA_VISIBLE_DEVICES=1 python3 -m training.main \
+CUDA_VISIBLE_DEVICES=1 python3 -m open_clip_train.main \
     --model ViT-H-14-CL32-GAP-BigVision \
     --pretrained "/path/to/vit_h14_i84_224_336_cl32_gap_datacomp1b.pt" \
     --force-image-size 336 \
diff --git a/scripts/h14_224_32_finetune.sh b/scripts/h14_224_32_finetune.sh
index 7026b6415..82c296a1f 100644
--- a/scripts/h14_224_32_finetune.sh
+++ b/scripts/h14_224_32_finetune.sh
@@ -1,5 +1,5 @@
 # 64k batchsize for 2.048e-3 lr
-TORCH_CUDNN_V8_API_ENABLED=1 torchrun --nproc_per_node 8 -m training.main \
+TORCH_CUDNN_V8_API_ENABLED=1 torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
diff --git a/scripts/h14_84_8_pretrain.sh b/scripts/h14_84_8_pretrain.sh
index 4130ee5f6..c430ba615 100644
--- a/scripts/h14_84_8_pretrain.sh
+++ b/scripts/h14_84_8_pretrain.sh
@@ -1,5 +1,5 @@
 # 64k batchsize for 2.048e-3 lr
-TORCH_CUDNN_V8_API_ENABLED=1 torchrun --nproc_per_node 8 -m training.main \
+TORCH_CUDNN_V8_API_ENABLED=1 torchrun --nproc_per_node 8 -m open_clip_train.main \
     --save-frequency 1 \
     --save-most-recent \
     --zeroshot-frequency 1 \
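Since this patch moves every entry point from `training.main` to `open_clip_train.main` (and `src/training/main.py` to `src/open_clip_train/main.py`), local scripts that still reference the old module will fail with a `ModuleNotFoundError`. A migration sketch, assuming GNU sed and a hypothetical `my_scripts/` directory:

```bash
# find remaining references to the old module path and rewrite them in place
# (on macOS/BSD sed, use: sed -i '' ...)
grep -rl 'training\.main' my_scripts/ | xargs sed -i 's/training\.main/open_clip_train.main/g'

# sanity check: the renamed module should import cleanly in the current environment
python -c "import open_clip_train.main"
```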