|
14 | 14 | .with_compartment_id("<compartment_ocid>") |
15 | 15 | .with_project_id("<project_ocid>") |
16 | 16 | .with_subnet_id("<subnet_ocid>") |
17 | | - .with_shape_name("VM.GPU.A10.1") |
| 17 | + .with_shape_name("VM.GPU.A10.2") |
18 | 18 | .with_block_storage_size(256) |
19 | 19 | ) |
20 | 20 | .with_runtime( |
21 | 21 | PyTorchDistributedRuntime() |
22 | 22 | # Specify the service conda environment by slug name. |
23 | | - .with_service_conda("pytorch20_p39_gpu_v1") |
| 23 | + .with_service_conda("pytorch20_p39_gpu_v2") |
24 | 24 | .with_git( |
25 | 25 | url="https://github.com/facebookresearch/llama-recipes.git", |
26 | | - commit="03faba661f079ee1ecaeb66deaa6bdec920a7bab" |
| 26 | + commit="1aecd00924738239f8d86f342b36bacad180d2b3" |
27 | 27 | ) |
28 | 28 | .with_dependency( |
29 | 29 | pip_pkg=" ".join([ |
30 | | - "'accelerate>=0.21.0'", |
31 | | - "appdirs", |
32 | | - "loralib", |
33 | | - "bitsandbytes==0.39.1", |
34 | | - "black", |
35 | | - "'black[jupyter]'", |
36 | | - "datasets", |
37 | | - "fire", |
38 | | - "'git+https://github.com/huggingface/peft.git'", |
39 | | - "'transformers>=4.31.0'", |
40 | | - "sentencepiece", |
41 | | - "py7zr", |
42 | | - "scipy", |
43 | | - "optimum" |
| 30 | + "--extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0", |
| 31 | + "git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4", |
| 32 | + "appdirs==1.4.4", |
| 33 | + "llama-recipes==0.0.1", |
| 34 | + "py7zr==0.20.6", |
44 | 35 | ]) |
45 | 36 | ) |
46 | 37 | .with_output("/home/datascience/outputs", "oci://bucket@namespace/outputs/$JOB_RUN_OCID") |
47 | 38 | .with_command(" ".join([ |
48 | | - "torchrun llama_finetuning.py", |
| 39 | + "torchrun examples/finetuning.py", |
49 | 40 | "--enable_fsdp", |
50 | 41 | "--pure_bf16", |
51 | 42 | "--batch_size_training 1", |
52 | | - "--micro_batch_size 1", |
53 | 43 | "--model_name $MODEL_NAME", |
54 | 44 | "--dist_checkpoint_root_folder /home/datascience/outputs", |
55 | 45 | "--dist_checkpoint_folder fine-tuned" |
|
87 | 77 | spec: |
88 | 78 | git: |
89 | 79 | url: https://github.com/facebookresearch/llama-recipes.git |
90 | | - commit: 03faba661f079ee1ecaeb66deaa6bdec920a7bab |
| 80 | + commit: 1aecd00924738239f8d86f342b36bacad180d2b3 |
91 | 81 | command: >- |
92 | | - torchrun llama_finetuning.py
| 82 | + torchrun examples/finetuning.py
93 | 83 | --enable_fsdp |
94 | 84 | --pure_bf16 |
95 | 85 | --batch_size_training 1 |
96 | | - --micro_batch_size 1 |
97 | 86 | --model_name $MODEL_NAME |
98 | 87 | --dist_checkpoint_root_folder /home/datascience/outputs |
99 | 88 | --dist_checkpoint_folder fine-tuned |
100 | 89 | replicas: 2 |
101 | 90 | conda: |
102 | 91 | type: service |
103 | | - slug: pytorch20_p39_gpu_v1 |
| 92 | + slug: pytorch20_p39_gpu_v2 |
104 | 93 | dependencies: |
105 | 94 | pipPackages: >- |
106 | | - 'accelerate>=0.21.0' |
107 | | - appdirs |
108 | | - loralib |
109 | | - bitsandbytes==0.39.1 |
110 | | - black |
111 | | - 'black[jupyter]' |
112 | | - datasets |
113 | | - fire |
114 | | - 'git+https://github.com/huggingface/peft.git' |
115 | | - 'transformers>=4.31.0' |
116 | | - sentencepiece |
117 | | - py7zr |
118 | | - scipy |
119 | | - optimum |
| 95 | + --extra-index-url https://download.pytorch.org/whl/cu118 torch==2.1.0 |
| 96 | + git+https://github.com/huggingface/peft.git@15a013af5ff5660b9377af24d3eee358213d72d4 |
| 97 | + llama-recipes==0.0.1 |
| 98 | + appdirs==1.4.4 |
| 99 | + py7zr==0.20.6 |
120 | 100 | outputDir: /home/datascience/outputs |
121 | 101 | outputUri: oci://bucket@namespace/outputs/$JOB_RUN_OCID |
122 | 102 | env: |
|
0 commit comments