@@ -122,106 +122,24 @@ deployment_groups:
122122 settings :
123123 name : sample-pool
124124 zones : [$(vars.zone)]
125- machine_type : c3-standard-88 # Hyperdisk-extreme required C3 machine with 88 or more vCPUs
125+ machine_type : c3-standard-88 # Hyperdisk-extreme requires a C3 machine with 88 or more vCPUs
126126 auto_upgrade : true
127127
128- # Train a TensorFlow model with Keras and Hyperdisk Balanced on GKE
129- # Tutorial: https://cloud.google.com/parallelstore/docs/tensorflow-sample
130- - id : hyperdisk-balanced-job
128+ # This is an example job that installs and runs an `fio` benchmark against the Hyperdisk volumes.
129+ # For more FIO tests, see https://cloud.google.com/compute/docs/disks/benchmark-hyperdisk-performance
130+ - id : fio-bench-job-template
131131 source : modules/compute/gke-job-template
132132 use :
133133 - gke_cluster
134134 - hyperdisk-balanced-setup
135- settings :
136- name : tensorflow
137- image : jupyter/tensorflow-notebook@sha256:173f124f638efe870bb2b535e01a76a80a95217e66ed00751058c51c09d6d85d
138- security_context : # to make sure the job have enough access to execute the jobs and r/w from hyperdisk
139- - key : runAsUser
140- value : 1000
141- - key : runAsGroup
142- value : 100
143- - key : fsGroup
144- value : 100
145- command :
146- - bash
147- - -c
148- - |
149- pip install transformers datasets
150- python - <<EOF
151- from datasets import load_dataset
152- dataset = load_dataset("glue", "cola", cache_dir='/data/hyperdisk-balanced-pvc-0')
153- dataset = dataset["train"]
154- from transformers import AutoTokenizer
155- import numpy as np
156- tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
157- sentences = [str(s) for s in dataset["sentence"]]
158- tokenized_data = tokenizer(sentences, return_tensors="np", padding=True)
159- tokenized_data = dict(tokenized_data)
160- labels = np.array(dataset["label"])
161- from transformers import TFAutoModelForSequenceClassification
162- from tensorflow.keras.optimizers import Adam
163- model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased")
164- model.compile(optimizer=Adam(3e-5))
165- model.fit(tokenized_data, labels)
166- EOF
167- node_count : 1
168- outputs : [instructions]
169-
170- # Train a TensorFlow model with Keras and Hyperdisk Extreme on GKE
171- # Tutorial: https://cloud.google.com/parallelstore/docs/tensorflow-sample
172- - id : hyperdisk-extreme-job
173- source : modules/compute/gke-job-template
174- use :
175- - gke_cluster
176135 - hyperdisk-extreme-setup
177- settings :
178- name : tensorflow
179- image : jupyter/tensorflow-notebook@sha256:173f124f638efe870bb2b535e01a76a80a95217e66ed00751058c51c09d6d85d
180- security_context : # to make sure the job have enough access to execute the jobs and r/w from hyperdisk
181- - key : runAsUser
182- value : 1000
183- - key : runAsGroup
184- value : 100
185- - key : fsGroup
186- value : 100
187- command :
188- - bash
189- - -c
190- - |
191- pip install transformers datasets
192- python - <<EOF
193- from datasets import load_dataset
194- dataset = load_dataset("glue", "cola", cache_dir='/data/hyperdisk-extreme-pvc-0')
195- dataset = dataset["train"]
196- from transformers import AutoTokenizer
197- import numpy as np
198- tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
199- sentences = [str(s) for s in dataset["sentence"]]
200- tokenized_data = tokenizer(sentences, return_tensors="np", padding=True)
201- tokenized_data = dict(tokenized_data)
202- labels = np.array(dataset["label"])
203- from transformers import TFAutoModelForSequenceClassification
204- from tensorflow.keras.optimizers import Adam
205- model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased")
206- model.compile(optimizer=Adam(3e-5))
207- model.fit(tokenized_data, labels)
208- EOF
209- node_count : 1
210- outputs : [instructions]
211-
212- # Train a TensorFlow model with Keras and Hyperdisk Throughput on GKE
213- # Tutorial: https://cloud.google.com/parallelstore/docs/tensorflow-sample
214- - id : hyperdisk-throughput-job
215- source : modules/compute/gke-job-template
216- use :
217- - gke_cluster
218136 - hyperdisk-throughput-setup
219137 settings :
220- name : tensorflow
221- image : jupyter/tensorflow-notebook@sha256:173f124f638efe870bb2b535e01a76a80a95217e66ed00751058c51c09d6d85d
222- security_context : # to make sure the job have enough access to execute the jobs and r/w from hyperdisk
138+ name : fio-benchmark
139+ image : ubuntu:latest
140+ security_context : # to make sure the job has enough access to install the fio package
223141 - key : runAsUser
224- value : 1000
142+ value : 0
225143 - key : runAsGroup
226144 value : 100
227145 - key : fsGroup
@@ -230,23 +148,49 @@ deployment_groups:
230148 - bash
231149 - -c
232150 - |
233- pip install transformers datasets
234- python - <<EOF
235- from datasets import load_dataset
236- dataset = load_dataset("glue", "cola", cache_dir='/data/hyperdisk-throughput-pvc-0')
237- dataset = dataset["train"]
238- from transformers import AutoTokenizer
239- import numpy as np
240- tokenizer = AutoTokenizer.from_pretrained("bert-base-cased")
241- sentences = [str(s) for s in dataset["sentence"]]
242- tokenized_data = tokenizer(sentences, return_tensors="np", padding=True)
243- tokenized_data = dict(tokenized_data)
244- labels = np.array(dataset["label"])
245- from transformers import TFAutoModelForSequenceClassification
246- from tensorflow.keras.optimizers import Adam
247- model = TFAutoModelForSequenceClassification.from_pretrained("bert-base-cased")
248- model.compile(optimizer=Adam(3e-5))
249- model.fit(tokenized_data, labels)
250- EOF
151+
152+ set -eux
153+ export DEBIAN_FRONTEND=noninteractive
154+
155+ # Install fio
156+ apt update -y && apt install -y fio
157+
158+ # Use a tag to create a unique path for tests
159+ TAG=`date +%s`
160+
161+ # Verify mountpoints
162+ df -h
163+ mountpoint /data/hyperdisk-balanced-pvc-0
164+ mountpoint /data/hyperdisk-extreme-pvc-0
165+ mountpoint /data/hyperdisk-throughput-pvc-0
166+
167+ # Create temporary directories for fio benchmarks
168+ mkdir -p /data/hyperdisk-balanced-pvc-0/fio-benchmarks-${TAG}
169+ mkdir -p /data/hyperdisk-extreme-pvc-0/fio-benchmarks-${TAG}
170+ mkdir -p /data/hyperdisk-throughput-pvc-0/fio-benchmarks-${TAG}
171+
172+ # Perform hyperdisk balanced performance test (Mixed IOPS)
173+ fio --name=hyperdisk-balanced-iops --ioengine=libaio --iodepth=256 --rw=randrw \
174+ --bs=4k --direct=1 --size=10G --numjobs=16 --group_reporting --time_based --runtime=300s \
175+ --ramp_time=10s --iodepth_batch_submit=256 --iodepth_batch_complete_max=256 \
176+ --directory=/data/hyperdisk-balanced-pvc-0/fio-benchmarks-${TAG} --filename_format=fiotest-balanced-iops
177+
178+ # Perform hyperdisk extreme performance test (Max IOPS)
179+ fio --name=hyperdisk-extreme-iops --ioengine=libaio --iodepth=256 --rw=randwrite \
180+ --bs=4k --direct=1 --size=10G --numjobs=32 --group_reporting --time_based --runtime=300s --ramp_time=10s \
181+ --iodepth_batch_submit=256 --iodepth_batch_complete_max=256 \
182+ --directory=/data/hyperdisk-extreme-pvc-0/fio-benchmarks-${TAG} --filename_format=fiotest-extreme-iops
183+
184+ # Perform hyperdisk throughput performance test
185+ fio --name=hyperdisk-throughput-bw --ioengine=libaio --iodepth=64 --rw=write --bs=1M \
186+ --direct=1 --size=10G --numjobs=32 --group_reporting --time_based --runtime=300s --ramp_time=10s \
187+ --iodepth_batch_submit=64 --iodepth_batch_complete_max=64 \
188+ --directory=/data/hyperdisk-throughput-pvc-0/fio-benchmarks-${TAG} --filename_format=fiotest-throughput-bw
189+
190+ # Clean up temporary directories for fio benchmarks
191+ rm -rf /data/hyperdisk-balanced-pvc-0/fio-benchmarks-${TAG}
192+ rm -rf /data/hyperdisk-extreme-pvc-0/fio-benchmarks-${TAG}
193+ rm -rf /data/hyperdisk-throughput-pvc-0/fio-benchmarks-${TAG}
251194 node_count : 1
195+
252196 outputs : [instructions]
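
Once the blueprint is deployed and the generated job manifest is applied, the fio results appear in the job's pod logs. A minimal sketch for retrieving them, assuming kubectl credentials for the cluster are already configured; the actual job name is derived from the `fio-benchmark` value set above and the gke-job-template may append a suffix, so adjust the placeholder to match the `kubectl get jobs` output:

  # List the jobs created from the template, then follow the benchmark output;
  # fio prints IOPS and bandwidth summaries for each of the three tests.
  kubectl get jobs
  kubectl logs -f job/<fio-benchmark-job-name>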