Commit 6776245

Authored by: sujik18, github-actions, github-actions[bot], arjunsuresh, anandhu-eng
Replaced shell commands with Python for Windows compliance script compatibility (#2344)
* [Automated Commit] Format Codebase
* Updated tags for submission checker command in docs
* Update mobilenets docs
* Update main.py
* Update main.py
* update dataset download commands - waymo calib (#2130)
* Merge from Master (#2155)
* Update submission_checker.py | Fix open model unit in Results (#2144)
* Add Llama 3.1 to special unit dict (#2150)
---------
Co-authored-by: Pablo Gonzalez <[email protected]>
* [Automated Commit] Format Codebase
* Inference docs - Update model and dataset download commands (#2153)
* Update llama2 70b model download docs
* changes in model and dataset download commands
* add powershell command to get result folder structure (#2156)
* [Automated Commit] Format Codebase
* [Automated Commit] Format Codebase
* [Automated Commit] Format Codebase
* [Automated Commit] Format Codebase
* [Automated Commit] Format Codebase
* Fix Typo in Interactive Latencies (#2147) (#2225)
* Fix Typo in Interactive Latencies
* Update submission_checker.py
* Fix Typo in Interactive Latencies (#2147) (#2226)
* Fix Typo in Interactive Latencies
* Update submission_checker.py
---------
Co-authored-by: Miro <[email protected]>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
* Update MLCFlow commands for v5.1 (#2237)
* [Automated Commit] Format Codebase
* [Automated Commit] Format Codebase
* Update main.py
* [Automated Commit] Format Codebase
* updating for 5.1-dev (inference doc)
* [Automated Commit] Format Codebase
* fix typo
* [Automated Commit] Format Codebase
* Update main.py
* [Automated Commit] Format Codebase
* [Automated Commit] Format Codebase
* Doc updates (#2292)
* improve submission doc
* Update index.md
* Fix for model and dataset download commands
* update submission doc
* [Automated Commit] Format Codebase
* Update index.md
* r2_downloader -> r2-downloader
* Update multithreading information about SDXL
* [Automated Commit] Format Codebase
* .lower() for consistency
* [Automated Commit] Format Codebase
* updation for llama3_1-8b edge
* [Automated Commit] Format Codebase
---------
Co-authored-by: github-actions <[email protected]>
Co-authored-by: github-actions[bot] <github-actions[bot]@users.noreply.github.com>
Co-authored-by: Arjun Suresh <[email protected]>
Co-authored-by: ANANDHU S <[email protected]>
Co-authored-by: Nathan Wasson <[email protected]>
Co-authored-by: Pablo Gonzalez <[email protected]>
Co-authored-by: Miro <[email protected]>
1 parent d4bf062 commit 6776245
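
The same replacement pattern recurs in each compliance script touched below: the checker is launched with `sys.executable` instead of a hard-coded `python3`, its output is streamed to both the console and a log file in place of the old `| tee`, and the final `grep PASS` check becomes a plain file read. A minimal standalone sketch of that pattern, assuming a `verify_performance.py` sitting next to the caller; the helper names `run_and_tee` and `log_reports_pass` are illustrative, not functions from this repository:

```python
import subprocess
import sys


def run_and_tee(command, log_path):
    # Stream a command's combined stdout/stderr to the console and to a log
    # file, which is what "cmd | tee log" did in the old shell-based flow.
    with open(log_path, "w") as log_file:
        process = subprocess.Popen(
            command,
            stdout=subprocess.PIPE,
            stderr=subprocess.STDOUT,
            shell=True,
            text=True,
        )
        for line in process.stdout:
            print(line, end="")
            log_file.write(line)
        process.wait()
    return process.returncode


def log_reports_pass(log_path):
    # Replace "grep PASS <log>" with a plain file read.
    with open(log_path, "r") as log_file:
        return "TEST PASS" in log_file.read()


if __name__ == "__main__":
    # Hypothetical usage: run a checker with the same interpreter that is
    # running this script, then inspect its log for the PASS marker.
    run_and_tee(sys.executable + " verify_performance.py",
                "verify_performance.txt")
    print("pass:", log_reports_pass("verify_performance.txt"))
```

Because nothing in the flow shells out to `wc`, `grep`, `sed`, or `md5sum` anymore, the same logic runs unchanged on Windows.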

File tree

12 files changed (+170, -115 lines)

compliance/TEST01/run_verification.py

Lines changed: 60 additions & 33 deletions
@@ -76,51 +76,68 @@ def main():
     output_dir = os.path.join(args.output_dir, "TEST01")
     unixmode = ""
     if args.unixmode:
-        unixmode = " --unixmode"
-        for binary in ["wc", "md5sum", "grep", "awk", "sed", "head", "tail"]:
+        if os.name != "posix":
+            print(
+                "Warning: --unixmode not supported on this OS. Using Python fallback...")
+            unixmode = ""
+        else:
+            unixmode = " --unixmode"
             missing_binary = False
-            if shutil.which(binary) is None:
-                print(
-                    "Error: This script requires the {:} commandline utility".format(
-                        binary
+            for binary in ["wc", "md5sum", "grep",
+                           "awk", "sed", "head", "tail"]:
+                if shutil.which(binary) is None:
+                    print(
+                        "Error: This script requires the {:} commandline utility".format(
+                            binary
+                        )
                     )
-                )
-                missing_binary = True
-        if missing_binary:
-            exit()
+                    missing_binary = True
+            if missing_binary:
+                exit()

     dtype = args.dtype

     verify_accuracy_binary = os.path.join(
         os.path.dirname(__file__), "verify_accuracy.py"
     )
+
+    unixmode_str = unixmode if unixmode == "" else unixmode + " "
+
     # run verify accuracy
     verify_accuracy_command = (
-        "python3 "
+        sys.executable + " "
         + verify_accuracy_binary
         + " --dtype "
         + args.dtype
-        + unixmode
+        + unixmode_str
         + " -r "
-        + results_dir
-        + "/accuracy/mlperf_log_accuracy.json"
+        + os.path.join(results_dir, "accuracy", "mlperf_log_accuracy.json")
         + " -t "
-        + compliance_dir
-        + "/mlperf_log_accuracy.json | tee verify_accuracy.txt"
+        + os.path.join(compliance_dir, "mlperf_log_accuracy.json")
     )
     try:
-        os.system(verify_accuracy_command)
+        with open("verify_accuracy.txt", "w") as f:
+            process = subprocess.Popen(
+                verify_accuracy_command,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                shell=True,
+                text=True
+            )
+            # Write output to both console and file
+            for line in process.stdout:
+                print(line, end="")
+                f.write(line)
+            process.wait()
     except Exception:
         print(
             "Exception occurred trying to execute:\n " +
             verify_accuracy_command)
     # check if verify accuracy script passes

-    accuracy_pass_command = "grep PASS verify_accuracy.txt"
     try:
-        accuracy_pass = "TEST PASS" in subprocess.check_output(
-            accuracy_pass_command, shell=True
-        ).decode("utf-8")
+        with open("verify_accuracy.txt", "r") as file:
+            accuracy_pass = "TEST PASS" in file.read()
     except Exception:
         accuracy_pass = False

@@ -129,28 +146,38 @@ def main():
         os.path.dirname(__file__), "verify_performance.py"
     )
     verify_performance_command = (
-        "python3 "
+        sys.executable + " "
         + verify_performance_binary
-        + " -r "
-        + results_dir
-        + "/performance/run_1/mlperf_log_detail.txt"
-        + " -t "
-        + compliance_dir
-        + "/mlperf_log_detail.txt | tee verify_performance.txt"
+        + " -r"
+        + os.path.join(results_dir, "performance",
+                       "run_1", "mlperf_log_detail.txt")
+        + " -t"
+        + os.path.join(compliance_dir, "mlperf_log_detail.txt")
     )
+
     try:
-        os.system(verify_performance_command)
+        with open("verify_performance.txt", "w") as f:
+            process = subprocess.Popen(
+                verify_performance_command,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.STDOUT,
+                text=True,
+                shell=True,
+            )
+            # Write output to both console and file
+            for line in process.stdout:
+                print(line, end="")
+                f.write(line)
+            process.wait()
     except Exception:
         print(
             "Exception occurred trying to execute:\n " +
             verify_performance_command)

     # check if verify performance script passes
-    performance_pass_command = "grep PASS verify_performance.txt"
     try:
-        performance_pass = "TEST PASS" in subprocess.check_output(
-            performance_pass_command, shell=True
-        ).decode("utf-8")
+        with open("verify_performance.txt", "r") as file:
+            performance_pass = "TEST PASS" in file.read()
     except Exception:
         performance_pass = False

compliance/TEST01/verify_accuracy.py

Lines changed: 36 additions & 42 deletions
@@ -20,6 +20,8 @@
 import subprocess
 import sys
 import shutil
+import hashlib
+import re

 sys.path.append(os.getcwd())

@@ -161,15 +163,11 @@ def main():
         print("Error: This script requires Python v3.3 or later")
         exit()

-    get_perf_lines_cmd = "wc -l " + perf_log + "| awk '{print $1}'"
-    num_perf_lines = int(
-        subprocess.check_output(get_perf_lines_cmd, shell=True).decode("utf-8")
-    )
+    with open(perf_log, "r") as file:
+        num_perf_lines = sum(1 for _ in file)

-    get_acc_lines_cmd = "wc -l " + acc_log + "| awk '{print $1}'"
-    num_acc_lines = int(
-        subprocess.check_output(get_acc_lines_cmd, shell=True).decode("utf-8")
-    )
+    with open(acc_log, "r") as file:
+        num_acc_lines = sum(1 for _ in file)

     num_acc_log_entries = num_acc_lines - 2
     num_perf_log_entries = num_perf_lines - 2
@@ -189,42 +187,38 @@ def main():
             continue

         # calculate md5sum of line in perf mode accuracy_log
-        perf_md5sum_cmd = (
-            "head -n "
-            + str(perf_line + 1)
-            + " "
-            + perf_log
-            + "| tail -n 1| sed -r 's/,//g' | sed -r 's/\"seq_id\" : \\S+//g' | md5sum"
-        )
-        # print(perf_md5sum_cmd)
-        perf_md5sum = subprocess.check_output(perf_md5sum_cmd, shell=True).decode(
-            "utf-8"
-        )
-
-        # get qsl idx
-        get_qsl_idx_cmd = (
-            "head -n "
-            + str(perf_line + 1)
-            + " "
-            + perf_log
-            + "| tail -n 1| awk -F\": |,\" '{print $4}'"
-        )
-        qsl_idx = (
-            subprocess.check_output(get_qsl_idx_cmd, shell=True)
-            .decode("utf-8")
-            .rstrip()
-        )
+        # read the specific line
+        with open(perf_log, "r") as f:
+            for i, line in enumerate(f):
+                if i == perf_line:
+                    line_content = line.strip()
+                    break
+
+        # remove commas and remove 'seq_id' key-value
+        clean_line = line_content.replace(",", "")
+        clean_line = re.sub(r'"seq_id"\s*:\s*\S+', '', clean_line)
+
+        # calculate md5sum
+        perf_md5sum = hashlib.md5(clean_line.encode("utf-8")).hexdigest()
+
+        # extract qsl idx
+        fields = re.split(r": |,", line_content)
+        qsl_idx = fields[3].strip()

         # calculate md5sum of line in acc mode accuracy_log
-        acc_md5sum_cmd = (
-            'grep "qsl_idx\\" : '
-            + qsl_idx
-            + '," '
-            + acc_log
-            + "| sed -r 's/,//g' | sed -r 's/\"seq_id\" : \\S+//g' | md5sum"
-        )
-        acc_md5sum = subprocess.check_output(
-            acc_md5sum_cmd, shell=True).decode("utf-8")
+        acc_matches = []
+        with open(acc_log, "r") as f:
+            for line in f:
+                if f'"qsl_idx" : {qsl_idx},' in line:
+                    acc_matches.append(line.strip())
+
+        # join all matching lines together
+        acc_line = "\n".join(acc_matches)
+
+        acc_line = acc_line.replace(",", "")
+        acc_line = re.sub(r'"seq_id"\s*:\s*\S+', '', acc_line)
+
+        acc_md5sum = hashlib.md5(acc_line.encode("utf-8")).hexdigest()

         if perf_md5sum != acc_md5sum:
             num_perf_log_data_mismatch += 1

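For the accuracy-log comparison, the old `sed -r 's/,//g' | sed -r 's/"seq_id" : \S+//g' | md5sum` pipeline is reproduced with `re` and `hashlib`, as the diff above shows. A rough standalone sketch of that normalization under the same assumptions (drop commas, blank out the `seq_id` field, hash the rest); `line_digest` is an illustrative name, not a helper in `verify_accuracy.py`:

```python
import hashlib
import re


def line_digest(line):
    # Mirror the old shell pipeline: drop commas, blank out the "seq_id"
    # key/value pair, then hash what is left.
    cleaned = line.strip().replace(",", "")
    cleaned = re.sub(r'"seq_id"\s*:\s*\S+', '', cleaned)
    return hashlib.md5(cleaned.encode("utf-8")).hexdigest()


# Hypothetical usage: two log entries differing only in seq_id hash equal.
a = '{ "seq_id" : 7, "qsl_idx" : 12, "data" : "0A0B" }'
b = '{ "seq_id" : 9, "qsl_idx" : 12, "data" : "0A0B" }'
print(line_digest(a) == line_digest(b))  # True
```

Both the performance-mode and accuracy-mode lines go through the same cleanup, so entries that differ only in `seq_id` still produce matching digests, which is what the original shell pipeline relied on.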
compliance/TEST04/run_verification.py

Lines changed: 22 additions & 12 deletions
@@ -58,28 +58,38 @@ def main():
         os.path.dirname(__file__), "verify_performance.py"
     )
     verify_performance_command = (
-        "python3 "
+        sys.executable + " "
         + verify_performance_binary
-        + " -r "
-        + results_dir
-        + "/performance/run_1/mlperf_log_summary.txt"
-        + " -t "
-        + compliance_dir
-        + "/mlperf_log_summary.txt | tee verify_performance.txt"
+        + " -r"
+        + os.path.join(results_dir, "performance",
+                       "run_1", "mlperf_log_summary.txt")
+        + " -t"
+        + os.path.join(compliance_dir, "mlperf_log_summary.txt")
     )
+
     try:
-        os.system(verify_performance_command)
+        with open("verify_performance.txt", "w") as f:
+            process = subprocess.Popen(
+                verify_performance_command,
+                stdout=subprocess.PIPE,  # capture output
+                stderr=subprocess.STDOUT,
+                text=True,  # decode output as text
+                shell=True,
+            )
+            # Write output to both console and file
+            for line in process.stdout:
+                print(line, end="")  # console
+                f.write(line)  # file
+            process.wait()
     except Exception:
         print(
             "Exception occurred trying to execute:\n " +
             verify_performance_command)

     # check if verify performance script passes
-    performance_pass_command = "grep PASS verify_performance.txt"
     try:
-        performance_pass = "TEST PASS" in subprocess.check_output(
-            performance_pass_command, shell=True
-        ).decode("utf-8")
+        with open("verify_performance.txt", "r") as file:
+            performance_pass = "TEST PASS" in file.read()
     except Exception:
         performance_pass = False

docs/benchmarks/image_classification/get-resnet50-data.md

Lines changed: 3 additions & 3 deletions
@@ -15,7 +15,7 @@ The benchmark implementation run command will automatically download the validat

     ### Get Validation Dataset
     ```
-    mlcr get,dataset,imagenet,validation -j
+    mlcr get,dataset,imagenet,validation,_full -j
     ```
 === "Calibration"
     ResNet50 calibration dataset consist of 500 images selected from the Imagenet 2012 validation dataset. There are 2 alternative options for the calibration dataset.
@@ -32,7 +32,7 @@ The benchmark implementation run command will automatically download the validat
     ### Get ResNet50 preprocessed dataset

     ```
-    mlcr get,dataset,image-classification,imagenet,preprocessed,_pytorch -j
+    mlcr get,dataset,image-classification,imagenet,preprocessed,_pytorch,_full -j
     ```

     - `--outdirname=<PATH_TO_DOWNLOAD_IMAGENET_DATASET>` could be provided to download the dataset to a specific location.
@@ -52,7 +52,7 @@ Get the Official MLPerf ResNet50 Model

     ### Onnx
     ```
-    mlcr get,ml-model,resnet50,_onnx -j
+    mlcr get,ml-model,resnet50,image-classification,_onnx -j
     ```

     - `--outdirname=<PATH_TO_DOWNLOAD_RESNET50_MODEL>` could be provided to download the model to a specific location.

docs/benchmarks/language/get-deepseek-r1-data.md

Lines changed: 5 additions & 6 deletions
@@ -11,12 +11,11 @@ The benchmark implementation run command will automatically download the validat

 === "Validation"

-    ### Get Validation Dataset
-    ```
-    mlcr get,preprocessed,dataset,deepseek-r1,_validation,_mlc,_rclone --outdirname=<path to download> -j
-    ```

-=== "Calibration"
+    ### Get Validation Dataset
+    ```
+    mlcr get,preprocessed,dataset,deepseek-r1,_validation,_mlc,_r2-downloader --outdirname=<path to download> -j
+    ```

     ### Get Calibration Dataset
     ```
@@ -33,4 +32,4 @@ The benchmark implementation run command will automatically download the require
 ### Get the Official MLPerf DeekSeek-R1 model from MLCOMMONS Storage
 ```
 mlcr get,ml-model,deepseek-r1,_r2-downloader,_mlc,_dry-run -j
-```
+```

docs/benchmarks/language/get-llama3_1-405b-data.md

Lines changed: 8 additions & 0 deletions
@@ -38,6 +38,14 @@ The benchmark implementation run command will automatically download the require
     ```
     mlcr get,ml-model,llama3,_mlc,_r2-downloader,_405b --outdirname=<path to download> -j
     ```
+
+=== "From Cloudfare R2"
+
+    > **Note:** One has to accept the [MLCommons Llama 3.1 License Confidentiality Notice](http://llama3-1.mlcommons.org/) to access the model files in MLCOMMONS Google Drive.
+
+    ### Get the Official MLPerf LLAMA3.1-405B model from MLCOMMONS Cloudfare R2
+    ```
+    mlcr get,ml-model,llama3,_mlc,_405b,_r2-downloader --outdirname=<path to download> -j

 === "From Hugging Face repo"

docs/benchmarks/language/get-llama3_1-8b-data.md

Lines changed: 4 additions & 3 deletions
@@ -10,14 +10,14 @@ hide:
 The benchmark implementation run command will automatically download the validation and calibration datasets and do the necessary preprocessing. In case you want to download only the datasets, you can use the below commands.

 === "Validation"
-
+
     === "Full dataset (Datacenter)"

         ### Get Validation Dataset
         ```
         mlcr get,dataset,cnndm,_validation,_datacenter,_llama3,_mlc,_r2-downloader --outdirname=<path to download> -j
         ```
-
+
     === "5000 samples (Edge)"

         ### Get Validation Dataset
@@ -26,7 +26,8 @@ The benchmark implementation run command will automatically download the validat
         ```

 === "Calibration"
-
+    ```
+
     ### Get Calibration Dataset
     ```
     mlcr get,dataset,cnndm,_calibration,_llama3,_mlc,_r2-downloader --outdirname=<path to download> -j