Commit 1241e27

Update documentation for release/0.9
1 parent 447248e commit 1241e27

File tree

56 files changed (+7320 / -887 lines)

docs/source/_ext/quark_jupyter_notebook_build.py

Lines changed: 1 addition & 1 deletion
@@ -23,7 +23,7 @@ def update_jupyter_notebook_toc_placeholder(app, docname, source):
     jupyter_notebook_index_rst = os.path.join('source', 'jupyter_notebook_index.rst_')
     if "READTHEDOCS" in os.environ:
-        READTHEDOCS_REPOSITORY_PATH= os.environ.get("READTHEDOCS_REPOSITORY_PATH")
+        READTHEDOCS_REPOSITORY_PATH = os.environ.get("READTHEDOCS_REPOSITORY_PATH")
         jupyter_notebook_index_rst = os.path.join(READTHEDOCS_REPOSITORY_PATH, 'docs', 'source', 'jupyter_notebook_index.rst_')
     jupyter_notebook_toc_placeholder = '@quark_jupyter_notebook_toc_placeholder@'
     with open(jupyter_notebook_index_rst, 'r') as f:
Binary file not shown (-71.2 KB).

docs/source/_static/quant/fx_mode_quant/yolo_nas/1_original_fp32_train_stage.png

File mode changed: 100644 → 100755.

docs/source/_static/quant/fx_mode_quant/yolo_nas/2_folded_fp32_validation_stage.png

File mode changed: 100644 → 100755.

docs/source/_static/quant/fx_mode_quant/yolo_nas/3_quant_scope.png

File mode changed: 100644 → 100755.
Lines changed: 21 additions & 0 deletions
quark
=====

.. py:module:: quark

.. autoapi-nested-parse::

   **Quark** is a comprehensive cross-platform toolkit designed to simplify and
   enhance the quantization of deep learning models. Supporting both PyTorch and
   ONNX models, Quark empowers developers to optimize their models for deployment
   on a wide range of hardware backends, achieving significant performance gains
   without compromising accuracy.

   For further details on the features and capabilities of Quark, please refer to the following:

   * [Documentation](https://quark.docs.amd.com)
   * [PyTorch examples](https://quark.docs.amd.com/latest/pytorch/pytorch_examples.html)
   * [ONNX examples](https://quark.docs.amd.com/latest/onnx/onnx_examples.html)

docs/source/conf.py

Lines changed: 2 additions & 0 deletions
@@ -177,6 +177,8 @@ def setup(app):
 # directories to ignore when looking for source files.
 # This patterns also effect to html_static_path and html_extra_path
 exclude_patterns = ['include', 'api_rst', '_build', 'Thumbs.db', '.DS_Store', '**.ipynb_checkpoints']
+exclude_patterns.append('*autoapi/quark/index.rst')
+
 nitpicky = True

 # The name of the Pygments (syntax highlighting) style to use.

docs/source/index.rst

Lines changed: 1 addition & 1 deletion
@@ -131,7 +131,7 @@ Key Features

 .. toctree::
    :hidden:
-   :caption: Reference API
+   :caption: APIs
    :maxdepth: 1

    PyTorch APIs <autoapi/pytorch_apis>

docs/source/onnx/appendix_full_quant_config_features.rst

Lines changed: 16 additions & 4 deletions
@@ -539,10 +539,18 @@ Quantization Configuration
     the input data is from the float module fully, 1 represents all
     from the quantized module. The default value is 1.
   - **MemOptLevel**: (Int) Specifies the level of memory optimization.
-    Options are 0 and 1. If 0, it means no memory optimization is applied,
-    which will be faster but requires more memory for caching. If 1, it
-    caches the ground-truth for finetuning layer by layer instead of all,
-    which consumes less memory but may take longer time. The default is 1.
+    Options are 0, 1, and 2. Setting it to 0 disables optimization,
+    making training faster but using more memory for caching.
+    Setting it to 1 caches data one layer at a time, reducing memory
+    usage at the cost of longer training times. Setting it to 2
+    saves layer data to a cache directory on disk and loads only
+    one batch at a time, greatly lowering memory consumption but further
+    increasing training time. The default value is 1.
+  - **CacheDir**: (String) Specifies the directory used to cache
+    intermediate files during fine-tuning. This option is only effective
+    when MemOptLevel is set to 2. Note that after fine-tuning, some
+    intermediate files may remain in this directory. The default value
+    is None, in which case a temporary directory will be used for caching.
   - **LogPeriod**: (Int) Indicate how many iterations to print the
     log once. The default value is NumIterations/10.

@@ -672,6 +680,10 @@ Quantization Configuration
   - **Bits**: (int) The target bits to quantize. Only 4b quantization is supported for inference, additional bits support is planned.
   - **AccuracyLevel**: (int) The quantization level of input, can be: 0(unset), 1(fp32), 2(fp16), 3(bf16), or 4(int8). The default is 0.

+* **EncryptionAlgorithm**: (String) Specifies the encryption algorithm for crypto mode;
+  only the "AES-256" algorithm is currently supported. The default value is None, which means
+  no intermediate models/files are saved to disk in crypto mode.
+
 Table 7. Quantize Types can be selected for different Quantize Formats

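In practice, the new MemOptLevel and CacheDir options are passed through the
fast-finetune extra options. A minimal, hedged sketch (names follow the public
Quark ONNX API, but exact import paths and option placement may vary between
releases, so treat them as assumptions):

::

   # Hedged sketch: enabling the memory options described above for fast
   # finetuning (AdaQuant).
   from quark.onnx import ModelQuantizer
   from quark.onnx.quantization.config import Config, get_default_config

   quant_config = get_default_config("BFP16_ADAQUANT")
   ft_options = quant_config.extra_options.setdefault("FastFinetune", {})
   ft_options["MemOptLevel"] = 2          # lowest memory use, slowest training
   ft_options["CacheDir"] = "./ft_cache"  # read only when MemOptLevel == 2

   quantizer = ModelQuantizer(Config(global_quant_config=quant_config))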
Lines changed: 219 additions & 0 deletions
Block Floating Point (BFP) Example
==================================

.. note::

   For information on accessing Quark ONNX examples, refer to :doc:`Accessing ONNX Examples <onnx_examples>`.
   This example and the relevant files are available at ``/onnx/accuracy_improvement/BFP``.

This is an example of quantizing a `mobilenetv2_050.lamb_in1k` model using the ONNX quantizer of Quark with BFP16.
Int8 quantization performs poorly on this model, but BFP16 and ADAQUANT can significantly mitigate the quantization loss.

Block Floating Point (BFP) quantization reduces computational complexity by grouping numbers to share a common exponent, preserving accuracy efficiently.
BFP offers both reduced storage requirements and high quantization precision. A minimal sketch of the idea follows.
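As a rough illustration of the shared-exponent idea (the mantissa width and
the treatment of the whole array as a single block are illustrative
assumptions, not Quark's exact BFP16 parameters):

::

   # Toy NumPy sketch of block floating point: every value in a block shares
   # the exponent of the largest magnitude, and each keeps a small signed
   # mantissa. Values far below the block maximum lose precision.
   import numpy as np

   def bfp_quantize_block(block, mantissa_bits=8):
       shared_exp = np.floor(np.log2(np.max(np.abs(block)) + 1e-30))
       scale = 2.0 ** (shared_exp - (mantissa_bits - 1))
       qmax = 2 ** (mantissa_bits - 1) - 1
       mantissas = np.clip(np.round(block / scale), -qmax - 1, qmax)
       return mantissas * scale  # dequantized values sharing one exponent

   x = np.array([0.52, -0.013, 0.0009, 0.25], dtype=np.float32)
   print(bfp_quantize_block(x))  # the tiny 0.0009 rounds to 0 at this scale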
The example has the following parts:

- `Pip requirements <#pip-requirements>`__
- `Prepare model <#prepare-model>`__
- `Prepare data <#prepare-data>`__
- `BFP16 Quantization <#bfp16-quantization>`__
- `BFP16 Quantization with ADAQUANT <#bfp16-quantization-with-adaquant>`__
- `Evaluation <#evaluation>`__
Pip requirements
----------------

Install the necessary Python packages:

::

   python -m pip install -r ../utils/requirements.txt
Prepare model
-------------

Export the ONNX model from the mobilenetv2_050.lamb_in1k torch model. The corresponding model link is https://huggingface.co/timm/mobilenetv2_050.lamb_in1k:

::

   mkdir models && python ../utils/export_onnx.py mobilenetv2_050.lamb_in1k
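For reference, the export step is roughly equivalent to the following sketch
(an assumption about what ``../utils/export_onnx.py`` does internally; the
shipped helper may differ):

::

   # Hypothetical export sketch using timm + torch.onnx.export; the input
   # name and dynamic batch axis match the fixed-shape conversion used later.
   import timm
   import torch

   model = timm.create_model("mobilenetv2_050.lamb_in1k", pretrained=True).eval()
   dummy = torch.randn(1, 3, 224, 224)
   torch.onnx.export(model, dummy, "models/mobilenetv2_050.lamb_in1k.onnx",
                     input_names=["input"], output_names=["output"],
                     dynamic_axes={"input": {0: "batch"}})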
Prepare data
------------

ILSVRC 2012, commonly known as 'ImageNet', is the most commonly used subset of
ImageNet. It spans 1000 object classes and contains 50,000 validation images.

If you already have an ImageNet dataset, you can use your dataset path
directly.

To prepare the test data, please check the download section of the main
website: https://huggingface.co/datasets/imagenet-1k/tree/main/data. You
need to register and download **val_images.tar.gz**.
Then, create the validation dataset and calibration dataset:

::

   mkdir val_data && tar -xzf val_images.tar.gz -C val_data
   python ../utils/prepare_data.py val_data calib_data
The val_data directory of the ImageNet dataset is organized as follows:

- val_data

  - n01440764

    - ILSVRC2012_val_00000293.JPEG
    - ILSVRC2012_val_00002138.JPEG
    - …

  - n01443537

    - ILSVRC2012_val_00000236.JPEG
    - ILSVRC2012_val_00000262.JPEG
    - …

  - …
  - n15075141

    - ILSVRC2012_val_00001079.JPEG
    - ILSVRC2012_val_00002663.JPEG
    - …
The calib_data directory of the ImageNet dataset is organized as follows;
a sketch of one way to build it appears after this list:

- calib_data

  - n01440764

    - ILSVRC2012_val_00000293.JPEG

  - n01443537

    - ILSVRC2012_val_00000236.JPEG

  - …
  - n15075141

    - ILSVRC2012_val_00001079.JPEG
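A single image per class is used for calibration here. A hypothetical sketch
of what ``../utils/prepare_data.py`` might do (the shipped script may differ
in selection strategy and image count):

::

   # Hypothetical sketch: copy the first image of each class from val_data
   # into calib_data to build a small calibration set.
   import os
   import shutil
   import sys

   val_dir, calib_dir = sys.argv[1], sys.argv[2]
   for cls in sorted(os.listdir(val_dir)):
       images = sorted(os.listdir(os.path.join(val_dir, cls)))
       if images:
           os.makedirs(os.path.join(calib_dir, cls), exist_ok=True)
           shutil.copy(os.path.join(val_dir, cls, images[0]),
                       os.path.join(calib_dir, cls, images[0]))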
BFP16 Quantization
------------------

The quantizer takes the float model and produces a BFP16 quantized model.

::

   python quantize_model.py --model_name mobilenetv2_050.lamb_in1k \
                            --input_model_path models/mobilenetv2_050.lamb_in1k.onnx \
                            --output_model_path models/mobilenetv2_050.lamb_in1k_quantized.onnx \
                            --calibration_dataset_path calib_data \
                            --config BFP16

This command generates a BFP16 quantized model under the **models** folder.
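Under the hood, the helper script builds a configuration from the named preset
and invokes the quantizer. A hedged sketch based on the public Quark ONNX API
(the bundled quantize_model.py also parses CLI arguments and builds a
calibration data reader from calib_data; import paths may differ between
releases):

::

   # Hedged sketch of the programmatic equivalent; not the bundled script.
   from quark.onnx import ModelQuantizer
   from quark.onnx.quantization.config import Config, get_default_config

   quant_config = get_default_config("BFP16")
   quantizer = ModelQuantizer(Config(global_quant_config=quant_config))
   quantizer.quantize_model("models/mobilenetv2_050.lamb_in1k.onnx",
                            "models/mobilenetv2_050.lamb_in1k_quantized.onnx",
                            None)  # calibration data reader omitted in this sketch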
BFP16 Quantization with ADAQUANT
--------------------------------

The quantizer takes the float model and produces a BFP16 quantized model with
ADAQUANT.

Note: If the model has dynamic shapes, you need to convert the model to fixed shapes before performing ADAQUANT:

::

   python -m quark.onnx.tools.convert_dynamic_to_fixed --fix_shapes 'input:[1,3,224,224]' models/mobilenetv2_050.lamb_in1k.onnx models/mobilenetv2_050.lamb_in1k_fix.onnx

::

   python quantize_model.py --model_name mobilenetv2_050.lamb_in1k \
                            --input_model_path models/mobilenetv2_050.lamb_in1k_fix.onnx \
                            --output_model_path models/mobilenetv2_050.lamb_in1k_adaquant_quantized.onnx \
                            --calibration_dataset_path calib_data \
                            --config BFP16_ADAQUANT

If a GPU is available in your environment, you can accelerate the training process by setting the 'device' parameter to 'rocm' or 'cuda':

::

   python quantize_model.py --model_name mobilenetv2_050.lamb_in1k \
                            --input_model_path models/mobilenetv2_050.lamb_in1k_fix.onnx \
                            --output_model_path models/mobilenetv2_050.lamb_in1k_adaquant_quantized.onnx \
                            --calibration_dataset_path calib_data \
                            --config BFP16_ADAQUANT \
                            --device cuda

This command generates a BFP16 quantized model with ADAQUANT under the **models** folder.
Evaluation
----------

Test the accuracy of the float model on the ImageNet val dataset:

::

   python ../utils/onnx_validate.py val_data --model-name mobilenetv2_050.lamb_in1k --batch-size 1 --onnx-input models/mobilenetv2_050.lamb_in1k.onnx

Test the accuracy of the BFP16 quantized model on the ImageNet val dataset:

::

   python ../utils/onnx_validate.py val_data --model-name mobilenetv2_050.lamb_in1k --batch-size 1 --onnx-input models/mobilenetv2_050.lamb_in1k_quantized.onnx

If you want to run faster with GPU support, you can also execute the following command:

::

   python ../utils/onnx_validate.py val_data --model-name mobilenetv2_050.lamb_in1k --batch-size 1 --onnx-input models/mobilenetv2_050.lamb_in1k_quantized.onnx --gpu

Test the accuracy of the BFP16 quantized model with ADAQUANT on the ImageNet val dataset:

::

   python ../utils/onnx_validate.py val_data --model-name mobilenetv2_050.lamb_in1k --batch-size 1 --onnx-input models/mobilenetv2_050.lamb_in1k_adaquant_quantized.onnx

If you want to run faster with GPU support, you can also execute the following command:

::

   python ../utils/onnx_validate.py val_data --model-name mobilenetv2_050.lamb_in1k --batch-size 1 --onnx-input models/mobilenetv2_050.lamb_in1k_adaquant_quantized.onnx --gpu
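Before the full runs, you can smoke-test the exported float model with ONNX
Runtime (illustrative only; the BFP16 quantized models contain Quark custom
operators, so their evaluation goes through the helper script, which registers
Quark's custom-op library):

::

   # Smoke test of the float model; a random tensor stands in for a
   # preprocessed image batch, so the predicted class is meaningless here.
   import numpy as np
   import onnxruntime as ort

   session = ort.InferenceSession(
       "models/mobilenetv2_050.lamb_in1k.onnx",
       providers=["CPUExecutionProvider"])
   input_name = session.get_inputs()[0].name
   x = np.random.rand(1, 3, 224, 224).astype(np.float32)
   logits = session.run(None, {input_name: x})[0]
   print("output shape:", logits.shape)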
Quantization Results
--------------------

+------------+-------------+------------------+-----------------+
|            | Float Model | Quantized Model  | Quantized Model |
|            |             | without ADAQUANT | with ADAQUANT   |
+============+=============+==================+=================+
| Model Size | 8.7 MB      | 8.4 MB           | 8.4 MB          |
+------------+-------------+------------------+-----------------+
| Prec@1     | 65.424 %    | 60.806 %         | 64.652 %        |
+------------+-------------+------------------+-----------------+
| Prec@5     | 85.788 %    | 82.648 %         | 85.278 %        |
+------------+-------------+------------------+-----------------+

.. note:: Different execution devices can lead to minor variations in the
   accuracy of the quantized model.
.. raw:: html

   <!--
   ## License
   Copyright (C) 2024, Advanced Micro Devices, Inc. All rights reserved. SPDX-License-Identifier: MIT
   -->
