From 07932937a57863624372f9f570d3466b009463a5 Mon Sep 17 00:00:00 2001 From: Chang Sun Date: Thu, 7 Aug 2025 09:07:58 -0700 Subject: [PATCH 1/8] doc update for hgq/da --- README.md | 12 + docs/advanced/_static/da4ml-workflow.svg | 4463 ++++++++++++++++++++++ docs/advanced/_static/hgq-overview.svg | 1 + docs/advanced/da.rst | 32 + docs/advanced/extension.rst | 9 +- docs/advanced/hgq.rst | 89 +- docs/advanced/hgq1.rst | 49 + docs/advanced/precision.rst | 14 + docs/api/configuration.rst | 8 +- docs/frontend/keras.rst | 20 +- docs/index.rst | 2 + docs/intro/introduction.rst | 4 +- docs/intro/reference.rst | 14 + docs/intro/setup.rst | 96 +- docs/intro/status.rst | 116 +- hls4ml/backends/vivado/vivado_backend.py | 6 +- hls4ml/contrib/kl_layer/kl_layer.py | 2 +- hls4ml/converters/__init__.py | 4 +- hls4ml/converters/keras_v2_to_hls.py | 2 +- test/pytest/test_extensions.py | 2 +- 20 files changed, 4741 insertions(+), 204 deletions(-) create mode 100644 docs/advanced/_static/da4ml-workflow.svg create mode 100644 docs/advanced/_static/hgq-overview.svg create mode 100644 docs/advanced/da.rst create mode 100644 docs/advanced/hgq1.rst create mode 100644 docs/advanced/precision.rst diff --git a/README.md b/README.md index fd04bfca74..a6f4a080cb 100644 --- a/README.md +++ b/README.md @@ -125,6 +125,18 @@ Additionally, if you use specific features developed in later papers, please cit year = "2022" } ``` +Distributed arithmetic: +```bibtex +@misc{Sun:2025, + title={da4ml: Distributed Arithmetic for Real-time Neural Networks on FPGAs}, + author={Chang Sun and others}, + year={2025}, + eprint={2507.04535}, + archivePrefix={arXiv}, + primaryClass={cs.AR}, + url={https://arxiv.org/abs/2507.04535}, +} +``` binary/ternary networks: ```bibtex @article{Loncar:2020hqp, diff --git a/docs/advanced/_static/da4ml-workflow.svg b/docs/advanced/_static/da4ml-workflow.svg new file mode 100644 index 0000000000..392b7a2e0a --- /dev/null +++ b/docs/advanced/_static/da4ml-workflow.svg @@ -0,0 +1,4463 @@ + + + +FFPGAx0x0x1x1x2x2x3x3y + +0y0y + +1y1y + +2y2y + +3y3<< 1<< 1<< 1<< 1da111111112211-1-1-2-211-1-1-1-11111-2-22211x0x0x1x1x2x2x3x3ml4Text is not SVG - cannot displayCMVM Problems(or other frontend)Optimized adder trees111111112211-1-1-2111-1-1-1-11111-2-22211x0x0x1x1x2x2x3x3C+ +bitstream diff --git a/docs/advanced/_static/hgq-overview.svg b/docs/advanced/_static/hgq-overview.svg new file mode 100644 index 0000000000..166a6af99d --- /dev/null +++ b/docs/advanced/_static/hgq-overview.svg @@ -0,0 +1 @@ + diff --git a/docs/advanced/da.rst b/docs/advanced/da.rst new file mode 100644 index 0000000000..0d6373306f --- /dev/null +++ b/docs/advanced/da.rst @@ -0,0 +1,32 @@ +====================== +Distributed Arithmetic +====================== + +.. image:: https://img.shields.io/badge/License-LGPLv3-blue.svg + :target: https://www.gnu.org/licenses/lgpl-3.0.en.html +.. image:: https://badge.fury.io/py/da4ml.svg + :target: https://badge.fury.io/py/da4ml +.. image:: https://img.shields.io/badge/arXiv-2507.04535-b31b1b.svg + :target: https://arxiv.org/abs/2507.04535 + + +Distributed Arithmetic (DA) is a strategy for constant-matrix-vector multiplication (CMVM) operations used in hls4ml. The implementation is provided by an external library, `da4ml `__. The library transforms the CMVM operations into an adder graph with common subexpression elimations to reduce the overall complexity. As the CMVM operation is fully unrolled, `reuse_factor` **must** be 1 (by default) for the corresponding CMVM operations [*]_. 
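+For example, a minimal configuration sketch selecting this strategy could look as follows (it assumes the standard ``config_from_keras_model``/``convert_from_keras_model`` workflow; the backend and project settings are placeholders):
+
+.. code-block:: python
+
+   import hls4ml
+
+   # Select DA for the whole model; the strategy can also be set per layer.
+   config = hls4ml.utils.config_from_keras_model(model, granularity='name', backend='Vitis')
+   config['Model']['Strategy'] = 'distributed_arithmetic'  # 'da' is accepted as a shorthand
+   # Leave reuse_factor at its default of 1 for layers using this strategy.
+
+   hls_model = hls4ml.converters.convert_from_keras_model(
+       model, hls_config=config, backend='Vitis', output_dir='my-da-project'
+   )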
Comparing to the traditional `Latency` strategy CMVM kernels, DA can usually reduce up to 30% of the LUTs and all DSPs used. + +.. rst-class:: light +.. image:: _static/da4ml-workflow.svg + :alt: Workflow of DA in hls4ml + :width: 600 + +When the DA strategy is used, the CMVM operations will be implemented bit-exactly, and the accumulator precision setting will not be used. + +.. [*] Not to be confused with `II=1`. `reuse_factor` is the `II` for one CMVM operation, not one layer. One layer may invoke the same CMVM kernel multiple times and thus has `II>1` while each CMVM operation is unrolled, e.g., convolution layers with more than one partition. + +Currently, the DA strategy is only available for the Vivado/Vitis HLS backends. The following layers are supported: +* Dense +* Convolutional (1D, 2D) +* EinsumDense +* Multi-head attention (implemented as multiple EinsumDense layers) + +While possible, the RNN layers are not yet supported by the DA strategy. + +For more details, please refer to the `da4ml repository `__ or the `paper `__. diff --git a/docs/advanced/extension.rst b/docs/advanced/extension.rst index d9f71d39b7..548ba6e164 100644 --- a/docs/advanced/extension.rst +++ b/docs/advanced/extension.rst @@ -18,10 +18,10 @@ To implement a custom layer in ``hls4ml`` with the extension API, the required c * Function config template * Registration of layer, source code, and templates -Complete example -================ +Complete example for Keras v2 +============================= -For concreteness, let's say our custom layer ``KReverse`` is implemented in Keras and reverses the order of the last dimension of the input. +For concreteness, let's say our custom layer ``KReverse`` is implemented in Keras v2 and reverses the order of the last dimension of the input. .. code-block:: Python @@ -140,7 +140,8 @@ In this case, the HLS code is valid for both the Vivado and Quartus backends. .. code-block:: Python # Register the converter for custom Keras layer - hls4ml.converters.register_keras_layer_handler('KReverse', parse_reverse_layer) + hls4ml.converters.register_keras_v2_layer_handler('KReverse', parse_reverse_layer) + # For keras v3, use register on subclassed KerasV3LayerHandler from hls4ml.converters.keras_v3._base instead # Register the hls4ml's IR layer hls4ml.model.layers.register_layer('HReverse', HReverse) diff --git a/docs/advanced/hgq.rst b/docs/advanced/hgq.rst index dd0faad7dc..ba1c484118 100644 --- a/docs/advanced/hgq.rst +++ b/docs/advanced/hgq.rst @@ -1,49 +1,70 @@ -=================================== -High Granularity Quantization (HGQ) -=================================== - -.. image:: https://github.com/calad0i/HGQ/actions/workflows/sphinx-build.yml/badge.svg - :target: https://calad0i.github.io/HGQ/ -.. image:: https://badge.fury.io/py/hgq.svg - :target: https://badge.fury.io/py/hgq +====================================== +High Granularity Quantization (HGQ2) +====================================== + +.. note:: + HGQ2 is the successor of the original `HGQ <./hgq1.html>`__. framework, which was built on Keras v2. HGQ2 built on top of Keras v3, leveraging its new features and improvements. + +.. image:: https://img.shields.io/badge/License-LGPLv3-blue.svg + :target: https://www.gnu.org/licenses/lgpl-3.0.en.html +.. image:: https://github.com/calad0i/HGQ2/actions/workflows/sphinx-build.yml/badge.svg + :target: https://calad0i.github.io/HGQ2/ +.. image:: https://badge.fury.io/py/hgq2.svg + :target: https://badge.fury.io/py/hgq2 .. 
image:: https://img.shields.io/badge/arXiv-2405.00645-b31b1b.svg :target: https://arxiv.org/abs/2405.00645 -`High Granularity Quantization (HGQ) `_ is a library that performs gradient-based automatic bitwidth optimization and quantization-aware training algorithm for neural networks to be deployed on FPGAs. By leveraging gradients, it allows for bitwidth optimization at arbitrary granularity, up to per-weight and per-activation level. +HGQ2 (High Granularity Quantization 2) is a quantization-aware training framework built on Keras v3, targeting real-time deep learning applications on edge devices like FPGAs. It provides a comprehensive set of tools for creating and training quantized neural networks with minimal effort. -.. image:: https://calad0i.github.io/HGQ/_images/overview.svg - :alt: Overview of HGQ - :align: center +HGQ2 implements an gradient-based automatic bitwidth optimization and quantization-aware training algorithm. By laveraging gradients, it allows for bitwidth optimization at arbitrary granularity, up to per-weight and per-activation level. -Conversion of models made with HGQ library is fully supported. The HGQ models are first converted to proxy model format, which can then be parsed by hls4ml bit-accurately. Below is an example of how to create a model with HGQ and convert it to hls4ml model. +.. rst-class:: light +.. image:: _static/hgq-overview.svg + :alt: HGQ-overview + :width: 600 -.. code-block:: Python +Key Features +----------- - import keras - from HGQ.layers import HDense, HDenseBatchNorm, HQuantize - from HGQ import ResetMinMax, FreeBOPs +- **Multi-backend support**: Works with TensorFlow, JAX, and PyTorch through Keras v3 +- **Flexible quantization**: Supports different quantization schemes including fixed-point and minifloat +- **Hardware synthesis**: Direct integration with hls4ml for FPGA deployment +- **Trainable quantization parameters**: Optimize bitwidths through gradient-based methods +- **Effective Bit-Operations (EBOP)**: Accurate resource estimation during training for the deployed firmware +- **Advanced layer support**: HGQ2 supports advanced layers like einsum, einsum dense, and multi-head attention layers with quantization and hardware synthesis support - model = keras.models.Sequential([ - HQuantize(beta=1.e-5), - HDenseBatchNorm(32, beta=1.e-5, activation='relu'), - HDenseBatchNorm(32, beta=1.e-5, activation='relu'), - HDense(10, beta=1.e-5), - ]) - opt = keras.optimizers.Adam(learning_rate=0.001) - loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True) - model.compile(optimizer=opt, loss=loss, metrics=['accuracy']) - callbacks = [ResetMinMax(), FreeBOPs()] +.. 
code-block:: python + :caption: Simple example - model.fit(..., callbacks=callbacks) + import keras + from hgq.layers import QDense, QConv2D + from hgq.config import LayerConfigScope, QuantizerConfigScope - from HGQ import trace_minmax, to_proxy_model - from hls4ml.converters import convert_from_keras_model + # Setup quantization configuration + # These values are the defaults, just for demonstration purposes here + with ( + # Configuration scope for setting the default quantization type and overflow mode + # The second configuration scope overrides the first one for the 'datalane' place + QuantizerConfigScope(place='all', default_q_type='kbi', overflow_mode='SAT_SYM'), + # Configuration scope for enabling EBOPs and setting the beta0 value + QuantizerConfigScope(place='datalane', default_q_type='kif', overflow_mode='WRAP'), + LayerConfigScope(enable_ebops=True, beta0=1e-5), + ): + model = keras.Sequential([ + QConv2D(32, (3, 3), activation='relu'), + keras.layers.MaxPooling2D((2, 2)), + keras.layers.Flatten(), + QDense(10) + ]) - trace_minmax(model, x_train, cover_factor=1.0) - proxy = to_proxy_model(model, aggressive=True) + ... # Training, evaluation, and anything else you want to do with the model - model_hls = convert_from_keras_model(proxy, backend='vivado',output_dir=... ,part=...) + model_hls = hls4ml.converters.convert_from_keras(model, ...) + # Model-wise precision propagation is done automatically for HGQ models for bit-exactness + # Do NOT pass precision config if you don't know what you are doing + model_hls.compile() -An interactive example of HGQ can be found in the `kaggle notebook `_. Full documentation can be found at `calad0i.github.io/HGQ `_. +.. note:: + Do not pass any precision configuration from ``hls4ml.converters.convert_from_keras`` in general. HGQ-defined models will invoke model-wise precision propagation automatically to ensure bit-exactness between the Keras model and the generated HLS code (See `here <./precision.html>`__ for more details). diff --git a/docs/advanced/hgq1.rst b/docs/advanced/hgq1.rst new file mode 100644 index 0000000000..dd0faad7dc --- /dev/null +++ b/docs/advanced/hgq1.rst @@ -0,0 +1,49 @@ +=================================== +High Granularity Quantization (HGQ) +=================================== + +.. image:: https://github.com/calad0i/HGQ/actions/workflows/sphinx-build.yml/badge.svg + :target: https://calad0i.github.io/HGQ/ +.. image:: https://badge.fury.io/py/hgq.svg + :target: https://badge.fury.io/py/hgq +.. image:: https://img.shields.io/badge/arXiv-2405.00645-b31b1b.svg + :target: https://arxiv.org/abs/2405.00645 + +`High Granularity Quantization (HGQ) `_ is a library that performs gradient-based automatic bitwidth optimization and quantization-aware training algorithm for neural networks to be deployed on FPGAs. By leveraging gradients, it allows for bitwidth optimization at arbitrary granularity, up to per-weight and per-activation level. + +.. image:: https://calad0i.github.io/HGQ/_images/overview.svg + :alt: Overview of HGQ + :align: center + +Conversion of models made with HGQ library is fully supported. The HGQ models are first converted to proxy model format, which can then be parsed by hls4ml bit-accurately. Below is an example of how to create a model with HGQ and convert it to hls4ml model. + +.. 
code-block:: Python + + import keras + from HGQ.layers import HDense, HDenseBatchNorm, HQuantize + from HGQ import ResetMinMax, FreeBOPs + + model = keras.models.Sequential([ + HQuantize(beta=1.e-5), + HDenseBatchNorm(32, beta=1.e-5, activation='relu'), + HDenseBatchNorm(32, beta=1.e-5, activation='relu'), + HDense(10, beta=1.e-5), + ]) + + opt = keras.optimizers.Adam(learning_rate=0.001) + loss = keras.losses.SparseCategoricalCrossentropy(from_logits=True) + model.compile(optimizer=opt, loss=loss, metrics=['accuracy']) + callbacks = [ResetMinMax(), FreeBOPs()] + + model.fit(..., callbacks=callbacks) + + from HGQ import trace_minmax, to_proxy_model + from hls4ml.converters import convert_from_keras_model + + trace_minmax(model, x_train, cover_factor=1.0) + proxy = to_proxy_model(model, aggressive=True) + + model_hls = convert_from_keras_model(proxy, backend='vivado',output_dir=... ,part=...) + + +An interactive example of HGQ can be found in the `kaggle notebook `_. Full documentation can be found at `calad0i.github.io/HGQ `_. diff --git a/docs/advanced/precision.rst b/docs/advanced/precision.rst new file mode 100644 index 0000000000..622e20d60b --- /dev/null +++ b/docs/advanced/precision.rst @@ -0,0 +1,14 @@ +============================== +Model-wise Precision Inference +============================== + +The model-wise precision inference (implemented in :py:class:`~hls4ml.model.optimizer.passes.bit_exact.BitExact`) attempts to infer the appropriate for **all** precisions in the model. Unlike the automatic precision inference, this pass disregards all user-defined precisions, and "trust" only data embedded in the model, i.e., the actual values of the weights and explicit quantizers defined between layers. + +Currently, this pass will only be triggered by the presence of any ``FixedPointQuantizer`` (explicit quantizer operator) layer in the model. This pass uses an modified symbolic interval arithmetic to compute the ranges and needed quantization steps for all precisions in the model graph, with the goal of eliminating the discrepency between the quantized model and the original model. Currently, only HGQ/HGQ2 models will produce such quantizers, and the pass will not be triggered for models from other frontends. + +If the original model is not properly quantized, this pass will lead to huge bitwidths in the model. In this context, properly quantized models are those that have quantizers defined between **all layers with non-trivial arithmetics**. Importantly, quantizers **should be used immediately after the inputs**, or the input precision may not be properly inferred. The successful application of this pass should result in bit-exact model, i.e., the quantized model should produce the same outputs as the original model for all inputs [*]_. + +.. [*] While quantized, the original model will still operate on float-point values, so there is a chance that the outputs will not be exactly the same due to float rounding errors in the original model. + +.. note:: + Unlike the automatic precision inference, it is strongly recommended to **not** use the ``config_from_*`` functions to set the precisions in the model. diff --git a/docs/api/configuration.rst b/docs/api/configuration.rst index 1bc8f0676c..5266d93ef6 100644 --- a/docs/api/configuration.rst +++ b/docs/api/configuration.rst @@ -73,7 +73,7 @@ for automatic setting of precisions. The layer-level precisions with the ``'nam (see :ref:`Automatic precision inference`). 
Note that layer-level settings take precedence over model-level settings. A ``'name'`` granularity is required for QKeras and QONNX model parsing. Passing the backend to these functions is recommended because some configuration options depend on the backend. See :py:class:`~hls4ml.utils.config.config_from_keras_model` and similar for more information on the various options. Note specifically the documentation of :py:class:`~hls4ml.utils.config.config_from_pytorch_model` on how to handle differences in input data -formats between pytorch and keras (hls4ml follows keras conventions internally). +formats between pytorch and keras (hls4ml follows keras conventions internally). Note that passing precision configurations for HGQ/HGQ2 models is not needed in general, and **should not be done** without understanding the implications. One can override specific values before using the configuration: @@ -158,12 +158,12 @@ For Vivado backend the options are: * **Part**\ : the particular FPGA part number that you are considering, here it's a Xilinx Virtex UltraScale+ VU13P FPGA * **ClockPeriod**\ : the clock period, in ns, at which your algorithm runs Then you have some optimization parameters for how your algorithm runs: -* **IOType**\ : your options are ``io_parallel`` or ``io_stream`` which defines the type of data structure used for inputs, intermediate activations between layers, and outputs. For ``io_parallel``, arrays are used that, in principle, can be fully unrolled and are typically implemented in RAMs. For ``io_stream``, HLS streams are used, which are a more efficient/scalable mechanism to represent data that are produced and consumed in a sequential manner. Typically, HLS streams are implemented with FIFOs instead of RAMs. For more information see `here `__. -* **HLSConfig**\: the detailed configuration of precision and parallelism, including: +* **IOType**\ : your options are ``io_parallel`` or ``io_stream`` which defines how data is transferred into and out of the HLS model IP, and how the data is transferred between layers. For ``io_parallel``, data are directly wired between layers fully in parallel. For ``io_stream``, HLS streams are used, which instantiates as stateful FIFO buffers, which effectively decouples the producer and consumer (upstream and downstream in a neural network), removing the need of a global state machine coordinating the exact timing for io operations. This is particular useful with the DATAFLOW pipeline style. For more information, see `here `__. + * **HLSConfig**\: the detailed configuration of precision and parallelism, including: * **ReuseFactor**\ : in the case that you are pipelining, this defines the pipeline interval or initiation interval * **ParallelizationFactor**\ : The number of output "pixels" to compute in parallel in convolutional layers. Increasing this parameter results in significant increase in resources required on the FPGA. - * **Strategy**\ : Optimization strategy on FPGA, either "Latency", "Resource" or "Unrolled". If none is supplied then hl4ml uses "Latency" as default. Note that a reuse factor larger than 1 should be specified when using "resource" or "unrolled" strategy. An example of using larger reuse factor can be found `here. `__ + * **Strategy**\ : Optimization strategy on FPGA, either "Latency", "Resource", "distributed_arithmetic" (or "da"), or "Unrolled". If none is supplied then hl4ml uses "Latency" as default. 
Note that a reuse factor must be 1 if using "distributed_arithmetic", and must be larger than 1 when using "resource" or "unrolled" strategy. * **PipelineStyle**\ : Set the top level pipeline style. Valid options are "auto", "pipeline" and "dataflow". If unspecified, it defaults to "auto". * **PipelineInterval**\ : Optionally override the desired initiation interval of the design. Only valid in combination with "pipeline" style. If unspecified, it is left to the compiler to decide, ideally matching the largest reuse factor of the network. * **Precision**\ : this defines the precision of your inputs, outputs, weights and biases. It is denoted by ``fixed``\ , where ``Y`` is the number of bits representing the signed number above the binary point (i.e. the integer part), and ``X`` is the total number of bits. Additionally, integers in the type (\ ``int``\ , where ``N`` is a bit-size from 1 to 1024) can also be used. The format follows ``ap_fixed`` and ``ap_int`` conventions. You have a chance to further configure this more finely with per-layer configuration described below. In the per-layer configuration (but not globally) one can also use ``'auto'`` precision. diff --git a/docs/frontend/keras.rst b/docs/frontend/keras.rst index 093db120b4..e5c1a77722 100644 --- a/docs/frontend/keras.rst +++ b/docs/frontend/keras.rst @@ -2,18 +2,22 @@ Keras and its quantized variants ================================ -Keras and the quantization library QKeras are well supported in ``hls4ml``. Both Keras v2 (``tf.keras``) and the new Keras v3 are supported. While the Keras v2 support is based on parsing the serialized json representation of the model, the Keras v3 support uses direct model inspection. +Keras and its quantized variants are supported in ``hls4ml``. Both Keras v2 (``tf.keras``) and the new Keras v3 are supported. While the Keras v2 support is based on parsing the serialized json representation of the model, the Keras v3 support uses direct model inspection. -Currently, ``hls4ml`` can parse most Keras layers, including core layers, convolutional layers, pooling layers, recurrent layers, merging/reshaping layers and activation layers, implemented either via sequential or functional API. Notably missing are the attention and normalization layers. The ``Lambda`` layers don't save their state in the serialized format and are thus impossible to parse. In this case, the ``Lambda`` layers can be implemented as custom layers and parsed via the :ref:`Extension API`. +For Keras v2, QKeras, and HGQ, ``hls4ml`` supports most of its layers, including core layers, convolutional layers, pooling layers, recurrent layers (not implemented in HGQ), merging/reshaping layers, and activation layers. For normalization layers, only the ``(Q)BatchNormalization`` layer is supported. -The ``data_format='channels_first'`` parameter of Keras layers is supported, but not extensively tested. All HLS implementations in ``hls4ml`` are based on ``channels_last`` data format and need to be converted to that format before the HLS code can be emitted. We encourage users of ``channels_first`` to report their experiences to developers on GitHub. +For Keras v3, the support for EinsumDense layer is added. For HGQ2, the following layers are supported in addition: `QEinsum`, `QMultiHeadAttention`, `QUnaryFunctionLUT` (arbitrary unary function as a 1-d lookup table), and some binary operators. + +keras `Operators` that are not layers are generally not supported in ``hls4ml``. 
This includes operators such as `Add`, `Subtract`, `Multiply`, and `Divide`. Please use the corresponding Keras layers instead. + +Arbitrary ``Lambda`` layers are not, and are not planned to be supported in ``hls4ml`` due to the difficultness to parse generic lambda expression. For custom operations required, please refer to the :ref:`Extension API ` documentation to add custom layers to the conversion process. + +The ``data_format='channels_first'`` parameter of Keras layers is supported for a limited subset of layers and it is not extensively tested. All HLS implementations in ``hls4ml`` are based on ``channels_last`` data format convention and need to be converted to that format before the HLS code can be emitted. We encourage users of ``channels_first`` to report their experiences to developers on GitHub. * `QKeras `_ - The equivalent QKeras API and its quantizers are also supported by ``hls4ml``. QKeras is not compatible with Keras v3. Currently, only HGQ2 is compatible with Keras v3 (see below). + The equivalent QKeras API and its quantizers are also supported by ``hls4ml``. QKeras is not compatible with Keras v3. * `HGQ `_ - The equivalent HGQ API is also supported. HGQ is not compatible with Keras v3. See `advanced/HGQ <../advanced/hgq.html>`__ for more information. + The equivalent HGQ API is also supported. * `HGQ2 `_ - HGQ2 is based on Keras v3. Its support in hls4ml is currently under development. - -The development team of ``hls4ml`` is currently exploring options for QKeras alternative and will provide a drop-in replacement API compatible with Keras v3. + The equivalent HGQ2 API is also supported, plus some additional advanced operators. diff --git a/docs/index.rst b/docs/index.rst index a87e7c95e6..2bdb7964ed 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -47,6 +47,8 @@ advanced/profiling advanced/auto advanced/hgq + advanced/da + advanced/precision advanced/fifo_depth advanced/extension advanced/model_optimization diff --git a/docs/intro/introduction.rst b/docs/intro/introduction.rst index 8d603bd78f..8c276e02a7 100644 --- a/docs/intro/introduction.rst +++ b/docs/intro/introduction.rst @@ -20,11 +20,11 @@ The Solution: ``hls4ml`` ======================== .. image:: ../img/overview.jpg - + :alt: Overview of hls4ml, taken from `JINST 13 P07027 (2018) `_. With this in mind, let's take a look at how ``hls4ml`` helps to achieve such a goal. First, it's important to realize the architecture differences between an FPGA and a CPU or GPU. An FPGA can be specifically programmed to do a certain task, in this case evaluate neural networks given a set of inputs, and as such can be highly optimized for the task, with tricks like pipelining and parallel evaluation. However, this means dynamic remapping while running isn't really a possibility. FPGAs also often come at a comparatively low power cost with respect to CPUs and GPUs. This allows ``hls4ml`` to build HLS code from compressed neural networks that results in predictions on the microsecond scale for latency. The ``hls4ml`` tool saves the time investment needed to convert a neural network to a hardware design language or even HLS code, thus allowing for rapid prototyping. -For more detailed information on technical details of ``hls4ml``, read the "Internals" section of our documentation or our :doc:`References ` page. All figures on this page are taken from the following paper: `JINST 13 P07027 (2018) `_. 
+For more detailed information on technical details of ``hls4ml``, read the "Internals" section of our documentation or our :doc:`References ` page. diff --git a/docs/intro/reference.rst b/docs/intro/reference.rst index 0bd5912bb1..653c8c76d4 100644 --- a/docs/intro/reference.rst +++ b/docs/intro/reference.rst @@ -68,6 +68,20 @@ Additionally, if you use specific features developed in later papers, please cit year = "2022" } +distributed arithmetic: + +.. code-block:: bibtex + + @misc{Sun:2025, + title={da4ml: Distributed Arithmetic for Real-time Neural Networks on FPGAs}, + author={Chang Sun and others}, + year={2025}, + eprint={2507.04535}, + archivePrefix={arXiv}, + primaryClass={cs.AR}, + url={https://arxiv.org/abs/2507.04535}, + } + binary/ternary networks: .. code-block:: bibtex diff --git a/docs/intro/setup.rst b/docs/intro/setup.rst index 682d5fe54e..2b5b55b07a 100644 --- a/docs/intro/setup.rst +++ b/docs/intro/setup.rst @@ -14,12 +14,6 @@ The latest release of ``hls4ml`` can be installed with ``pip``: pip install hls4ml -If you want to use our :doc:`profiling <../advanced/profiling>` toolbox, you might need to install extra dependencies: - -.. code-block:: - - pip install hls4ml[profiling] - .. warning:: Previously, versions of hls4ml were made available on ``conda-forge``. These are outdated and should NOT be used. Installing with ``pip`` is currently the only supported method. @@ -40,6 +34,10 @@ Dependencies .. note:: As of version 1.1.0+, all conversion frontend specific packages are optional. Only install the packages you need. + +Frontend +-------- + The ``hls4ml`` library requires python 3.10 or later, and depends on a number of Python packages and external tools for synthesis and simulation. Python dependencies are automatically managed by ``pip`` or ``conda``. The following Python packages are all optional and are only required if you intend to use the corresponding converter. @@ -55,12 +53,13 @@ The following Python packages are all optional and are only required if you inte * Quantization support * `QKeras `_: based on Keras v2. See `frontend/keras <../frontend/keras.html>`_ for more details * `HGQ `_: Based on Keras v2. See `advanced/HGQ <../advanced/hgq.html>`_ for more details. + * `HGQ2 `_: Based on Keras v3. See `advanced/HGQ2 <../advanced/hgq.html>`_ for more details. * `Brevitas `_: Based on PyTorch. See `frontend/pytorch <../frontend/pytorch.html>`_ for more details. * `QONNX `_: Based on ONNX. See `frontend/onnx <../frontend/onnx.html>`_ for more details. -Running C simulation from Python requires a C++11-compatible compiler. On Linux, a GCC C++ compiler ``g++`` is required. Any version from a recent -Linux should work. On MacOS, the *clang*-based ``g++`` is enough. For the oneAPI backend, one must have oneAPI installed, along with the FPGA compiler, -to run C/SYCL simulations. +Running C simulation from Python requires a C++11-compatible compiler. On Linux, a GCC C++ compiler ``g++`` is required. Any version from a recent Linux should work. On MacOS, the *clang*-based ``g++`` is enough. For the oneAPI backend, one must have oneAPI installed, along with the FPGA compiler, to run C/SYCL simulations. + +Specific functionalities may need additional Python packages. If any needed is missing, ``hls4ml`` will raise an error and prompt you to install the missing packages. To run FPGA synthesis, installation of following tools is required: @@ -84,15 +83,15 @@ Here we give line-by-line instructions to demonstrate the general workflow. .. 
code-block:: python import hls4ml - import tensorflow as tf - from tensorflow.keras.layers import Dense + from keras.models import Sequential + from keras.layers import Dense # Construct a basic keras model - model = tf.keras.models.Sequential() - model.add(Dense(64, input_shape=(16,), name='Dense', kernel_initializer='lecun_uniform', kernel_regularizer=None)) - model.add(Activation(activation='elu', name='Activation')) - model.add(Dense(32, name='Dense2', kernel_initializer='lecun_uniform', kernel_regularizer=None)) - model.add(Activation(activation='elu', name='Activation2')) + model = Sequential() + model.add(Dense(64, input_shape=(16,))) + model.add(Activation(activation='relu')) + model.add(Dense(32)) + model.add(Activation(activation='relu')) # This is where you would train the model in a real-world scenario @@ -139,71 +138,6 @@ Done! You've built your first project using ``hls4ml``! To learn more about our If you want to configure your model further, check out our :doc:`Configuration <../api/configuration>` page. -.. - Apart from our main API, we also support model conversion using a command line interface, check out our next section to find out more: - - Getting started with hls4ml CLI (deprecated) - -------------------------------------------- - - As an alternative to the recommended Python PI, the command-line interface is provided via the ``hls4ml`` command. - - To follow this tutorial, you must first download our ``example-models`` repository: - - .. code-block:: bash - - git clone https://github.com/fastmachinelearning/example-models - - Alternatively, you can clone the ``hls4ml`` repository with submodules - - .. code-block:: bash - - git clone --recurse-submodules https://github.com/fastmachinelearning/hls4ml - - The model files, along with other configuration parameters, are defined in the ``.yml`` files. - Further information about ``.yml`` files can be found in :doc:`Configuration ` page. - - In order to create an example HLS project, first go to ``example-models/`` from the main directory: - - .. code-block:: bash - - cd example-models/ - - And use this command to translate a Keras model: - - .. code-block:: bash - - hls4ml convert -c keras-config.yml - - This will create a new HLS project directory with an implementation of a model from the ``example-models/keras/`` directory. - To build the HLS project, do: - - .. code-block:: bash - - hls4ml build -p my-hls-test -a - - This will create a Vivado HLS project with your model implementation! - - **NOTE:** For the last step, you can alternatively do the following to build the HLS project: - - .. code-block:: Bash - - cd my-hls-test - vivado_hls -f build_prj.tcl - - ``vivado_hls`` can be controlled with: - - .. code-block:: bash - - vivado_hls -f build_prj.tcl "csim=1 synth=1 cosim=1 export=1 vsynth=1" - - Setting the additional parameters from ``1`` to ``0`` disables that step, but disabling ``synth`` also disables ``cosim`` and ``export``. 
- - Further help - ^^^^^^^^^^^^ - - * For further information about how to use ``hls4ml``\ , do: ``hls4ml --help`` or ``hls4ml -h`` - * If you need help for a particular ``command``\ , ``hls4ml command -h`` will show help for the requested ``command`` - * We provide a detailed documentation for each of the command in the :doc:`Command Help ` section Existing examples ----------------- diff --git a/docs/intro/status.rst b/docs/intro/status.rst index f025d3f79c..75ece1897e 100644 --- a/docs/intro/status.rst +++ b/docs/intro/status.rst @@ -13,20 +13,28 @@ See the :ref:`Release Notes` section for a changelog. Features ======== -A list of supported ML frameworks, HLS backends, and neural network architectures, including a summary table is below. Dependencies are given in the :doc:`Setup ` page. +A list of supported ML frameworks (Frontends), HLS backends, and neural network architectures, including a summary table is below. Dependencies are given in the :doc:`Setup ` page. -ML framework support: +Frontend support: -* (Q)Keras +* Keras + * Keras v2 + * QKeras + * HGQ + * Keras v3 + * HGQ2 * PyTorch -* (Q)ONNX +* ONNX + * QONNX Neural network architectures: * Fully connected NN (multilayer perceptron, MLP) -* Convolutional NN -* Recurrent NN (LSTM) -* Graph NN (GarNet) +* Convolutional NN (1D and 2D) +* Recurrent NN (RNN, LSTM, GRU) +* GarNet +* Einsum and EinsumDense (Einsum) +* Multi-head attention (MHA) (experimental) HLS backends: @@ -38,68 +46,50 @@ HLS backends: A summary of the on-going status of the ``hls4ml`` tool is in the table below. -.. list-table:: - :header-rows: 1 - - * - ML framework/HLS backend - - (Q)Keras - - PyTorch - - (Q)ONNX - - Vivado HLS - - Intel HLS - - Vitis HLS - - Catapult HLS - - oneAPI - * - MLP - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``experimental`` - * - CNN - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``experimental`` - * - RNN (LSTM) - - ``supported`` - - ``supported`` - - ``N/A`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``supported`` - - ``experimental`` - * - GNN (GarNet) - - ``supported`` - - ``in development`` - - ``N/A`` - - ``N/A`` - - ``N/A`` - - ``N/A`` - - ``N/A`` - - ``N/A`` +.. 
table:: hls4ml Supported Features + ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| Frontend/Backend | MLP | CNN | RNN/LSTM/GRU | GarNet | Einsum | MHA | ++=======================+=====+=====+==============+========+========+=====+ +| Keras v2/QKeras | ✅ | ✅ | ✅ | ✅ | N/A | ❌ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| HGQ | ✅ | ✅ | N/A | N/A | N/A | N/A | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| Keras v3 | ✅ | ✅ | ✅ | N/A | ✅ | ❌ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| HGQ2 | ✅ | ✅ | N/A | N/A | ✅ | ✅ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| Torch | ✅ | ✅ | ✅ | ❌ | ✅ | ❌ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| ONNX | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| QONNX | ✅ | ✅ | N/A | N/A | N/A | N/A | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| Vivado/Vitis HLS | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| Intel HLS | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| Catapult HLS | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| oneAPI (experimental) | ✅ | ✅ | ✅ | ❌ | ❌ | ❌ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ Other feature notes: * ``hls4ml`` is tested on the following platforms. Newer versions might work just fine, but try at your own risk. - * Vivado HLS versions 2018.2 to 2020.1 - * Intel HLS versions 20.1 to 21.4, versions \> 21.4 have not been tested. - * Vitis HLS versions 2022.2 to 2024.1. Versions \<= 2022.1 are known not to work. - * Catapult HLS versions 2024.1_1 to 2024.2 - * oneAPI versions 2024.1 to 2025.0 - -* ``hls4ml`` supports Linux and requires python \>=3.10. hlsml does not require a specific Linux distribution version and we recommended to follow the requirements of the HLS tool you are using. -* Windows and macOS are not supported. Setting up ``hls4ml`` on these platforms, for example using the Windows Subsystem for Linux (WSL) should be possible, but we do not provide support for such use cases. + + - Vivado HLS versions 2018.2 to 2020.1 + - Intel HLS versions 20.1 to 21.4, versions > 21.4 have not been tested. + - Vitis HLS versions 2022.2 to 2024.1. Versions <= 2022.1 are known not to work. + - Catapult HLS versions 2024.1_1 to 2024.2 + - oneAPI versions 2024.1 to 2025.0. 2025.1 is known to not work. + +* ``hls4ml`` supports Linux [*]_ and requires python >=3.10. hls4ml does not require a specific Linux distribution version and we recommend following the requirements of the HLS tool you are using. +* Windows and macOS are not supported. Setting up ``hls4ml`` on these platforms, for example using the Windows Subsystem for Linux (WSL), should be possible, but we do not provide support for such use cases. * BDT support has moved to the `Conifer `__ package +.. [*] For compiling the projects for simulation or actual HLS. Otherwise, the code **may** be used on other platforms and it will likely to work. However, please note that Windows or other platforms are **not supported** in general and are not tested. 
+ Example Models ============== diff --git a/hls4ml/backends/vivado/vivado_backend.py b/hls4ml/backends/vivado/vivado_backend.py index d5309c377f..84daf5f5f4 100644 --- a/hls4ml/backends/vivado/vivado_backend.py +++ b/hls4ml/backends/vivado/vivado_backend.py @@ -359,7 +359,7 @@ def init_dense(self, layer): else: self.set_closest_reuse_factor(layer, n_in, n_out, include_max_rf=False) layer.set_attr('strategy', 'resource_unrolled') - elif layer.model.config.get_strategy(layer).lower() == 'distributed_arithmetic': + elif layer.model.config.get_strategy(layer).lower() in ('distributed_arithmetic', 'da'): rf = layer.get_attr('reuse_factor') if rf != 1: raise Exception(f'Layer {layer.name} has rf = {rf} != 1, but has strategy = "distributed_arithmetic".') @@ -401,7 +401,7 @@ def init_conv1d(self, layer): else: self.set_closest_reuse_factor(layer, n_in, n_out, include_max_rf=False) layer.set_attr('strategy', 'resource_unrolled') - elif layer.model.config.get_strategy(layer).lower() == 'distributed_arithmetic': + elif layer.model.config.get_strategy(layer).lower() in ('distributed_arithmetic', 'da'): rf = layer.get_attr('reuse_factor') if rf != 1: raise Exception(f'Layer {layer.name} has rf = {rf} != 1, but has strategy = "distributed_arithmetic".') @@ -527,7 +527,7 @@ def init_conv2d(self, layer): else: self.set_closest_reuse_factor(layer, n_in, n_out, include_max_rf=False) layer.set_attr('strategy', 'resource_unrolled') - elif layer.model.config.get_strategy(layer).lower() == 'distributed_arithmetic': + elif layer.model.config.get_strategy(layer).lower() in ('distributed_arithmetic', 'da'): rf = layer.get_attr('reuse_factor') if rf != 1: raise Exception(f'Layer {layer.name} has rf = {rf} != 1, but has strategy = "distributed_arithmetic".') diff --git a/hls4ml/contrib/kl_layer/kl_layer.py b/hls4ml/contrib/kl_layer/kl_layer.py index c3c27a849a..59a3f10850 100644 --- a/hls4ml/contrib/kl_layer/kl_layer.py +++ b/hls4ml/contrib/kl_layer/kl_layer.py @@ -123,7 +123,7 @@ def parse_klloss_layer(keras_layer, input_names, input_shapes, data_reader): def main(): # Register the converter for custom Keras layer - hls4ml.converters.register_keras_layer_handler('KLLoss', parse_klloss_layer) + hls4ml.converters.register_keras_v2_layer_handler('KLLoss', parse_klloss_layer) # Register the hls4ml's IR layer hls4ml.model.layers.register_layer('KLLoss', HKLLoss) diff --git a/hls4ml/converters/__init__.py b/hls4ml/converters/__init__.py index 89f4fc04b9..4414286719 100644 --- a/hls4ml/converters/__init__.py +++ b/hls4ml/converters/__init__.py @@ -8,7 +8,7 @@ from hls4ml.converters.keras_v2_to_hls import KerasReader # noqa: F401 from hls4ml.converters.keras_v2_to_hls import get_supported_keras_layers # noqa: F401 from hls4ml.converters.keras_v2_to_hls import parse_keras_model # noqa: F401 -from hls4ml.converters.keras_v2_to_hls import keras_v2_to_hls, register_keras_layer_handler +from hls4ml.converters.keras_v2_to_hls import keras_v2_to_hls, register_keras_v2_layer_handler from hls4ml.converters.keras_v3_to_hls import keras_v3_to_hls, parse_keras_v3_model # noqa: F401 from hls4ml.converters.onnx_to_hls import get_supported_onnx_layers # noqa: F401 from hls4ml.converters.onnx_to_hls import parse_onnx_model # noqa: F401 @@ -41,7 +41,7 @@ if callable(func) and hasattr(func, 'handles') and func.__module__ == lib.__name__: for layer in func.handles: # type: ignore if model_type == 'keras': - register_keras_layer_handler(layer, func) + register_keras_v2_layer_handler(layer, func) elif model_type == 'pytorch': 
register_pytorch_layer_handler(layer, func) elif model_type == 'onnx': diff --git a/hls4ml/converters/keras_v2_to_hls.py b/hls4ml/converters/keras_v2_to_hls.py index daa9fc5575..cebaeab25b 100644 --- a/hls4ml/converters/keras_v2_to_hls.py +++ b/hls4ml/converters/keras_v2_to_hls.py @@ -127,7 +127,7 @@ def get_layer_handlers(): return layer_handlers -def register_keras_layer_handler(layer_cname, handler_func): +def register_keras_v2_layer_handler(layer_cname, handler_func): """Register a handler function for the given layer class name. The handler function should have the following signature: diff --git a/test/pytest/test_extensions.py b/test/pytest/test_extensions.py index 23bd6734f2..ee608c641a 100644 --- a/test/pytest/test_extensions.py +++ b/test/pytest/test_extensions.py @@ -124,7 +124,7 @@ def format(self, node): @pytest.fixture(scope='session', autouse=True) def register_custom_layer(): # Register the converter for custom Keras layer - hls4ml.converters.register_keras_layer_handler('KReverse', parse_reverse_layer) + hls4ml.converters.register_keras_v2_layer_handler('KReverse', parse_reverse_layer) # Register the hls4ml's IR layer hls4ml.model.layers.register_layer('HReverse', HReverse) From df1bb35362538dae72bf17d206031ffc5c41fb99 Mon Sep 17 00:00:00 2001 From: Chang Sun Date: Mon, 18 Aug 2025 12:12:29 -0700 Subject: [PATCH 2/8] updates --- docs/advanced/extension.rst | 58 +++++++++++++++++++++++++------------ docs/advanced/hgq.rst | 3 +- docs/advanced/hgq1.rst | 3 ++ docs/api/configuration.rst | 5 +++- docs/conf.py | 3 +- docs/frontend/keras.rst | 2 +- docs/intro/setup.rst | 6 ++-- docs/intro/status.rst | 12 ++++---- pyproject.toml | 1 + 9 files changed, 63 insertions(+), 30 deletions(-) diff --git a/docs/advanced/extension.rst b/docs/advanced/extension.rst index 548ba6e164..c357f4c6d2 100644 --- a/docs/advanced/extension.rst +++ b/docs/advanced/extension.rst @@ -26,14 +26,14 @@ For concreteness, let's say our custom layer ``KReverse`` is implemented in Kera .. code-block:: Python # Keras implementation of a custom layer - class KReverse(tf.keras.layers.Layer): + class KReverse(keras.layers.Layer): '''Keras implementation of a hypothetical custom layer''' def __init__(self): super().__init__() def call(self, inputs): - return tf.reverse(inputs, axis=[-1]) + return inputs[..., ::-1] def get_config(self): return super().get_config() @@ -58,19 +58,42 @@ This parser reads the attributes of the Keras layer instance and populates a dic It also returns a list of output shapes (one sjape for each output). In this case, there a single output with the same shape as the input. -.. code-block:: Python +.. tabs:: + .. tab:: Keras v2 + .. code-block:: Python + + # Parser for converter + def parse_reverse_layer(keras_layer, input_names, input_shapes, data_reader): + layer = {} + layer['class_name'] = 'KReverse' + layer['name'] = keras_layer['config']['name'] + layer['n_in'] = input_shapes[0][1] + + if input_names is not None: + layer['inputs'] = input_names + + return layer, [shape for shape in input_shapes[0]] + + .. tab:: Keras v3 + .. 
code-block:: Python - # Parser for converter - def parse_reverse_layer(keras_layer, input_names, input_shapes, data_reader): - layer = {} - layer['class_name'] = 'HReverse' - layer['name'] = keras_layer['config']['name'] - layer['n_in'] = input_shapes[0][1] + from hls4ml.converters.keras_v3._base import register, KerasV3LayerHandler - if input_names is not None: - layer['inputs'] = input_names + @register + class KReverseHandler(KerasV3LayerHandler): + '''Keras v3 layer handler for KReverse''' - return layer, [shape for shape in input_shapes[0]] + handles = ('KReverse',) + def handle( + self, + layer: 'keras.Layer', + in_tensors: Sequence['KerasTensor'], + out_tensors: Sequence['KerasTensor'], + ) -> dict[str, Any] | tuple[dict[str, Any], ...]: + # Only layer-specific parameters are needed. + # Common parameters are automatically added in the base class. + assert len(in_tensors[0].shape) == 2, 'KReverse is only supported for 2D tensors' + return {'n_in': in_tensors[0].shape[-1]} Next, we need the actual HLS implementaton of the function, which can be written in a header file ``nnet_reverse.h``. @@ -144,30 +167,29 @@ In this case, the HLS code is valid for both the Vivado and Quartus backends. # For keras v3, use register on subclassed KerasV3LayerHandler from hls4ml.converters.keras_v3._base instead # Register the hls4ml's IR layer - hls4ml.model.layers.register_layer('HReverse', HReverse) + hls4ml.model.layers.register_layer('KReverse', HReverse) for backend_id in ['Vivado', 'Quartus']: # Register the optimization passes (if any) backend = hls4ml.backends.get_backend(backend_id) - backend.register_pass('remove_duplicate_reverse', RemoveDuplicateReverse, flow=f'{backend_id.lower()}:optimize') # Register template passes for the given backend backend.register_template(HReverseConfigTemplate) backend.register_template(HReverseFunctionTemplate) # Register HLS implementation - backend.register_source('nnet_reverse.h') + backend.register_source('/path/to/your/nnet_reverse.h') Finally, we can actually test the ``hls4ml`` custom layer compared to the Keras one. .. code-block:: Python # Test if it works - kmodel = tf.keras.models.Sequential( + kmodel = keras.models.Sequential( [ - tf.keras.layers.Input(shape=(8,)), + keras.layers.Input(shape=(8,)), KReverse(), - tf.keras.layers.ReLU(), + keras.layers.ReLU(), ] ) diff --git a/docs/advanced/hgq.rst b/docs/advanced/hgq.rst index ba1c484118..de218b351e 100644 --- a/docs/advanced/hgq.rst +++ b/docs/advanced/hgq.rst @@ -3,7 +3,8 @@ High Granularity Quantization (HGQ2) ====================================== .. note:: - HGQ2 is the successor of the original `HGQ <./hgq1.html>`__. framework, which was built on Keras v2. HGQ2 built on top of Keras v3, leveraging its new features and improvements. + New projects are encouraged to use `HGQ2 <../hgq2.html>`_ instead of the original `HGQ <../hgq.html>`_. + HGQ2 extends the original HGQ with more supported layers, more quantizer options, and is on top of Keras v3, which can be used natively with JAX, PyTorch, and TensorFlow backends. .. image:: https://img.shields.io/badge/License-LGPLv3-blue.svg :target: https://www.gnu.org/licenses/lgpl-3.0.en.html diff --git a/docs/advanced/hgq1.rst b/docs/advanced/hgq1.rst index dd0faad7dc..3d12675d18 100644 --- a/docs/advanced/hgq1.rst +++ b/docs/advanced/hgq1.rst @@ -2,6 +2,9 @@ High Granularity Quantization (HGQ) =================================== +.. note:: + While still supported and maintained, HGQ is deprecated in favor of `HGQ2 <../hgq2.html>`_. 
New projects are strongly encouraged to use HGQ2 instead. + .. image:: https://github.com/calad0i/HGQ/actions/workflows/sphinx-build.yml/badge.svg :target: https://calad0i.github.io/HGQ/ .. image:: https://badge.fury.io/py/hgq.svg diff --git a/docs/api/configuration.rst b/docs/api/configuration.rst index 5266d93ef6..1c0e1f644e 100644 --- a/docs/api/configuration.rst +++ b/docs/api/configuration.rst @@ -73,7 +73,10 @@ for automatic setting of precisions. The layer-level precisions with the ``'nam (see :ref:`Automatic precision inference`). Note that layer-level settings take precedence over model-level settings. A ``'name'`` granularity is required for QKeras and QONNX model parsing. Passing the backend to these functions is recommended because some configuration options depend on the backend. See :py:class:`~hls4ml.utils.config.config_from_keras_model` and similar for more information on the various options. Note specifically the documentation of :py:class:`~hls4ml.utils.config.config_from_pytorch_model` on how to handle differences in input data -formats between pytorch and keras (hls4ml follows keras conventions internally). Note that passing precision configurations for HGQ/HGQ2 models is not needed in general, and **should not be done** without understanding the implications. +formats between pytorch and keras (hls4ml follows keras conventions internally). + +.. warning:: + Note that passing precision configurations when invoking the full model precision propagation (by default for HGQ/HGQ2 models, or when `bit_exact=True` is set for other frontends) is **not needed** and **should not be done** without understanding the implications. One can override specific values before using the configuration: diff --git a/docs/conf.py b/docs/conf.py index e4d7f399c1..b91f3d8119 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -70,6 +70,7 @@ def get_pypi_version(package, url_pattern=URL_PATTERN): 'sphinx.ext.napoleon', 'sphinx_contributors', 'sphinx_github_changelog', + 'sphinx_tabs.tabs', ] # Note: to build locally, you will need to set the SPHINX_GITHUB_CHANGELOG_TOKEN @@ -103,7 +104,7 @@ def get_pypi_version(package, url_pattern=URL_PATTERN): html_theme_options = { 'canonical_url': '', - 'analytics_id': '', # Provided by Google in your dashboard + 'analytics_id': '', # Provided by Google in your dashboard 'logo_only': True, 'display_version': True, 'prev_next_buttons_location': 'bottom', diff --git a/docs/frontend/keras.rst b/docs/frontend/keras.rst index e5c1a77722..cd98ce0b3f 100644 --- a/docs/frontend/keras.rst +++ b/docs/frontend/keras.rst @@ -18,6 +18,6 @@ The ``data_format='channels_first'`` parameter of Keras layers is supported for * `QKeras `_ The equivalent QKeras API and its quantizers are also supported by ``hls4ml``. QKeras is not compatible with Keras v3. * `HGQ `_ - The equivalent HGQ API is also supported. + The equivalent HGQ API is also supported. Still maintained but deprecated in favor of `HGQ2 <../hgq2.html>`_. * `HGQ2 `_ The equivalent HGQ2 API is also supported, plus some additional advanced operators. diff --git a/docs/intro/setup.rst b/docs/intro/setup.rst index 2b5b55b07a..ad534cdb49 100644 --- a/docs/intro/setup.rst +++ b/docs/intro/setup.rst @@ -57,19 +57,19 @@ The following Python packages are all optional and are only required if you inte * `Brevitas `_: Based on PyTorch. See `frontend/pytorch <../frontend/pytorch.html>`_ for more details. * `QONNX `_: Based on ONNX. See `frontend/onnx <../frontend/onnx.html>`_ for more details. 
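+For example, a typical environment for converting HGQ2 (Keras v3) and QONNX models could be set up as follows (install only the optional packages for the frontends you actually use; package names as linked above):
+
+.. code-block::
+
+   pip install hls4ml
+   pip install hgq2   # HGQ2 / Keras v3 quantized models
+   pip install qonnx  # QONNX models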
-Running C simulation from Python requires a C++11-compatible compiler. On Linux, a GCC C++ compiler ``g++`` is required. Any version from a recent Linux should work. On MacOS, the *clang*-based ``g++`` is enough. For the oneAPI backend, one must have oneAPI installed, along with the FPGA compiler, to run C/SYCL simulations. +Running C simulation from Python requires a C++11-compatible compiler. On Linux, a GCC C++ compiler ``g++`` is required. Any version from a recent Linux should work. On MacOS, the *clang*-based ``g++`` is enough. For the oneAPI backend, one must have `oneAPI=2025.0` (2025.1 is known **not to work**) installed, along with the FPGA compiler, to run C/SYCL simulations. Specific functionalities may need additional Python packages. If any needed is missing, ``hls4ml`` will raise an error and prompt you to install the missing packages. To run FPGA synthesis, installation of following tools is required: -* Xilinx Vivado HLS 2018.2 to 2020.1 for synthesis for Xilinx FPGAs using the ``Vivado`` backend. +* Xilinx Vivado HLS 2020.1 for synthesis for Xilinx FPGAs using the ``Vivado`` backend. Older versions may work, but use at your own risk. * Vitis HLS 2022.2 or newer is required for synthesis for Xilinx FPGAs using the ``Vitis`` backend. * Intel Quartus 20.1 to 21.4 for the synthesis for Intel/Altera FPGAs using the ``Quartus`` backend. -* oneAPI 2024.1 to 2025.0 with the FPGA compiler and recent Intel/Altera Quartus for Intel/Altera FPGAs using the ``oneAPI`` backend. +* oneAPI 2024.1 to 2025.0 with the FPGA compiler and recent Intel/Altera Quartus for Intel/Altera FPGAs using the ``oneAPI`` backend. Newer versions are known **not to work**. Catapult HLS 2024.1_1 or 2024.2 can be used to synthesize both for ASICs and FPGAs. diff --git a/docs/intro/status.rst b/docs/intro/status.rst index 75ece1897e..d9931e6df6 100644 --- a/docs/intro/status.rst +++ b/docs/intro/status.rst @@ -51,7 +51,9 @@ A summary of the on-going status of the ``hls4ml`` tool is in the table below. +-----------------------+-----+-----+--------------+--------+--------+-----+ | Frontend/Backend | MLP | CNN | RNN/LSTM/GRU | GarNet | Einsum | MHA | +=======================+=====+=====+==============+========+========+=====+ -| Keras v2/QKeras | ✅ | ✅ | ✅ | ✅ | N/A | ❌ | +| Keras v2 | ✅ | ✅ | ✅ | ✅ | ❌ | ❌ | ++-----------------------+-----+-----+--------------+--------+--------+-----+ +| QKeras | ✅ | ✅ | ✅ | ✅ | N/A | N/A | +-----------------------+-----+-----+--------------+--------+--------+-----+ | HGQ | ✅ | ✅ | N/A | N/A | N/A | N/A | +-----------------------+-----+-----+--------------+--------+--------+-----+ @@ -63,7 +65,7 @@ A summary of the on-going status of the ``hls4ml`` tool is in the table below. +-----------------------+-----+-----+--------------+--------+--------+-----+ | ONNX | ✅ | ✅ | ❌ | ❌ | ❌ | ❌ | +-----------------------+-----+-----+--------------+--------+--------+-----+ -| QONNX | ✅ | ✅ | N/A | N/A | N/A | N/A | +| QONNX | ✅ | ✅ | ❌ | N/A | N/A | N/A | +-----------------------+-----+-----+--------------+--------+--------+-----+ | Vivado/Vitis HLS | ✅ | ✅ | ✅ | ❌ | ✅ | ✅ | +-----------------------+-----+-----+--------------+--------+--------+-----+ @@ -78,11 +80,11 @@ Other feature notes: * ``hls4ml`` is tested on the following platforms. Newer versions might work just fine, but try at your own risk. - - Vivado HLS versions 2018.2 to 2020.1 + - Vivado HLS 2020.1. Older versions may work, but use at your own risk. - Intel HLS versions 20.1 to 21.4, versions > 21.4 have not been tested. 
- - Vitis HLS versions 2022.2 to 2024.1. Versions <= 2022.1 are known not to work. + - Vitis HLS versions 2022.2 to 2024.1. Versions > 2024.1 are less tested. - Catapult HLS versions 2024.1_1 to 2024.2 - - oneAPI versions 2024.1 to 2025.0. 2025.1 is known to not work. + - oneAPI versions 2024.1 to 2025.0. Any future versions are known to not work. * ``hls4ml`` supports Linux [*]_ and requires python >=3.10. hls4ml does not require a specific Linux distribution version and we recommend following the requirements of the HLS tool you are using. * Windows and macOS are not supported. Setting up ``hls4ml`` on these platforms, for example using the Windows Subsystem for Linux (WSL), should be possible, but we do not provide support for such use cases. diff --git a/pyproject.toml b/pyproject.toml index 6f25ffee75..c455249bea 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ optional-dependencies.doc = [ "sphinx-contributors", "sphinx-github-changelog", "sphinx-rtd-theme", + "sphinx-tabs", ] optional-dependencies.hgq = [ "hgq>=0.2.3" ] optional-dependencies.hgq2 = [ "hgq2>=0.0.1" ] From f8123bbf052b37d1b3dedb94faf2a3bbe0e2a0db Mon Sep 17 00:00:00 2001 From: Chang Sun Date: Mon, 18 Aug 2025 12:26:19 -0700 Subject: [PATCH 3/8] why there is another requirements.txt? --- docs/requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/requirements.txt b/docs/requirements.txt index fe3c4f2544..77a36ef399 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,6 +1,7 @@ . setuptools_scm[toml]>=5 sphinx>=3.2.1 +sphinx-tabs sphinx_contributors sphinx_github_changelog sphinx_rtd_theme From a89f41c5c04d0be6be7fdbef9fa1ce7af517e44c Mon Sep 17 00:00:00 2001 From: Chang Sun Date: Tue, 2 Sep 2025 08:32:28 -0700 Subject: [PATCH 4/8] simpler example model --- docs/intro/setup.rst | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/docs/intro/setup.rst b/docs/intro/setup.rst index ad534cdb49..f89d5faace 100644 --- a/docs/intro/setup.rst +++ b/docs/intro/setup.rst @@ -88,10 +88,8 @@ Here we give line-by-line instructions to demonstrate the general workflow. 
# Construct a basic keras model model = Sequential() - model.add(Dense(64, input_shape=(16,))) - model.add(Activation(activation='relu')) - model.add(Dense(32)) - model.add(Activation(activation='relu')) + model.add(Dense(64, input_shape=(16,), activation='relu')) + model.add(Dense(32, activation='relu')) # This is where you would train the model in a real-world scenario From e8d1e7758ae81b7e697e452216d347a9e95f9c98 Mon Sep 17 00:00:00 2001 From: Chang Sun Date: Tue, 9 Sep 2025 06:25:59 -0700 Subject: [PATCH 5/8] smaller workflow file --- docs/advanced/_static/da4ml-workflow.svg | 4464 +--------------------- 1 file changed, 1 insertion(+), 4463 deletions(-) diff --git a/docs/advanced/_static/da4ml-workflow.svg b/docs/advanced/_static/da4ml-workflow.svg index 392b7a2e0a..ae1226979c 100644 --- a/docs/advanced/_static/da4ml-workflow.svg +++ b/docs/advanced/_static/da4ml-workflow.svg @@ -1,4463 +1 @@ - - - -FFPGAx0x0x1x1x2x2x3x3y - -0y0y - -1y1y - -2y2y - -3y3<< 1<< 1<< 1<< 1da111111112211-1-1-2-211-1-1-1-11111-2-22211x0x0x1x1x2x2x3x3ml4Text is not SVG - cannot displayCMVM Problems(or other frontend)Optimized adder trees111111112211-1-1-2111-1-1-1-11111-2-22211x0x0x1x1x2x2x3x3C+ +bitstream +C+ +bitstreamFPGAx0x1x2x3y0y1y2y3<< 1<< 1da111121-1-21-1-111-221x0x1x2x3ml4CMVM Problems(or other frontend)Optimized adder trees111121-111-1-111-221x0x1x2x3 From fd7d1e8fc544264d7ad61d83b9e972890289a4f9 Mon Sep 17 00:00:00 2001 From: Chang Sun Date: Tue, 9 Sep 2025 06:51:00 -0700 Subject: [PATCH 6/8] model-wise desc update --- docs/advanced/auto.rst | 3 +++ docs/advanced/hgq.rst | 2 +- docs/advanced/precision.rst | 17 +++++++++++++---- 3 files changed, 17 insertions(+), 5 deletions(-) diff --git a/docs/advanced/auto.rst b/docs/advanced/auto.rst index f944a11e54..70ada44e35 100644 --- a/docs/advanced/auto.rst +++ b/docs/advanced/auto.rst @@ -20,3 +20,6 @@ inference will never set a bitwdith larger than the bitwidth of the ``max_precis When manually setting bitdwidths, the accumulator can overflow, and the precision may need to be reduced. For the accumulator, it is usually a bad idea to explicitly enable rounding or saturation modes since it dramatically increases the execution time. For other types (e.g. output types or weight types), however, rounding and saturation handling can be enabled as needed. + +.. note:: + For supported models (Most ``HGQ/HGQ2`` models and some ``QKeras`` models), Model-wise Precision Inference (documented in `model-wise precision inference <../precision.html>`_) can be used to achieve bit-exact conversion. Please refer to that section for more details. diff --git a/docs/advanced/hgq.rst b/docs/advanced/hgq.rst index de218b351e..f2f4ad936d 100644 --- a/docs/advanced/hgq.rst +++ b/docs/advanced/hgq.rst @@ -25,7 +25,7 @@ HGQ2 implements an gradient-based automatic bitwidth optimization and quantizati :width: 600 Key Features ------------ +------------ - **Multi-backend support**: Works with TensorFlow, JAX, and PyTorch through Keras v3 - **Flexible quantization**: Supports different quantization schemes including fixed-point and minifloat diff --git a/docs/advanced/precision.rst b/docs/advanced/precision.rst index 622e20d60b..afec135748 100644 --- a/docs/advanced/precision.rst +++ b/docs/advanced/precision.rst @@ -2,13 +2,22 @@ Model-wise Precision Inference ============================== -The model-wise precision inference (implemented in :py:class:`~hls4ml.model.optimizer.passes.bit_exact.BitExact`) attempts to infer the appropriate for **all** precisions in the model. 
Unlike the automatic precision inference, this pass disregards all user-defined precisions, and "trust" only data embedded in the model, i.e., the actual values of the weights and explicit quantizers defined between layers.
+The model-wise precision inference (implemented in :py:class:`~hls4ml.model.optimizer.passes.bit_exact.BitExact`) attempts to infer the appropriate configuration for **all** precisions in the model. Unlike the automatic precision inference, this pass disregards all user-defined precisions and "trusts" only the data embedded in the model, i.e., the actual values of the weights and the explicit quantizers defined between layers.

-Currently, this pass will only be triggered by the presence of any ``FixedPointQuantizer`` (explicit quantizer operator) layer in the model. This pass uses an modified symbolic interval arithmetic to compute the ranges and needed quantization steps for all precisions in the model graph, with the goal of eliminating the discrepency between the quantized model and the original model. Currently, only HGQ/HGQ2 models will produce such quantizers, and the pass will not be triggered for models from other frontends.
+This pass uses a modified symbolic interval arithmetic to compute the ranges and the needed quantization steps for all precisions in the model graph, with the goal of eliminating any discrepancy between the quantized model and the original model. During the inference, only the raw weight values and the explicit quantizers (either ``FixedPointQuantizer``, or ``linear/relu`` layers with ``trusted=True``) are considered as sources of precision information. All other precision information (e.g., user-defined precisions in the ``config_from_*`` functions) will not be used in the inference process.

-If the original model is not properly quantized, this pass will lead to huge bitwidths in the model. In this context, properly quantized models are those that have quantizers defined between **all layers with non-trivial arithmetics**. Importantly, quantizers **should be used immediately after the inputs**, or the input precision may not be properly inferred. The successful application of this pass should result in bit-exact model, i.e., the quantized model should produce the same outputs as the original model for all inputs [*]_.
+This pass is controlled by the ``bit_exact`` key in the backend configuration (default: ``None``). There are two ways to enable it:
+
+- When converting from ``HGQ/HGQ2`` models, this pass is enabled automatically unless ``bit_exact`` is explicitly set to ``False``.
+- For other models, this pass can be enabled by setting ``bit_exact`` to ``True``. Currently, only the ``QKeras`` frontend sets this key automatically when converting from ``QKeras`` models. Support for ``QONNX`` is planned but not yet implemented.
+
+If the original model is not properly quantized, this pass will lead to huge bitwidths in the model. In this context, properly quantized models are those that have quantizers defined between **all layers with non-trivial arithmetic**. The successful application of this pass should result in a bit-exact model, i.e., the quantized model should produce the same outputs as the original model for all inputs [*]_.
+
+Not all operator types are supported in this pass. If an unsupported operator is encountered during the inference, this pass will **crash** the conversion process to prevent silent failures.
Please consider using `automatic precision inference <../auto.html>`_ if your model contains unsupported operators or unquantized components.
+
+.. warning::
+   Importantly, quantizers **should be used immediately after the inputs**, or the input precision may not be properly inferred. If you are using ``HGQ/HGQ2``, this is automatically taken care of in most cases. If you are using ``QKeras``, make sure to put a ``QActivation`` with ``quantized_bits`` right after the input layer so that the input precision can be derived.

 .. [*] While quantized, the original model will still operate on floating-point values, so there is a chance that the outputs will not be exactly the same due to float rounding errors in the original model.

 .. note::
-   Unlike the automatic precision inference, it is strongly recommended to **not** use the ``config_from_*`` functions to set the precisions in the model.
+   Unlike the automatic precision inference, it is strongly recommended **not** to use the ``config_from_*`` functions to set the precisions in the model. Automatic precision inference and this pass cannot be used simultaneously.

From 3ddf275bd9f55541f56221784006e0685c62edee Mon Sep 17 00:00:00 2001
From: Chang Sun
Date: Tue, 9 Sep 2025 13:46:45 -0700
Subject: [PATCH 7/8] fix broken hyperlink

---
 docs/frontend/keras.rst     | 2 +-
 docs/ir/multimodelgraph.rst | 1 -
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/docs/frontend/keras.rst b/docs/frontend/keras.rst
index cd98ce0b3f..71a18d36e8 100644
--- a/docs/frontend/keras.rst
+++ b/docs/frontend/keras.rst
@@ -10,7 +10,7 @@ For Keras v3, the support for EinsumDense layer is added. For HGQ2, the followin

 keras `Operators` that are not layers are generally not supported in ``hls4ml``. This includes operators such as `Add`, `Subtract`, `Multiply`, and `Divide`. Please use the corresponding Keras layers instead.

-Arbitrary ``Lambda`` layers are not, and are not planned to be supported in ``hls4ml`` due to the difficultness to parse generic lambda expression. For custom operations required, please refer to the :ref:`Extension API ` documentation to add custom layers to the conversion process.
+Arbitrary ``Lambda`` layers are not, and are not planned to be, supported in ``hls4ml`` due to the difficulty of parsing generic lambda expressions. If custom operations are required, please refer to the :ref:`Extension API` documentation to add custom layers to the conversion process.

 The ``data_format='channels_first'`` parameter of Keras layers is supported for a limited subset of layers and it is not extensively tested. All HLS implementations in ``hls4ml`` are based on ``channels_last`` data format convention and need to be converted to that format before the HLS code can be emitted. We encourage users of ``channels_first`` to report their experiences to developers on GitHub.
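To make the ``QKeras`` guidance above concrete, below is a minimal sketch of a model that the model-wise precision inference can digest: an explicit ``QActivation`` placed right after the input, and no ``config_from_*`` call. The layer sizes and quantizer widths are illustrative only, and the snippet assumes working ``qkeras`` and ``hls4ml`` installations rather than documenting a fixed API surface.

.. code-block:: Python

    import numpy as np
    from qkeras import QActivation, QDense, quantized_bits, quantized_relu
    from tensorflow.keras.layers import Input
    from tensorflow.keras.models import Model

    import hls4ml

    # Quantize immediately after the input so the input precision can be derived.
    inp = Input(shape=(16,))
    x = QActivation(quantized_bits(8, 0, alpha=1))(inp)
    x = QDense(8, kernel_quantizer=quantized_bits(6, 0, alpha=1), bias_quantizer=quantized_bits(6, 0, alpha=1))(x)
    x = QActivation(quantized_relu(6, 0))(x)
    out = QDense(4, kernel_quantizer=quantized_bits(6, 0, alpha=1), bias_quantizer=quantized_bits(6, 0, alpha=1))(x)
    model = Model(inp, out)

    # No config_from_* call: the pass derives every precision from the model itself,
    # and (per the description above) the bit_exact key is set automatically when
    # converting a QKeras model.
    hls_model = hls4ml.converters.convert_from_keras_model(model, backend='Vitis', output_dir='prj_bit_exact')
    hls_model.compile()

    # The HLS emulation should match the quantized Keras model up to float rounding.
    x_test = np.random.rand(100, 16).astype(np.float32)
    print(np.abs(hls_model.predict(x_test) - model.predict(x_test)).max())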
diff --git a/docs/ir/multimodelgraph.rst b/docs/ir/multimodelgraph.rst index 347b3a5d15..d743853764 100644 --- a/docs/ir/multimodelgraph.rst +++ b/docs/ir/multimodelgraph.rst @@ -42,7 +42,6 @@ Key Methods for MultiModelGraph * :ref:`compile ` * :ref:`predict ` * :ref:`build ` -* :ref:`trace ` ---- From 76a4b78cc603357101edfdd856de685ee74cf162 Mon Sep 17 00:00:00 2001 From: Chang Sun Date: Tue, 9 Sep 2025 13:51:22 -0700 Subject: [PATCH 8/8] rst syntax fix with white spaces --- docs/advanced/extension.rst | 60 +++++++++++++++++++------------------ docs/intro/status.rst | 4 +++ 2 files changed, 35 insertions(+), 29 deletions(-) diff --git a/docs/advanced/extension.rst b/docs/advanced/extension.rst index c357f4c6d2..0854b59431 100644 --- a/docs/advanced/extension.rst +++ b/docs/advanced/extension.rst @@ -60,40 +60,42 @@ In this case, there a single output with the same shape as the input. .. tabs:: .. tab:: Keras v2 - .. code-block:: Python - # Parser for converter - def parse_reverse_layer(keras_layer, input_names, input_shapes, data_reader): - layer = {} - layer['class_name'] = 'KReverse' - layer['name'] = keras_layer['config']['name'] - layer['n_in'] = input_shapes[0][1] + .. code-block:: Python - if input_names is not None: - layer['inputs'] = input_names + # Parser for converter + def parse_reverse_layer(keras_layer, input_names, input_shapes, data_reader): + layer = {} + layer['class_name'] = 'KReverse' + layer['name'] = keras_layer['config']['name'] + layer['n_in'] = input_shapes[0][1] - return layer, [shape for shape in input_shapes[0]] + if input_names is not None: + layer['inputs'] = input_names + + return layer, [shape for shape in input_shapes[0]] .. tab:: Keras v3 - .. code-block:: Python - - from hls4ml.converters.keras_v3._base import register, KerasV3LayerHandler - - @register - class KReverseHandler(KerasV3LayerHandler): - '''Keras v3 layer handler for KReverse''' - - handles = ('KReverse',) - def handle( - self, - layer: 'keras.Layer', - in_tensors: Sequence['KerasTensor'], - out_tensors: Sequence['KerasTensor'], - ) -> dict[str, Any] | tuple[dict[str, Any], ...]: - # Only layer-specific parameters are needed. - # Common parameters are automatically added in the base class. - assert len(in_tensors[0].shape) == 2, 'KReverse is only supported for 2D tensors' - return {'n_in': in_tensors[0].shape[-1]} + + .. code-block:: Python + + from hls4ml.converters.keras_v3._base import register, KerasV3LayerHandler + + @register + class KReverseHandler(KerasV3LayerHandler): + '''Keras v3 layer handler for KReverse''' + + handles = ('KReverse',) + def handle( + self, + layer: 'keras.Layer', + in_tensors: Sequence['KerasTensor'], + out_tensors: Sequence['KerasTensor'], + ) -> dict[str, Any] | tuple[dict[str, Any], ...]: + # Only layer-specific parameters are needed. + # Common parameters are automatically added in the base class. + assert len(in_tensors[0].shape) == 2, 'KReverse is only supported for 2D tensors' + return {'n_in': in_tensors[0].shape[-1]} Next, we need the actual HLS implementaton of the function, which can be written in a header file ``nnet_reverse.h``. diff --git a/docs/intro/status.rst b/docs/intro/status.rst index d9931e6df6..7966903a4b 100644 --- a/docs/intro/status.rst +++ b/docs/intro/status.rst @@ -18,13 +18,17 @@ A list of supported ML frameworks (Frontends), HLS backends, and neural network Frontend support: * Keras + * Keras v2 + * QKeras * HGQ * Keras v3 + * HGQ2 * PyTorch * ONNX + * QONNX Neural network architectures:
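The simplified example model from the ``docs/intro/setup.rst`` change above fits into the usual conversion flow as follows. This is a minimal sketch assuming the standard ``hls4ml`` Python API (``config_from_keras_model`` and ``convert_from_keras_model``) and an arbitrary example part number; adjust the backend, part, and output directory to your environment.

.. code-block:: Python

    import numpy as np
    from tensorflow.keras.layers import Dense
    from tensorflow.keras.models import Sequential

    import hls4ml

    # The simplified example model from the setup documentation.
    model = Sequential()
    model.add(Dense(64, input_shape=(16,), activation='relu'))
    model.add(Dense(32, activation='relu'))

    # Generate a baseline configuration and convert the model.
    config = hls4ml.utils.config_from_keras_model(model, granularity='model')
    hls_model = hls4ml.converters.convert_from_keras_model(
        model,
        hls_config=config,
        backend='Vitis',
        output_dir='my-hls-test',
        part='xcvu13p-flga2577-2-e',  # illustrative part number only
    )

    # C simulation from Python (needs the C++ compiler discussed in the setup section).
    hls_model.compile()
    x = np.random.rand(100, 16).astype(np.float32)
    y_hls = hls_model.predict(x)

    # Full HLS synthesis requires one of the toolchains listed above.
    # hls_model.build(csim=False)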