diff --git a/.gitignore b/.gitignore index 22c8ff685b..8151cd3af9 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,7 @@ sdist/ *.egg-info/ vivado_prj .vscode +.idea my-hls-test *.tar.gz docs/_build @@ -14,3 +15,6 @@ docs/autodoc/* hls4mlprj_* *~ *.ipynb_checkpoints/ + +test/pytest/test_backend/input_file/* +test/pytest/test_backend/output_file/* diff --git a/hls4ml/backends/__init__.py b/hls4ml/backends/__init__.py index 4a48f072cd..3bb8aa7c6d 100644 --- a/hls4ml/backends/__init__.py +++ b/hls4ml/backends/__init__.py @@ -11,9 +11,13 @@ from hls4ml.backends.vitis.vitis_backend import VitisBackend # isort: skip +from hls4ml.backends.vitis_unified.vitis_unified_backend import VitisUnifiedBackend # isort: skip + + register_backend('Vivado', VivadoBackend) register_backend('VivadoAccelerator', VivadoAcceleratorBackend) register_backend('Vitis', VitisBackend) +register_backend('VitisUnified', VitisUnifiedBackend) register_backend('Quartus', QuartusBackend) register_backend('Catapult', CatapultBackend) register_backend('SymbolicExpression', SymbolicExpressionBackend) diff --git a/hls4ml/backends/vitis_unified/passes/fifo_depth_optimization.py b/hls4ml/backends/vitis_unified/passes/fifo_depth_optimization.py new file mode 100644 index 0000000000..0451270ca1 --- /dev/null +++ b/hls4ml/backends/vitis_unified/passes/fifo_depth_optimization.py @@ -0,0 +1,113 @@ +# we inherit it from vitis +import zipfile + +from hls4ml.backends.vitis.passes.fifo_depth_optimization import ( + generate_depths_file, + initialize_large_fifos, + set_optimized_fifo_depths, +) +from hls4ml.model.optimizer.optimizer import ConfigurableOptimizerPass, ModelOptimizerPass + + +def get_vitis_optimized_fifo_depths(model, cus_hls_prj_path=None): + """Parse the files generated by the co-simulation to retrieve the optimized depths for the FIFOs. + Attention, only the FIFOs between the layers are profiled! + + Args: + model (ModelGraph): The model to which FIFO depth optimization is applied. + + Returns: + Dict[str, int]: A dictionary that contains the FIFO names as keys and the optimized depths as values. 
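+
+    Note:
+        ``cus_hls_prj_path`` can point at a custom Vitis HLS solution directory; when it is
+        omitted, the project's default output location is used.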
+ """ + # channel.zip is generated after the co-simulation and contains the chan_status*.csv files + # in the chan_status*.csv files the max depth achieved during co-simulation can be found at the last (4th) line + + if cus_hls_prj_path is None: + cus_hls_prj_path = model.config.get_output_dir() + '/' + model.config.get_project_name() + '/_prj/solution1' + + path_to_zip_file = cus_hls_prj_path + '/.autopilot/db/channel_depth_info/' + + with zipfile.ZipFile(f'{path_to_zip_file}channel.zip', 'r') as zip_ref: + zip_ref.extractall(path_to_zip_file) + + # the channel_info.csv file contains the mapping of each fifo name (i.e layer4_out_U) to the respective + # chan_status*.csv file + names_file_path = cus_hls_prj_path + '/.autopilot/db/channel_info.csv' + + csv_fifo_depth_files = {} + with open(names_file_path) as names_file: + for _ in range(4): + next(names_file) + for line in names_file: + layer_name = line.split(',')[1] + csv_file_name = line.split(',')[3][:-1] + csv_fifo_depth_files[layer_name] = csv_file_name + + optmized_fifo_depths = {} + for layer_name, file_name in csv_fifo_depth_files.items(): + with open(path_to_zip_file + file_name) as chan_status_file: + lines = chan_status_file.readlines() + optmized_fifo_depths[layer_name[:-4]] = int( + lines[-1] + ) # remove "_i_U" from the layer name string and keep the last line of the file that contains the max depth + + return optmized_fifo_depths + + +def execute_cosim_to_profile_fifos(model): + model.write() + model.build( + reset=False, + csim=False, + synth=True, + cosim=False, + validation=False, + export=False, + vsynth=False, + fifo_opt=True, + bitfile=False, + log_to_stdout=False, + ) + + +class FifoDepthOptimization(ConfigurableOptimizerPass, ModelOptimizerPass): + + def __init__(self): + self.profiling_fifo_depth = 100_000 + + def transform(self, model): + """Perform FIFO depth optimization between the FIFOs of all layers to reduce resource utilization as the + initial FIFOs set by hls4ml might be larger than required. At the end of the optimization the FIFOs will + have the largest depths achieved during co-simulation without causing any deadlocks between the layers + (producer-consumer), thus no additional delays between the layers. In some cases, this optimization + might lead to bigger FIFOs than initially set by the hls4ml tool in order to prevent deadlocks. + + Args: + model (ModelGraph): The model to which FIFO depth optimization is applied. + + Raises: + ValueError: If the FIFO depth for profiling provided by the user is not a non-negative integer. + RuntimeError: If the IO type is not set to "io_stream". 
+
+        Returns:
+            bool: The execution state of the Optimizer Pass
+        """
+
+        if not isinstance(self.profiling_fifo_depth, int) or self.profiling_fifo_depth <= 0:
+            raise ValueError('The FIFO depth for profiling (profiling_fifo_depth variable) must be a positive integer.')
+
+        # FIFO profiling is only supported for io_stream models
+        if not (model.config.get_config_value('IOType') == 'io_stream'):
+            raise RuntimeError('To use this optimization you have to set `IOType` field to `io_stream` in the HLS config.')
+
+        hls_prj_path = model.config.backend.writer.mg.get_vitis_hls_exec_dir(model)
+
+        initial_fifo_depths = initialize_large_fifos(model, self.profiling_fifo_depth)
+        execute_cosim_to_profile_fifos(model)
+        optimized_fifo_depths = get_vitis_optimized_fifo_depths(model, cus_hls_prj_path=hls_prj_path + "/hls")
+        generate_depths_file(model, initial_fifo_depths, optimized_fifo_depths)
+        set_optimized_fifo_depths(model, optimized_fifo_depths)
+
+        print('FIFO optimization completed')
+
+        return False
diff --git a/hls4ml/backends/vitis_unified/vitis_unified_backend.py b/hls4ml/backends/vitis_unified/vitis_unified_backend.py
new file mode 100644
index 0000000000..8bfed0f88a
--- /dev/null
+++ b/hls4ml/backends/vitis_unified/vitis_unified_backend.py
@@ -0,0 +1,181 @@
+import os
+import subprocess
+import sys
+from shutil import copy2
+
+from hls4ml.backends import VitisBackend, VivadoBackend
+from hls4ml.model.flow import register_flow
+from hls4ml.writer.vitis_unified_writer.meta_gen import VitisUnified_MetaGen as mg
+
+
+class VitisUnifiedBackend(VitisBackend):
+    def __init__(self):
+        # deliberately skip VitisBackend.__init__ and initialize the base backend under the new name
+        super(VivadoBackend, self).__init__(name='VitisUnified')
+        self._register_layer_attributes()
+        self._register_flows()
+
+    def run_term_command(self, model, taskName: str, command: str, logStdOut: bool, cwd):
+
+        print("-------------------------------------------------------")
+        print(f"start running task : {taskName}")
+        print(f" with command: {command}")
+        print("-------------------------------------------------------")
+
+        output_dir = model.config.get_output_dir()
+
+        out_log_path = os.path.join(output_dir, f'{taskName}_out.log')
+        err_log_path = os.path.join(output_dir, f'{taskName}_err.log')
+        out_target = None if logStdOut else open(out_log_path, 'w')
+        err_target = None if logStdOut else open(err_log_path, 'w')
+
+        try:
+            runningProcess = subprocess.Popen(command, shell=True, cwd=cwd, stdout=out_target, stderr=err_target, text=True)
+            # communicate() may only be called once; it waits for the process to finish
+            runningProcess.communicate()
+            if runningProcess.returncode != 0:
+                raise Exception(
+                    f'Task {taskName} failed for project {model.config.get_project_name()}. See logs for details.'
+                )
+
+            print(f"task {taskName} finished")
+
+        except Exception as e:
+            print(f"task {taskName} failed")
+            print(e)
+            raise e
+        finally:
+            if out_target:
+                out_target.close()
+            if err_target:
+                err_target.close()
+
+    def build(
+        self,
+        model,
+        reset=False,
+        csim=False,
+        synth=False,
+        cosim=False,
+        validation=False,
+        export=False,
+        vsynth=False,
+        fifo_opt=False,
+        bitfile=False,
+        log_to_stdout=True,
+    ):
+        # builds the project with the Vitis unified CLI tools and collects the reports
+        if 'linux' in sys.platform:
+            found = os.system('command -v vitis > /dev/null')
+            if found != 0:
+                raise Exception('Vitis installation not found. Make sure "vitis" is on PATH.')
+
+        if csim:
+            raise Exception("The current Vitis Unified flow does not support csim. 
Please set csim=False to run Vitis Unified.")
+        if validation:
+            raise Exception(
+                "The current Vitis Unified flow does not support validation. Please set validation=False to run Vitis Unified."
+            )
+        if export:
+            raise Exception("The current Vitis Unified flow does not support export. Please set export=False to run Vitis Unified.")
+
+        output_dir = model.config.get_output_dir()
+
+        hls_config_file = os.path.join(output_dir, "hls_kernel_config.cfg")
+        # synthesis command
+        csynth_cmd = ("v++ -c --mode hls --config {configPath} --work_dir unifiedPrj").format(configPath=hls_config_file)
+        csynth_cwd = mg.get_vitis_hls_dir(model)
+
+        # command template shared by the vitis-run operations (csim/cosim/package)
+        util_command = "vitis-run --mode hls --{op} --config {configPath} --work_dir unifiedPrj"
+
+        # one command per operation
+        package_cmd = util_command.format(op="package", configPath=hls_config_file)
+        package_cwd = mg.get_vitis_hls_dir(model)
+        cosim_cmd = util_command.format(op="cosim", configPath=hls_config_file)
+        cosim_cwd = mg.get_vitis_hls_dir(model)
+        csim_cmd = util_command.format(op="csim", configPath=hls_config_file)
+        csim_cwd = mg.get_vitis_hls_dir(model)
+
+        kerlink_cmd = "./buildAcc.sh"
+        kerlink_cwd = mg.get_vitis_linker_dir(model)
+
+        if synth:
+            self.prepare_sim_config_file(model, True)
+            self.run_term_command(model, "csynth", csynth_cmd, log_to_stdout, csynth_cwd)
+            self.run_term_command(model, "package", package_cmd, log_to_stdout, package_cwd)
+
+        if csim:
+            self.prepare_sim_config_file(model, True)
+            self.run_term_command(model, "csim", csim_cmd, log_to_stdout, csim_cwd)
+
+        if cosim or fifo_opt:
+            self.prepare_sim_config_file(model, False)
+            self.run_term_command(model, "cosim", cosim_cmd, log_to_stdout, cosim_cwd)
+
+        # link the kernel into the platform and produce the bitstream
+        if bitfile:
+            self.run_term_command(model, "kerlink", kerlink_cmd, log_to_stdout, kerlink_cwd)
+
+    def prepare_sim_config_file(self, model, is_csim):
+        suffix = "csim" if is_csim else "cosim"
+        src = f"{model.config.get_output_dir()}/hls_kernel_config_{suffix}.cfg"
+        des = f"{model.config.get_output_dir()}/hls_kernel_config.cfg"
+        copy2(src, des)
+        return des
+
+    def create_initial_config(
+        self,
+        board='zcu102',
+        part=None,
+        clock_period=5,
+        clock_uncertainty='12.5%',
+        io_type='io_stream',
+        driver='python',
+        input_type='float',
+        output_type='float',
+        in_stream_buf_size=128,
+        out_stream_buf_size=128,
+        xpfmPath='/opt/Xilinx/Vitis/2023.2/base_platforms/xilinx_zcu102_base_202320_1/xilinx_zcu102_base_202320_1.xpfm',
+        **_,
+    ):
+
+        # validate the arguments before assembling the config
+        if io_type != "io_stream":
+            raise Exception("io_type must be io_stream")
+        if input_type not in ["double", "float"]:
+            raise Exception("input_type must be float or double")
+        if output_type not in ["double", "float"]:
+            raise Exception("output_type must be float or double")
+
+        config = super().create_initial_config(part, clock_period, clock_uncertainty, io_type)
+
+        config['UnifiedConfig'] = {}
+        config['UnifiedConfig']["in_stream_buf_Size"] = in_stream_buf_size
+        config['UnifiedConfig']["out_stream_buf_Size"] = out_stream_buf_size
+        config['UnifiedConfig']['XPFMPath'] = xpfmPath
+        config['UnifiedConfig']['Board'] = board
+        config['UnifiedConfig']['Driver'] = driver
+        config['UnifiedConfig']['InputDtype'] = input_type  # 'float' or 'double'
+        config['UnifiedConfig']['OutputDtype'] = output_type  # 'float' or 'double'
+
+        return config
+
+    def get_default_flow(self):
+        return self._default_flow
+
+    def get_writer_flow(self):
+        return self._writer_flow
+
+    def _register_flows(self):
+        vitis_ip = 'vitis:ip'
+        writer_passes = ['make_stamp',
'vitisunified:write_hls']
+        self._writer_flow = register_flow('write', writer_passes, requires=['vitis:ip'], backend=self.name)
+        self._default_flow = vitis_ip
+
+        # register the fifo depth optimization flow
+        fifo_depth_opt_passes = ['vitisunified:fifo_depth_optimization'] + writer_passes
+
+        register_flow('fifo_depth_optimization', fifo_depth_opt_passes, requires=['vitis:ip'], backend=self.name)
diff --git a/hls4ml/backends/vitis_unified/vitis_unified_config.py b/hls4ml/backends/vitis_unified/vitis_unified_config.py
new file mode 100644
index 0000000000..8f3289931e
--- /dev/null
+++ b/hls4ml/backends/vitis_unified/vitis_unified_config.py
@@ -0,0 +1,49 @@
+class VitisUnifiedConfig:
+
+    def __init__(self, config, model_inputs, model_outputs):
+        self.config = config.config
+        self.board = self.config.get('UnifiedConfig', {}).get('Board', 'pynq-z2')
+
+        # before the first and after the last layer we have the configurable stream buffers
+        # [platform]<-->[in_stream_bufferSz]<-->[hls]<-->[out_stream_bufferSz]<-->[platform]
+        self.in_stream_bufferSz = self.config["UnifiedConfig"]["in_stream_buf_Size"]
+        self.out_stream_bufferSz = self.config["UnifiedConfig"]["out_stream_buf_Size"]
+
+        # the path to the generated platform
+        self.XPFMPath = self.config["UnifiedConfig"]["XPFMPath"]
+
+        self.driver = self.config['UnifiedConfig']['Driver']
+
+        # C++ type for the input and output of the hls kernel; must be a str ('float' or 'double')
+        self.input_type = self.config['UnifiedConfig']['InputDtype']
+        self.output_type = self.config['UnifiedConfig']['OutputDtype']
+
+        assert self.input_type == self.output_type, "Input and Output data types must be the same type"
+        assert len(model_inputs) >= 1, "Only models with at least one input tensor are currently supported by VitisUnified"
+        assert len(model_outputs) >= 1, "Only models with at least one output tensor are currently supported by VitisUnified"
+        self.inps = model_inputs.copy()
+        self.outs = model_outputs.copy()
+
+    def get_corrected_types(self):
+        return self.input_type, self.output_type, self.inps, self.outs
+
+    def get_driver(self):
+        return self.driver
+
+    def get_board(self):
+        return self.board
+
+    def get_input_type(self):
+        return self.input_type
+
+    def get_output_type(self):
+        return self.output_type
+
+    def get_in_stream_bufferSz(self):
+        return self.in_stream_bufferSz
+
+    def get_out_stream_bufferSz(self):
+        return self.out_stream_bufferSz
+
+    def get_XPFMPath(self):
+        return self.XPFMPath
diff --git a/hls4ml/templates/vitis_unified/build_lib.sh b/hls4ml/templates/vitis_unified/build_lib.sh
new file mode 100644
index 0000000000..2645804f90
--- /dev/null
+++ b/hls4ml/templates/vitis_unified/build_lib.sh
@@ -0,0 +1,31 @@
+#!/bin/bash
+
+CC=g++
+if [[ "$OSTYPE" == "linux-gnu" ]]; then
+    CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique"
+elif [[ "$OSTYPE" == "darwin"* ]]; then
+    CFLAGS="-O3 -fPIC -std=c++11"
+fi
+VITIS_UNIFIED_FLAGS="VITIS_UNIFIED"
+CFLAGS="$CFLAGS -D$VITIS_UNIFIED_FLAGS"
+
+INCFLAGS="-Ifirmware/ap_types/"
+
+PROJECT=myprojectBaseName
+WRAPPER_NAME=myprojectWrapName
+LIB_STAMP=mystamp
+BASEDIR="$(cd "$(dirname "$0")" && pwd)"
+WEIGHTS_DIR="\"${BASEDIR}/firmware/weights\""
+
+echo "------------- This is build_lib.sh debug message ----------------"
+echo "Compiling for OSTYPE: $OSTYPE"
+echo "CFLAGS: $CFLAGS"
+echo "Include Flags: $INCFLAGS"
+echo "Weights directory: $WEIGHTS_DIR"
+echo "-----------------------------------------------------------------"
+
+${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c firmware/${PROJECT}.cpp -o ${PROJECT}.o
+${CC}
${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c firmware/${WRAPPER_NAME}.cpp -o ${WRAPPER_NAME}.o +${CC} ${CFLAGS} ${INCFLAGS} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c ${PROJECT}_bridge.cpp -o ${PROJECT}_bridge.o +${CC} ${CFLAGS} ${INCFLAGS} -shared ${PROJECT}.o ${WRAPPER_NAME}.o ${PROJECT}_bridge.o -o firmware/${PROJECT}-${LIB_STAMP}.so +rm -f *.o diff --git a/hls4ml/templates/vitis_unified/build_lib_multigraph.sh b/hls4ml/templates/vitis_unified/build_lib_multigraph.sh new file mode 100644 index 0000000000..9dcd85f7d1 --- /dev/null +++ b/hls4ml/templates/vitis_unified/build_lib_multigraph.sh @@ -0,0 +1,69 @@ +#!/bin/bash +set -e + +CC=g++ +if [[ "$OSTYPE" == "linux-gnu" ]]; then + CFLAGS="-O3 -fPIC -std=c++11 -fno-gnu-unique" +elif [[ "$OSTYPE" == "darwin"* ]]; then + CFLAGS="-O3 -fPIC -std=c++11" +fi + +graph_project_names=(mygraph_name_list) + +LDFLAGS= +VITIS_UNIFIED_FLAGS="VITIS_UNIFIED" +CFLAGS="$CFLAGS -D$VITIS_UNIFIED_FLAGS" + +ORIGINAL_PROJECT=myproject +PROJECT=myproject_stitched +LIB_STAMP=mystamp +BASEDIR="$(cd "$(dirname "$0")" && cd .. && pwd)" +INCFLAGS="" +OUTPUT_DIR="${BASEDIR}/stitched/firmware" +WEIGHTS_DIR="\"${BASEDIR}/stitched/firmware/weights\"" + +mkdir -p "${OUTPUT_DIR}" + +# Compile all graphs in parallel +OBJECT_FILES=() +PIDS=() + +for g in "${graph_project_names[@]}"; do + SRC_FILE="${g}/firmware/${ORIGINAL_PROJECT}_${g}.cpp" + OBJ_FILE="${ORIGINAL_PROJECT}_${g}.o" + AP_TYPES_PATH="-I${BASEDIR}/${g}/firmware/ap_types/" + ( + ${CC} ${CFLAGS} ${AP_TYPES_PATH} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c "${BASEDIR}/${SRC_FILE}" -o "${OBJ_FILE}" + ) & + PIDS+=($!) + OBJECT_FILES+=("${OBJ_FILE}") + INCFLAGS+="-I${BASEDIR}/${g}/ " +done + +# compile axi_stream as well + +for g in "${graph_project_names[@]}"; do + SRC_FILE="${g}/firmware/${ORIGINAL_PROJECT}_${g}_axi.cpp" + OBJ_FILE="${ORIGINAL_PROJECT}_${g}_axi.o" + AP_TYPES_PATH="-I${BASEDIR}/${g}/firmware/ap_types/" + ( + ${CC} ${CFLAGS} ${AP_TYPES_PATH} -D WEIGHTS_DIR="${WEIGHTS_DIR}" -c "${BASEDIR}/${SRC_FILE}" -o "${OBJ_FILE}" + ) & + PIDS+=($!) 
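+    # each compile above runs in a background subshell; the PIDs recorded here are reaped by the
+    # wait loop below, so a failing compile aborts the script via set -e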
+ OBJECT_FILES+=("${OBJ_FILE}") + #INCFLAGS+="-I${BASEDIR}/${g}/ " +done + + + +for pid in "${PIDS[@]}"; do + wait $pid +done + +AP_TYPES_PATH="-I${BASEDIR}/${graph_project_names[@]: -1}/firmware/ap_types/" + +${CC} ${CFLAGS} ${INCFLAGS} ${AP_TYPES_PATH} -c "${PROJECT}_bridge.cpp" -o ${PROJECT}_bridge.o +${CC} ${CFLAGS} ${INCFLAGS} ${AP_TYPES_PATH} -shared "${OBJECT_FILES[@]}" ${PROJECT}_bridge.o -o "${OUTPUT_DIR}/${PROJECT}-${LIB_STAMP}.so" + +rm -f "${OBJECT_FILES[@]}" +rm -f ${PROJECT}_bridge.o diff --git a/hls4ml/templates/vitis_unified/driver/pynq/pynq_driver.py.hls4ml b/hls4ml/templates/vitis_unified/driver/pynq/pynq_driver.py.hls4ml new file mode 100644 index 0000000000..e8c1d0de5c --- /dev/null +++ b/hls4ml/templates/vitis_unified/driver/pynq/pynq_driver.py.hls4ml @@ -0,0 +1,98 @@ +# import the library +import os +import re +import subprocess +import time + +import numpy as np +from pynq import DefaultIP # import the ip connector library for extension +from pynq import Overlay # import the overlay +from pynq import allocate # import for CMA (contingeous memory allocation) + + +class MyDfxCtrl(DefaultIP): + def __init__(self, description): + super().__init__(description=description) + + self.REG_ADDR_AP_CTRL = 0x00 + self.REG_ADDR_AMT_QUERY = VAL + + self.REG_ADDR_GIE = 0x04 + self.REG_ADDR_IER = 0x08 + self.REG_ADDR_ISR = 0x0C + + self.INP_PORT_NAMEs = [ + # hls-driver-input-dbg-name + ] + + self.REG_ADDR_INP_PTRs = [ + # hls-driver-input-ptr + ] + + self.OUT_PORT_NAMEs = [ + # hls-driver-output-dbg-name + ] + + self.REG_ADDR_OUT_PTRs = [ + # hls-driver-output-ptr + ] + + bindto = ['xilinx.com:hls::1.0'] + + def enable_gie(self): + print("global interrupt enable register") + self.write(self.REG_ADDR_GIE, 0x01) + print("enable gie successful") + + def disable_gie(self): + print("global interrupt enable register") + self.write(self.REG_ADDR_GIE, 0x01) + print("disable gie successful") + + def enable_done_intr(self): + print("ap_done interrupt enable register") + self.write(self.REG_ADDR_IER, 0x01) + print("enable ap_done interrupt successful") + + def clear_done_status(self): + print("ap_done register clear") + self.write(self.REG_ADDR_ISR, 0x01) + print("clear ap_done interrupt successful") + + def prepare_intr(self): + print("prepare your interrupt") + self.enable_gie() + self.enable_done_intr() + self.clear_done_status() + print("----------------------") + + def set_single_bit(self, addr, idx): + self.write(addr, 1 << idx) + + def ctrl_start(self): + self.write(0x00, 0x01) # ap_start = 1 + + def wait_until_done(self): + while (self.read(0x00) & 0x2) == 0: # Wait for ap_done + time.sleep(0.001) + + def set_input(self, idx, buffer): + + print( + f"input {self.INP_PORT_NAMEs[idx]} will be set to addr: {hex(buffer.physical_address)} with elements: {buffer.size}" + ) + self.write(self.REG_ADDR_INP_PTRs[idx], buffer.physical_address) + self.write(self.REG_ADDR_INP_PTRs[idx] + 4, 0) + buffer.flush() + + def set_output(self, idx, buffer): + + print( + f"output {self.OUT_PORT_NAMEs[idx]} will be set to addr: {hex(buffer.physical_address)} with elements: {buffer.size}" + ) + self.write(self.REG_ADDR_OUT_PTRs[idx], buffer.physical_address) + self.write(self.REG_ADDR_OUT_PTRs[idx] + 4, 0) + + def set_amt_query(self, val): + print(f"amount of queries will be set to: {val} at address: {hex(self.REG_ADDR_AMT_QUERY)}") + self.write(self.REG_ADDR_AMT_QUERY, val) diff --git a/hls4ml/templates/vitis_unified/hls_kernel_config.cfg b/hls4ml/templates/vitis_unified/hls_kernel_config.cfg new file mode 
100644
index 0000000000..c1d12a0c18
--- /dev/null
+++ b/hls4ml/templates/vitis_unified/hls_kernel_config.cfg
@@ -0,0 +1,24 @@
+part={PART}
+
+[hls]
+clock={CLK}
+clock_uncertainty={CLK_UC}
+flow_target=vivado
+syn.file={OUTDIR}/firmware/{FILE_NAME_WRAP}.cpp
+syn.file={OUTDIR}/firmware/{FILE_NAME_BASE}.cpp
+syn.file_cflags={OUTDIR}/firmware/{FILE_NAME_WRAP}.cpp,-std=c++0x
+syn.file_cflags={OUTDIR}/firmware/{FILE_NAME_BASE}.cpp,-std=c++0x
+
+syn.top={TOP_NAME}
+
+tb.file={OUTDIR}/{SIM_FILE_NAME}.cpp
+tb.file={OUTDIR}/firmware/weights
+tb.file={OUTDIR}/tb_data
+tb.file_cflags={OUTDIR}/{SIM_FILE_NAME}.cpp,-std=c++0x
+tb.file_cflags={OUTDIR}/{SIM_FILE_NAME}.cpp,-DRTL_SIM
+package.ip.version=1.0.0
+package.output.format={OUTPUT_KERNEL_TYPE}
+syn.compile.name_max_length=80
+syn.schedule.enable_dsp_full_reg=0
+package.output.syn=1
+cosim.enable_fifo_sizing=true
diff --git a/hls4ml/templates/vitis_unified/myproject_bridge.cpp b/hls4ml/templates/vitis_unified/myproject_bridge.cpp
new file mode 100644
index 0000000000..9a56f10d99
--- /dev/null
+++ b/hls4ml/templates/vitis_unified/myproject_bridge.cpp
@@ -0,0 +1,71 @@
+#ifndef MYPROJECT_BRIDGE_H_
+#define MYPROJECT_BRIDGE_H_
+
+#include "firmware/PROJECT_FILE_NAME.h"
+#include "firmware/nnet_utils/nnet_helpers.h"
+#include <algorithm>
+#include <map>
+
+// hls-fpga-machine-learning insert bram
+
+namespace nnet {
+bool trace_enabled = false;
+std::map<std::string, void *> *trace_outputs = NULL;
+size_t trace_type_size = sizeof(double);
+} // namespace nnet
+
+extern "C" {
+
+struct trace_data {
+    const char *name;
+    void *data;
+};
+
+void allocate_trace_storage(size_t element_size) {
+    nnet::trace_enabled = true;
+    nnet::trace_outputs = new std::map<std::string, void *>;
+    nnet::trace_type_size = element_size;
+    // hls-fpga-machine-learning insert trace_outputs
+}
+
+void free_trace_storage() {
+    for (std::map<std::string, void *>::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) {
+        void *ptr = i->second;
+        free(ptr);
+    }
+    nnet::trace_outputs->clear();
+    delete nnet::trace_outputs;
+    nnet::trace_outputs = NULL;
+    nnet::trace_enabled = false;
+}
+
+void collect_trace_output(struct trace_data *c_trace_outputs) {
+    int ii = 0;
+    for (std::map<std::string, void *>::iterator i = nnet::trace_outputs->begin(); i != nnet::trace_outputs->end(); i++) {
+        c_trace_outputs[ii].name = i->first.c_str();
+        c_trace_outputs[ii].data = i->second;
+        ii++;
+    }
+}
+
+// hls-fpga-machine-learning insert tb_input_writer
+
+// Wrapper of top level function for Python bridge
+void myproject_float(
+    // hls-fpga-machine-learning insert header #float
+) {
+    // hls-fpga-machine-learning insert namespace
+
+    // hls-fpga-machine-learning insert wrapper #float
+}
+
+void myproject_double(
+    // hls-fpga-machine-learning insert header #double
+) {
+    // hls-fpga-machine-learning insert namespace
+
+    // hls-fpga-machine-learning insert wrapper #double
+}
+}
+
+#endif
diff --git a/hls4ml/templates/vitis_unified/myproject_dm.cpp b/hls4ml/templates/vitis_unified/myproject_dm.cpp
new file mode 100644
index 0000000000..fa373d5a4c
--- /dev/null
+++ b/hls4ml/templates/vitis_unified/myproject_dm.cpp
@@ -0,0 +1,64 @@
+#include <hls_stream.h>
+//#include "ap_axi_sdata.h"
+#include "MY_PROJECT_DM_INC.h"
+
+#define STREAM_BUF_IN_SZ VAL
+#define STREAM_BUF_OUT_SZ VAL
+
+template <typename ATOMIC_TYPE, typename INPUT_LAYER_ARR>
+void load_input(ATOMIC_TYPE *in, hls::stream<INPUT_LAYER_ARR> &inStream, int amtQuery, const int TENSOR_SIZE) {
+mem_rd:
+    int baseQuery = 0;
+    for (int q = 0; q < amtQuery; q++) {
+        for (int i = 0; i < TENSOR_SIZE / INPUT_LAYER_ARR::size; i++) {
+            INPUT_LAYER_ARR tmp;
+            for (int j = 0; j <
INPUT_LAYER_ARR::size; j++) {
+                tmp[j] = in[baseQuery];
+                baseQuery++;
+            }
+            inStream.write(tmp);
+        }
+    }
+}
+
+template <typename ATOMIC_TYPE, typename OUT_LAYER_ARR>
+void store_result(ATOMIC_TYPE *out, hls::stream<OUT_LAYER_ARR> &out_stream, int amtQuery, const int TENSOR_SIZE) {
+mem_wr:
+    int baseQuery = 0;
+    for (int q = 0; q < amtQuery; q++) {
+        for (int i = 0; i < TENSOR_SIZE / OUT_LAYER_ARR::size; i++) {
+            OUT_LAYER_ARR tmp = out_stream.read();
+            for (int j = 0; j < OUT_LAYER_ARR::size; j++) {
+                out[baseQuery] = tmp[j];
+                baseQuery++;
+            }
+        }
+    }
+}
+
+void MY_PROJECT_TOP_FUNC(
+    // vitis-unified-wrapper-io
+    , int amtQuery
+
+) {
+
+    // vitis-unified-wrapper-interface
+    #pragma HLS INTERFACE s_axilite port=amtQuery bundle=control
+    #pragma HLS INTERFACE s_axilite port=return bundle=control
+
+    // vitis-unified-wrapper-stream-dec
+
+    // vitis-unified-wrapper-stream-config
+
+    #pragma HLS dataflow
+
+    // vitis-unified-wrapper-load
+
+    for (int q = 0; q < amtQuery; q++) {
+        // vitis-unified-wrapper-compute
+    }
+
+    // vitis-unified-wrapper-store
+}
diff --git a/hls4ml/templates/vitis_unified/myproject_dm.h b/hls4ml/templates/vitis_unified/myproject_dm.h
new file mode 100644
index 0000000000..d8ca8eb0f7
--- /dev/null
+++ b/hls4ml/templates/vitis_unified/myproject_dm.h
@@ -0,0 +1,13 @@
+#ifndef FILENAME_H
+#define FILENAME_H
+
+#include <hls_stream.h>
+
+#include "MY_PROJECT_INC.h"
+
+void MY_PROJECT_TOP_FUNC(
+
+    // vitis-unified-wrapper-io
+    , int amtQuery);
+
+#endif
diff --git a/hls4ml/templates/vitis_unified/myproject_test.cpp b/hls4ml/templates/vitis_unified/myproject_test.cpp
new file mode 100644
index 0000000000..a8d031e304
--- /dev/null
+++ b/hls4ml/templates/vitis_unified/myproject_test.cpp
@@ -0,0 +1,97 @@
+#include <fstream>
+#include <iostream>
+#include <map>
+#include <sstream>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <vector>
+
+// hls-fpga-machine-learning insert include
+
+#include "firmware/nnet_utils/nnet_helpers.h"
+
+// hls-fpga-machine-learning insert bram
+
+#define CHECKPOINT 5000
+
+namespace nnet {
+bool trace_enabled = true;
+std::map<std::string, void *> *trace_outputs = NULL;
+size_t trace_type_size = sizeof(double);
+} // namespace nnet
+
+int main(int argc, char **argv) {
+    // hls-fpga-machine-learning insert namespace
+
+    // load input data from text file
+    std::ifstream fin("tb_data/tb_input_features.dat");
+    // load predictions from text file
+    std::ifstream fpr("tb_data/tb_output_predictions.dat");
+
+#ifdef RTL_SIM
+    std::string RESULTS_LOG = "tb_data/rtl_cosim_results.log";
+#else
+    std::string RESULTS_LOG = "tb_data/csim_results.log";
+#endif
+    std::ofstream fout(RESULTS_LOG);
+
+    std::string iline;
+    std::string pline;
+    int e = 0;
+
+    if (fin.is_open() && fpr.is_open()) {
+        while (std::getline(fin, iline) && std::getline(fpr, pline)) {
+            if (e % CHECKPOINT == 0)
+                std::cout << "Processing input " << e << std::endl;
+            char *cstr = const_cast<char *>(iline.c_str());
+            char *current;
+            std::vector<float> in;
+            current = strtok(cstr, " ");
+            while (current != NULL) {
+                in.push_back(atof(current));
+                current = strtok(NULL, " ");
+            }
+            cstr = const_cast<char *>(pline.c_str());
+            std::vector<float> pr;
+            current = strtok(cstr, " ");
+            while (current != NULL) {
+                pr.push_back(atof(current));
+                current = strtok(NULL, " ");
+            }
+
+            // hls-fpga-machine-learning insert data
+
+            // hls-fpga-machine-learning insert top-level-function
+
+            if (e % CHECKPOINT == 0) {
+                std::cout << "Predictions" << std::endl;
+                // hls-fpga-machine-learning insert predictions
+                std::cout << "Quantized predictions" << std::endl;
+                // hls-fpga-machine-learning insert quantized
+            }
+            e++;
+
+            // hls-fpga-machine-learning insert tb-output
+        }
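+        // per-sample results were already appended to fout through the tb-output insertion
+        // point above, so only the input/prediction streams remain to be closed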
fin.close(); + fpr.close(); + } else { + std::cout << "INFO: Unable to open input/predictions file, using default input." << std::endl; + const unsigned NUM_TEST_SAMPLES = 5; + for (unsigned i = 0; i < NUM_TEST_SAMPLES; i++) { + // hls-fpga-machine-learning insert zero + + // hls-fpga-machine-learning insert top-level-function + + // hls-fpga-machine-learning insert output + + // hls-fpga-machine-learning insert tb-output + } + } + + fout.close(); + std::cout << "INFO: Saved inference results to file: " << RESULTS_LOG << std::endl; + + return 0; +} diff --git a/hls4ml/templates/vitis_unified/workspace/projectName/vitis-comp.json b/hls4ml/templates/vitis_unified/workspace/projectName/vitis-comp.json new file mode 100644 index 0000000000..9c7eb3fb62 --- /dev/null +++ b/hls4ml/templates/vitis_unified/workspace/projectName/vitis-comp.json @@ -0,0 +1,9 @@ +{ + "name": "{HLS_NAME}", + "type": "HLS", + "configuration": { + "componentType": "HLS", + "configFiles": ["{CONFIG_FILE}"], + "work_dir": "unifiedPrj" + } +} diff --git a/hls4ml/templates/vitis_unified/workspace/sysProj/buildAcc.sh b/hls4ml/templates/vitis_unified/workspace/sysProj/buildAcc.sh new file mode 100644 index 0000000000..38da5ac4e5 --- /dev/null +++ b/hls4ml/templates/vitis_unified/workspace/sysProj/buildAcc.sh @@ -0,0 +1,6 @@ +v++ -l -t hw --platform {PLATFORM_XPFM} {KERNEL_XO} --config buildConfig.cfg -o {PROJECT_NAME}.xclbin --save-temps +[ -f ../../export/system.bit ] && rm -f ../../export/system.bit +[ -f ../../export/system.hwh ] && rm -f ../../export/system.hwh + +xclbinutil --dump-section BITSTREAM:RAW:../../export/system.bit --input {PROJECT_NAME}.xclbin +cp _x/link/vivado/vpl/prj/prj.gen/sources_1/bd/vitis_design/hw_handoff/vitis_design.hwh ../../export/system.hwh diff --git a/hls4ml/templates/vitis_unified/workspace/sysProj/buildConfig.cfg b/hls4ml/templates/vitis_unified/workspace/sysProj/buildConfig.cfg new file mode 100644 index 0000000000..c1266844d0 --- /dev/null +++ b/hls4ml/templates/vitis_unified/workspace/sysProj/buildConfig.cfg @@ -0,0 +1,2 @@ +[vivado] +gui={GUI_STATUS} diff --git a/hls4ml/templates/vivado/ap_types/ap_axi_sdata.h b/hls4ml/templates/vivado/ap_types/ap_axi_sdata.h new file mode 100755 index 0000000000..2913ce80a1 --- /dev/null +++ b/hls4ml/templates/vivado/ap_types/ap_axi_sdata.h @@ -0,0 +1,402 @@ +// Copyright 1986-2022 Xilinx, Inc. All Rights Reserved. +// Copyright 2022-2023 Advanced Micro Devices, Inc. All Rights Reserved. + +// 67d7842dbbe25473c3c32b93c0da8047785f30d78e8a024de1b57352245f9689 + +/* + * This file contains the definition of the data types for AXI streaming. 
+ * ap_axi_s is a signed interpretation of the AXI stream + * ap_axi_u is an unsigned interpretation of the AXI stream + */ + +#ifndef __AP__AXI_SDATA__ +#define __AP__AXI_SDATA__ + +#include "ap_int.h" +#include "hls_stream.h" +#include +#include +#include +#include +//#include "ap_fixed.h" +template +struct ap_fixed; +template +struct ap_ufixed; + +namespace hls { + +template constexpr std::size_t bitwidth = sizeof(T) * CHAR_BIT; +template <> constexpr std::size_t bitwidth = 1 * CHAR_BIT; + +template constexpr std::size_t bitwidth> = W; +template constexpr std::size_t bitwidth> = W; +template +constexpr std::size_t bitwidth> = + _AP_W; +template +constexpr std::size_t bitwidth> = + _AP_W; + +template +constexpr std::size_t bytewidth = (bitwidth + CHAR_BIT - 1) / CHAR_BIT; +template <> constexpr std::size_t bytewidth = 1; + +struct axis_disabled_signal {}; + +// Enablement for axis signals +#define AXIS_ENABLE_DATA 0b00000001 +#define AXIS_ENABLE_DEST 0b00000010 +#define AXIS_ENABLE_ID 0b00000100 +#define AXIS_ENABLE_KEEP 0b00001000 +#define AXIS_ENABLE_LAST 0b00010000 +#define AXIS_ENABLE_STRB 0b00100000 +#define AXIS_ENABLE_USER 0b01000000 + +// clang-format off +// Disablement mask for DATA axis signals +#define AXIS_DISABLE_DATA (0b11111111 ^ AXIS_ENABLE_DATA) & \ + (0b11111111 ^ AXIS_ENABLE_KEEP) & \ + (0b11111111 ^ AXIS_ENABLE_STRB) + +// Enablement/disablement of all axis signals +#define AXIS_ENABLE_ALL 0b01111111 +#define AXIS_DISABLE_ALL 0b00000000 + +// Struct: axis - struct that has one or more member 'signals' +// Signals: DATA, DEST, ID, KEEP, LAST, STRB, USER +// All signals are optional: +// LAST is enabled by default +// DEST, ID, & USER are disabled by default +// DATA, KEEP, & STRB are enabled by default for non-void DATA type +// Template parameters: +// T : type of the DATA signal +// WUser : size of the USER signal, if zero signal will be disabled +// WId : size of the ID signal, if zero signal will be disabled +// WDest : size of the DEST signal, if zero signal will be disabled +// EnableSignals : bit field to enable signals, see AXIS_ENABLE_* +// StrictEnablement : when true check that EnableSignals matches other parameters +// clang-format on +template +struct axis { + static_assert((EnableSignals & 0b10000000) == 0, + "Template parameter 'EnableSignals' is invalid only " + "low 7 bits can be set!"); + friend class stream< + axis>; + + static constexpr bool has_data = !std::is_void::value; + static constexpr bool has_user = WUser > 0; + static constexpr bool has_id = WId > 0; + static constexpr bool has_dest = WDest > 0; + static constexpr bool has_keep = EnableSignals & AXIS_ENABLE_KEEP; + static constexpr bool has_strb = EnableSignals & AXIS_ENABLE_STRB; + static constexpr bool has_last = EnableSignals & AXIS_ENABLE_LAST; + + static constexpr std::size_t width_user = has_user ? WUser : 1; + static constexpr std::size_t width_id = has_id ? WId : 1; + static constexpr std::size_t width_dest = has_dest ? WDest : 1; + static constexpr std::size_t width_keep = bytewidth; + static constexpr std::size_t width_strb = bytewidth; + static constexpr std::size_t width_last = 1; + + static_assert(has_data || has_user || has_id || has_dest || has_keep || + has_strb || has_last, + "No axis signals are enabled"); + + static_assert(StrictEnablement + ? has_data == (bool)(EnableSignals & AXIS_ENABLE_DATA) + : true, + "Found mismatched enablement for DATA signal"); + static_assert(StrictEnablement + ? 
has_user == (bool)(EnableSignals & AXIS_ENABLE_USER) + : true, + "Found mismatched enablement for USER signal"); + static_assert(StrictEnablement + ? has_id == (bool)(EnableSignals & AXIS_ENABLE_ID) + : true, + "Found mismatched enablement for ID signal"); + static_assert(StrictEnablement + ? has_dest == (bool)(EnableSignals & AXIS_ENABLE_DEST) + : true, + "Found mismatched enablement for DEST signal"); + + typedef typename std::conditional::type + Type_data; + Type_data data; + +#ifdef AESL_SYN + + NODEBUG Type_data get_data() const { +#pragma HLS inline + assert(has_data); + return data; + } + NODEBUG void set_data(Type_data d) { +#pragma HLS inline + assert(has_data); + data = d; + } + +#define _AXIS_CHANNEL_API(CHAN_NAME) \ + typedef \ + typename std::conditional, \ + axis_disabled_signal>::type Type_##CHAN_NAME; \ + Type_##CHAN_NAME CHAN_NAME; \ + __attribute__((nodebug)) __attribute__((always_inline)) \ + Type_##CHAN_NAME get_##CHAN_NAME() const { \ + assert(has_##CHAN_NAME); \ + return CHAN_NAME; \ + } \ + __attribute__((nodebug)) __attribute__( \ + (always_inline)) void set_##CHAN_NAME(Type_##CHAN_NAME value) { \ + assert(has_##CHAN_NAME); \ + CHAN_NAME = value; \ + } + +#else + + Type_data get_data() const { + if (!has_data) + throw std::runtime_error("CHAN_NAME is not enabled"); + return data; + } + void set_data(Type_data d) { + if (!has_data) + throw std::runtime_error("CHAN_NAME is not enabled"); + data = d; + } + +#define _AXIS_CHANNEL_API(CHAN_NAME) \ + typedef \ + typename std::conditional, \ + axis_disabled_signal>::type Type_##CHAN_NAME; \ + Type_##CHAN_NAME CHAN_NAME; \ + Type_##CHAN_NAME get_##CHAN_NAME() const { \ + if (!has_##CHAN_NAME) \ + throw std::runtime_error("CHAN_NAME is not enabled"); \ + return CHAN_NAME; \ + } \ + void set_##CHAN_NAME(Type_##CHAN_NAME value) { \ + if (!has_##CHAN_NAME) \ + throw std::runtime_error("CHAN_NAME is not enabled"); \ + CHAN_NAME = value; \ + } + +#endif + + _AXIS_CHANNEL_API(keep) + _AXIS_CHANNEL_API(strb) + _AXIS_CHANNEL_API(user) + _AXIS_CHANNEL_API(last) + _AXIS_CHANNEL_API(id) + _AXIS_CHANNEL_API(dest) +#undef _AXIS_CHANNEL_API + +// For original `qdma_axis` +#ifdef AESL_SYN + NODEBUG +#endif + void keep_all() { +#pragma HLS inline +#ifdef AESL_SYN + assert(has_keep); +#else + if (!has_data) + throw std::runtime_error("CHAN_NAME is not enabled"); +#endif + ap_uint k = 0; + keep = ~k; + } + +private: +#ifdef AESL_SYN +#define _AXIS_CHANNEL_INTERNAL_API(CHAN_NAME) \ + __attribute__((nodebug)) __attribute__((always_inline)) \ + Type_##CHAN_NAME *get_##CHAN_NAME##_ptr() { \ + return (!has_##CHAN_NAME) ? 
nullptr : &CHAN_NAME; \ + } + + _AXIS_CHANNEL_INTERNAL_API(data) + _AXIS_CHANNEL_INTERNAL_API(keep) + _AXIS_CHANNEL_INTERNAL_API(strb) + _AXIS_CHANNEL_INTERNAL_API(user) + _AXIS_CHANNEL_INTERNAL_API(last) + _AXIS_CHANNEL_INTERNAL_API(id) + _AXIS_CHANNEL_INTERNAL_API(dest) +#undef _AXIS_CHANNEL_INTERNAL_API +#endif +}; + +// clang-format off +// Struct: axis_data (alternative to axis) +// DATA signal always enabled +// All other signals are optional, disabled by default +// Example usage: +// hls::axis_data A; // DATA and LAST signals only +// hls::axis_data B; // DATA, LAST, and USER signals only (USER width is 32) +// hls::axis_data C; // All signals enabled +// hls::axis_data D; // All signals enabled, this throw an exception due to zero size for WUser/WId/WDest +// clang-format on +template +using axis_data = axis; + +// Struct: axis_user (alternative to axis) +// USER signal always enabled +// DATA signal always disabled +// All other signals are optional, disabled by default +// Example usage: +// hls::axis_user<32> C; // USER signal only +// hls::axis_user<32, AXIS_ENABLE_LAST> D; // USER and LAST signals only +template +using axis_user = axis; + +} // namespace hls + +template +using ap_axis = hls::axis, WUser, WId, WDest, EnableSignals, + StrictEnablement>; + +template +using ap_axiu = hls::axis, WUser, WId, WDest, EnableSignals, + StrictEnablement>; + +// original usage: qdma_axis, and TSTRB is omitted. +template +using qdma_axis = hls::axis, WUser, WId, WDest, + AXIS_ENABLE_ALL ^ AXIS_ENABLE_STRB, false>; + +#ifdef AESL_SYN +#if ((__clang_major__ != 3) || (__clang_minor__ != 1)) +namespace hls { + +template +class stream> + final { + typedef axis + __STREAM_T__; + +public: + /// Constructors + INLINE NODEBUG stream() {} + + INLINE NODEBUG stream(const char *name) { (void)name; } + + /// Make copy constructor and assignment operator private +private: + INLINE NODEBUG stream(const stream<__STREAM_T__> &chn) : V(chn.V) {} + +public: + /// Overload >> and << operators to implement read() and write() + INLINE NODEBUG void operator>>(__STREAM_T__ &rdata) { read(rdata); } + + INLINE NODEBUG void operator<<(const __STREAM_T__ &wdata) { write(wdata); } + + /// empty & full + NODEBUG bool empty() { +#pragma HLS inline + bool tmp = __fpga_axis_valid( + V.get_data_ptr(), V.get_keep_ptr(), V.get_strb_ptr(), V.get_user_ptr(), + V.get_last_ptr(), V.get_id_ptr(), V.get_dest_ptr()); + return !tmp; + } + + NODEBUG bool full() { +#pragma HLS inline + bool tmp = __fpga_axis_ready( + V.get_data_ptr(), V.get_keep_ptr(), V.get_strb_ptr(), V.get_user_ptr(), + V.get_last_ptr(), V.get_id_ptr(), V.get_dest_ptr()); + return !tmp; + } + + /// Blocking read + NODEBUG void read(__STREAM_T__ &dout) { +#pragma HLS inline + __STREAM_T__ tmp; + __fpga_axis_pop(V.get_data_ptr(), V.get_keep_ptr(), V.get_strb_ptr(), + V.get_user_ptr(), V.get_last_ptr(), V.get_id_ptr(), + V.get_dest_ptr(), tmp.get_data_ptr(), tmp.get_keep_ptr(), + tmp.get_strb_ptr(), tmp.get_user_ptr(), tmp.get_last_ptr(), + tmp.get_id_ptr(), tmp.get_dest_ptr()); + dout = tmp; + } + + NODEBUG __STREAM_T__ read() { +#pragma HLS inline + __STREAM_T__ tmp; + __fpga_axis_pop(V.get_data_ptr(), V.get_keep_ptr(), V.get_strb_ptr(), + V.get_user_ptr(), V.get_last_ptr(), V.get_id_ptr(), + V.get_dest_ptr(), tmp.get_data_ptr(), tmp.get_keep_ptr(), + tmp.get_strb_ptr(), tmp.get_user_ptr(), tmp.get_last_ptr(), + tmp.get_id_ptr(), tmp.get_dest_ptr()); + return tmp; + } + + /// Blocking write + NODEBUG void write(const __STREAM_T__ &din) { +#pragma HLS inline + 
__STREAM_T__ tmp = din; + __fpga_axis_push(V.get_data_ptr(), V.get_keep_ptr(), V.get_strb_ptr(), + V.get_user_ptr(), V.get_last_ptr(), V.get_id_ptr(), + V.get_dest_ptr(), tmp.get_data_ptr(), tmp.get_keep_ptr(), + tmp.get_strb_ptr(), tmp.get_user_ptr(), tmp.get_last_ptr(), + tmp.get_id_ptr(), tmp.get_dest_ptr()); + } + + /// Non-Blocking read + NODEBUG bool read_nb(__STREAM_T__ &dout) { +#pragma HLS inline + __STREAM_T__ tmp; + if (__fpga_axis_nb_pop(V.get_data_ptr(), V.get_keep_ptr(), V.get_strb_ptr(), + V.get_user_ptr(), V.get_last_ptr(), V.get_id_ptr(), + V.get_dest_ptr(), tmp.get_data_ptr(), + tmp.get_keep_ptr(), tmp.get_strb_ptr(), + tmp.get_user_ptr(), tmp.get_last_ptr(), + tmp.get_id_ptr(), tmp.get_dest_ptr())) { + dout = tmp; + return true; + } else { + return false; + } + } + + /// Non-Blocking write + NODEBUG bool write_nb(const __STREAM_T__ &in) { +#pragma HLS inline + __STREAM_T__ tmp = in; + bool full_n = __fpga_axis_nb_push( + V.get_data_ptr(), V.get_keep_ptr(), V.get_strb_ptr(), V.get_user_ptr(), + V.get_last_ptr(), V.get_id_ptr(), V.get_dest_ptr(), tmp.get_data_ptr(), + tmp.get_keep_ptr(), tmp.get_strb_ptr(), tmp.get_user_ptr(), + tmp.get_last_ptr(), tmp.get_id_ptr(), tmp.get_dest_ptr()); + return full_n; + } + +private: + __STREAM_T__ V NO_CTOR; +}; + +} // namespace hls +#endif +#endif +#endif diff --git a/hls4ml/writer/__init__.py b/hls4ml/writer/__init__.py index 8de19fe1d2..52b00604b5 100644 --- a/hls4ml/writer/__init__.py +++ b/hls4ml/writer/__init__.py @@ -2,6 +2,7 @@ from hls4ml.writer.oneapi_writer import OneAPIWriter from hls4ml.writer.quartus_writer import QuartusWriter from hls4ml.writer.symbolic_writer import SymbolicExpressionWriter +from hls4ml.writer.vitis_unified_writer import VitisUnifiedWriter from hls4ml.writer.vitis_writer import VitisWriter from hls4ml.writer.vivado_accelerator_writer import VivadoAcceleratorWriter from hls4ml.writer.vivado_writer import VivadoWriter @@ -10,6 +11,7 @@ register_writer('Vivado', VivadoWriter) register_writer('VivadoAccelerator', VivadoAcceleratorWriter) register_writer('Vitis', VitisWriter) +register_writer('VitisUnified', VitisUnifiedWriter) register_writer('Quartus', QuartusWriter) register_writer('oneAPI', OneAPIWriter) register_writer('Catapult', CatapultWriter) diff --git a/hls4ml/writer/vitis_unified_writer/__init__.py b/hls4ml/writer/vitis_unified_writer/__init__.py new file mode 100644 index 0000000000..51a0c2fba7 --- /dev/null +++ b/hls4ml/writer/vitis_unified_writer/__init__.py @@ -0,0 +1,88 @@ +import os + +from hls4ml.backends.vitis_unified.vitis_unified_config import VitisUnifiedConfig +from hls4ml.writer.vitis_writer import VitisWriter + +from .meta import VitisUnifiedWriterMeta + + +class VitisUnifiedWriter(VitisWriter): + + def __init__(self): + super().__init__() + self.writer_meta = VitisUnifiedWriterMeta() + + from .build_gen import VitisUnified_BuildGen + from .driver_gen import VitisUnified_DriverGen + from .meta_gen import VitisUnified_MetaGen + from .test_bridge_gen import VitisUnified_BridgeGen + from .test_cosim_gen import VitisUnified_TestGen + from .wrap_gen import VitisUnified_WrapperGen + + self.bg = VitisUnified_BuildGen + self.dg = VitisUnified_DriverGen + self.mg = VitisUnified_MetaGen + self.tbg = VitisUnified_BridgeGen + self.tcg = VitisUnified_TestGen + self.wg = VitisUnified_WrapperGen + + def write_board_script_override(self, model): + pass + + def write_build_prj_override(self, model): + pass + + def write_build_opts(self, model): + pass + + def write_tar(self, model): + pass + + def 
write_bridge(self, model):  # test bench gen
+        self.tbg.write_bridge(self.writer_meta, model, self.mg)
+
+    def write_build_script(self, model):
+        # for bridge simulation
+        self.bg.write_bridge_build_script(self.writer_meta, model, self.mg)
+        # for hls kernel generation
+        self.bg.build_unified_project_ske(self.writer_meta, model, self.mg)
+        self.bg.write_hls_kernel_cfg(self.writer_meta, model, self.mg, True)
+        self.bg.write_hls_kernel_cfg(self.writer_meta, model, self.mg, False)
+        # for v++ to link the hls kernel into the system
+        self.bg.write_launch_vitis_linker_dir(self.writer_meta, model, self.mg)
+        self.bg.write_launch_vitis_linker_launcher(self.writer_meta, model, self.mg)
+        self.bg.write_launch_vitis_linker_cfg(self.writer_meta, model, self.mg)
+
+    def generate_config(self, model):
+
+        self.writer_meta.vitis_unified_config = VitisUnifiedConfig(
+            model.config, model.get_input_variables(), model.get_output_variables()
+        )
+
+    def make_export_path(self, model):
+        export_path = f'{model.config.get_output_dir()}/export'
+        if not os.path.exists(export_path):
+            os.makedirs(export_path)
+
+    def write_hls(self, model, is_multigraph=False):
+
+        if is_multigraph:
+            raise Exception(
+                "Vitis Unified does not support multi-graph models; please use the Vitis Unified partial backend instead"
+            )
+
+        # generate the kernel and its driver
+        self.generate_config(model)
+        super().write_hls(model, is_multigraph=False)
+        self.wg.write_wrapper(self.writer_meta, model, self.mg)
+
+        self.make_export_path(model)
+        self.dg.write_driver(self.writer_meta, model, self.mg)
+        self.tcg.write_wrapper_test(self.writer_meta, model, self.mg)
+
+        # self.write_new_tar(model)
+        # if not is_multigraph:
+
+        # else:
+        #     self.write_bridge_multigraph(model)
+        #     self.modify_write_build_script_multigraph(model)
diff --git a/hls4ml/writer/vitis_unified_writer/build_gen.py b/hls4ml/writer/vitis_unified_writer/build_gen.py
new file mode 100644
index 0000000000..9abbc99f73
--- /dev/null
+++ b/hls4ml/writer/vitis_unified_writer/build_gen.py
@@ -0,0 +1,143 @@
+import os
+import stat
+from pathlib import Path
+
+from .meta import VitisUnifiedWriterMeta
+
+
+class VitisUnified_BuildGen:
+
+    @classmethod
+    def write_bridge_build_script(self, meta: VitisUnifiedWriterMeta, model, mg):
+        filedir = os.path.dirname(os.path.abspath(__file__))
+        fin = open(os.path.join(filedir, '../../templates/vitis_unified/build_lib.sh'))
+        fout = open(f"{model.config.get_output_dir()}/build_lib.sh", 'w')
+
+        for line in fin.readlines():
+            if 'myprojectBaseName' in line:
+                line = line.replace('myprojectBaseName', format(model.config.get_project_name()))
+            if 'myprojectWrapName' in line:
+                line = line.replace('myprojectWrapName', mg.get_wrapper_file_name(model))
+            if 'mystamp' in line:
+                line = line.replace('mystamp', model.config.get_config_value('Stamp'))
+
+            fout.write(line)
+
+        fin.close()
+        fout.close()
+
+        # make the generated script executable
+        build_lib_dst = Path(f'{model.config.get_output_dir()}/build_lib.sh').resolve()
+        build_lib_dst.chmod(build_lib_dst.stat().st_mode | stat.S_IEXEC)

+    @classmethod
+    def write_hls_kernel_cfg(self, meta, model, mg, is_csim=False):  # True -> csim config; False -> cosim + fifo optimization config
+        # Generates the hls_kernel_config_<suffix>.cfg file, which the Vitis HLS unified flow uses
+        # to configure the synthesizer
+        filedir = os.path.dirname(os.path.abspath(__file__))
+        suffix = "csim" if is_csim else "cosim"
+        fin = open(os.path.join(filedir, '../../templates/vitis_unified/hls_kernel_config.cfg'))
+        fout =
open(f"{model.config.get_output_dir()}/hls_kernel_config_{sufix}.cfg", 'w') + + for line in fin.readlines(): + if "{PART}" in line: + line = line.replace("{PART}", model.config.get_config_value('Part')) + if "{CLK}" in line: + line = line.replace("{CLK}", model.config.get_config_value('ClockPeriod')) + if "{CLK_UC}" in line: + line = line.replace("{CLK_UC}", model.config.get_config_value('ClockUncertainty')) + if "{OUTDIR}" in line: + line = line.replace("{OUTDIR}", model.config.get_output_dir()) + if "{TOP_NAME}" in line: + line = line.replace("{TOP_NAME}", mg.get_top_wrap_func_name(model)) + if "{FILE_NAME_WRAP}" in line: + line = line.replace("{FILE_NAME_WRAP}", mg.get_wrapper_file_name(model)) + if "{SIM_FILE_NAME}" in line: + line = line.replace("{SIM_FILE_NAME}", mg.get_sim_file_name()) + if "{FILE_NAME_BASE}" in line: + line = line.replace("{FILE_NAME_BASE}", mg.get_main_file_name(model)) + if "{OUTPUT_KERNEL_TYPE}" in line: + line = line.replace("{OUTPUT_KERNEL_TYPE}", mg.get_output_kernel_type()) + if is_csim and (("enable_fifo_sizing" in line) or ("-DRTL_SIM" in line)): + line = "#" + line + + fout.write(line) + + fin.close() + fout.close() + + @classmethod + def build_unified_project_ske(self, meta, model, mg, workspaceDir=None): + # this will generate the vitis-comp.json file, the file will enable vitis ide gui to see it + # as a project + if workspaceDir is None: + workspaceDir = mg.get_vitis_unified_working_directory_dir(model) + hlsDir = mg.get_vitis_hls_dir(model) + execDir = mg.get_vitis_hls_dir(model) + vitisComp = os.path.join(str(hlsDir), "vitis-comp.json") + + # create my own project for this graph + os.makedirs(workspaceDir, exist_ok=True) + os.makedirs(hlsDir, exist_ok=True) + os.makedirs(execDir, exist_ok=True) + # create project vitis-comp.json to + filedir = os.path.dirname(os.path.abspath(__file__)) + fin = open(os.path.join(filedir, "../../templates/vitis_unified/workspace/projectName/vitis-comp.json")) + fout = open(vitisComp, 'w') + + for line in fin.readlines(): + if "{HLS_NAME}" in line: + line = line.replace("{HLS_NAME}", model.config.get_project_name()) + if "{CONFIG_FILE}" in line: + line = line.replace("{CONFIG_FILE}", f"{model.config.get_output_dir()}/hls_kernel_config.cfg") + fout.write(line) + + fin.close() + fout.close() + + @classmethod + def write_launch_vitis_linker_dir(self, meta, model, mg): + os.makedirs(mg.get_vitis_linker_dir(model), exist_ok=True) + + @classmethod + def write_launch_vitis_linker_launcher(self, meta, model, mg): + # This section generate buildAcc.sh file to combine the platform and the hls kernel together + filedir = os.path.dirname(os.path.abspath(__file__)) + fin = open(os.path.join(filedir, '../../templates/vitis_unified/workspace/sysProj/buildAcc.sh')) + fout = open(f"{mg.get_vitis_linker_dir(model)}/buildAcc.sh", 'w') + + for line in fin.readlines(): + if "{PLATFORM_XPFM}" in line: + line = line.replace("{PLATFORM_XPFM}", meta.vitis_unified_config.get_XPFMPath()) + if "{KERNEL_XO}" in line: + line = line.replace("{KERNEL_XO}", mg.get_xo_file_path(model)) + if "{PROJECT_NAME}" in line: + line = line.replace("{PROJECT_NAME}", model.config.get_project_name()) + + fout.write(line) + + fin.close() + fout.close() + + link_lib_dst = Path(f"{mg.get_vitis_linker_dir(model)}/buildAcc.sh").resolve() + link_lib_dst.chmod(link_lib_dst.stat().st_mode | stat.S_IEXEC) + + @classmethod + def write_launch_vitis_linker_cfg(self, meta, model, mg): + # this will generate the config file that linker (platform + vitis) + filedir = 
os.path.dirname(os.path.abspath(__file__))
+        fin = open(os.path.join(filedir, '../../templates/vitis_unified/workspace/sysProj/buildConfig.cfg'))
+        fout = open(f"{mg.get_vitis_linker_dir(model)}/buildConfig.cfg", 'w')
+
+        for line in fin.readlines():
+            if "{CLK}" in line:
+                line = line.replace("{CLK}", str(100_000_000))  # model.config.get_config_value('ClockPeriod'))
+            if "{KERNEL_NAME}" in line:
+                line = line.replace("{KERNEL_NAME}", mg.get_top_wrap_func_name(model))
+            if "{GUI_STATUS}" in line:
+                line = line.replace("{GUI_STATUS}", "true")
+            fout.write(line)
+
+        fin.close()
+        fout.close()
diff --git a/hls4ml/writer/vitis_unified_writer/driver_gen.py b/hls4ml/writer/vitis_unified_writer/driver_gen.py
new file mode 100644
index 0000000000..e8e6ef3088
--- /dev/null
+++ b/hls4ml/writer/vitis_unified_writer/driver_gen.py
@@ -0,0 +1,48 @@
+import os
+
+
+class VitisUnified_DriverGen:
+
+    @classmethod
+    def write_driver(self, meta, model, mg):
+        filedir = os.path.dirname(os.path.abspath(__file__))
+        fin = open(os.path.join(filedir, '../../templates/vitis_unified/driver/pynq/pynq_driver.py.hls4ml'))
+        fout = open(f'{model.config.get_output_dir()}/export/pynq_driver.py', 'w')
+
+        inp_gmem_t, out_gmem_t, inps, outs = meta.vitis_unified_config.get_corrected_types()
+
+        # each pointer argument is assumed to occupy three 4-byte registers in the AXI-lite map
+        strideInPtrAddr = 4 * 3
+        strideOutPtrAddr = 4 * 3
+
+        # the argument registers start at 0x10: input pointers first, then output pointers,
+        # then the query-count register
+        startInPtrAddr = 0x10
+        startOutPtrAddr = startInPtrAddr + strideInPtrAddr * len(inps)
+        startAmtQueryAddr = startOutPtrAddr + strideOutPtrAddr * len(outs)
+
+        def genHexAddrList(startAddr, stride, size, indent):
+            addrs = [f"{indent}{hex(startAddr + inp_idx * stride)}" for inp_idx in range(size)]
+            return addrs
+
+        indentAmt = 3
+        indentStr = indentAmt * " " if indentAmt > 0 else ""
+
+        for line in fin.readlines():
+
+            if "REG_ADDR_AMT_QUERY" in line:
+                line = line.replace("VAL", str(hex(startAmtQueryAddr)))
+            if "# hls-driver-input-dbg-name" in line:
+                input_names = [f'{indentStr}"{mg.get_io_port_name(inp, True, idx)}"' for idx, inp in enumerate(inps)]
+                line += ",\n".join(input_names) + "\n"
+            if "# hls-driver-input-ptr" in line:
+                line += ",\n".join(genHexAddrList(startInPtrAddr, strideInPtrAddr, len(inps), indentStr)) + "\n"
+            if "# hls-driver-output-dbg-name" in line:
+                output_names = [f'{indentStr}"{mg.get_io_port_name(out, False, idx)}"' for idx, out in enumerate(outs)]
+                line += ",\n".join(output_names) + "\n"
+            if "# hls-driver-output-ptr" in line:
+                line += ",\n".join(genHexAddrList(startOutPtrAddr, strideOutPtrAddr, len(outs), indentStr)) + "\n"
+            if "" in line:
+                line = line.replace("", mg.get_top_wrap_func_name(model))
+
+            fout.write(line)
+
+        fin.close()
+        fout.close()
diff --git a/hls4ml/writer/vitis_unified_writer/meta.py b/hls4ml/writer/vitis_unified_writer/meta.py
new file mode 100644
index 0000000000..19917da0d9
--- /dev/null
+++ b/hls4ml/writer/vitis_unified_writer/meta.py
@@ -0,0 +1,5 @@
+class VitisUnifiedWriterMeta:
+
+    def __init__(self):
+        super().__init__()
+        self.vitis_unified_config = None
diff --git a/hls4ml/writer/vitis_unified_writer/meta_gen.py b/hls4ml/writer/vitis_unified_writer/meta_gen.py
new file mode 100644
index 0000000000..a30a5769fd
--- /dev/null
+++ b/hls4ml/writer/vitis_unified_writer/meta_gen.py
@@ -0,0 +1,90 @@
+import os
+
+# file and directory helpers
+
+
+class VitisUnified_MetaGen:
+
+    @classmethod
+    def get_wrapper_file_name(self, model):
+        return f"{model.config.get_project_name()}_dm"
+
+    @classmethod
+    def get_sim_file_name(cls):
+        return "myproject_test"
+
+    @classmethod
+    def get_main_file_name(self, model):
+        return
model.config.get_project_name() + + @classmethod + def get_vitis_unified_working_directory_dir(self, model): + return os.path.join(model.config.get_output_dir(), "unifiedWorkspace") + + @classmethod + def get_vitis_hls_dir(self, model): + vitisWorkingDir = self.get_vitis_unified_working_directory_dir(model) + return os.path.join(vitisWorkingDir, model.config.get_project_name()) + + @classmethod + def get_vitis_hls_exec_dir(self, model): + hlsDir = self.get_vitis_hls_dir(model) + return os.path.join(hlsDir, "unifiedPrj") + + @classmethod + def get_vitis_linker_dir(self, model): + vitisWorkingDir = self.get_vitis_unified_working_directory_dir(model) + return os.path.join(vitisWorkingDir, "linker") + + @classmethod + def get_xo_file_name(self, model): + return f"{self.get_top_wrap_func_name(model)}.xo" + + @classmethod + def get_xo_file_path(self, model): + return os.path.join(self.get_vitis_hls_exec_dir(model), self.get_xo_file_name(model)) + + # naming of variable function helper + + # FOR GMEM WRAPPER + + @classmethod + def get_io_port_name(self, tensorVar, isInput: bool, idx: int): + ioDirect = "in" if isInput else "out" + return f"gmem_{ioDirect}{str(idx)}_ptr_{tensorVar.name}" + + @classmethod + def get_io_port_size_name(self, tensorVar, isInput: bool, idx: int): + ioDirect = "in" if isInput else "out" + return f"gmem_{ioDirect}{str(idx)}_size_{tensorVar.name}" + + @classmethod + def get_local_stream_name(self, tensorVar, isInput: bool, idx: int): + ioDirect = "in" if isInput else "out" + return f"stream_{ioDirect}{str(idx)}_{tensorVar.name}" + + @classmethod + def get_dma_type_name(self): + return "dma_data_packet" + + @classmethod + def get_wrapper_port_name(self, tensorVar, isInput: bool): + ioStr = "in" if isInput else "out" + return f"par_{ioStr}_{tensorVar.name}" + + @classmethod + def get_top_model_name(self, model): + return f"{model.config.get_project_name()}" + + @classmethod + def get_top_wrap_func_name(self, model): + return f"{model.config.get_project_name()}_gem" + + # it is renamed for stitch layer + @classmethod + def rename_type(self, tensorVar, layerIdx: int, isInput: bool): + return "result_" + tensorVar.type.name + f"_at_layer_{str(layerIdx)}" + + @classmethod + def get_output_kernel_type(cls): + return "xo" diff --git a/hls4ml/writer/vitis_unified_writer/test_bridge_gen.py b/hls4ml/writer/vitis_unified_writer/test_bridge_gen.py new file mode 100644 index 0000000000..999ebe9818 --- /dev/null +++ b/hls4ml/writer/vitis_unified_writer/test_bridge_gen.py @@ -0,0 +1,110 @@ +import os + +from .meta import VitisUnifiedWriterMeta + + +class VitisUnified_BridgeGen: + + @classmethod + def write_bridge(self, meta: VitisUnifiedWriterMeta, model, mg): + + filedir = os.path.dirname(os.path.abspath(__file__)) + fin = open(os.path.join(filedir, '../../templates/vitis_unified/myproject_bridge.cpp')) + fout = open(f"{model.config.get_output_dir()}/{model.config.get_project_name()}_bridge.cpp", 'w') + + model_inputs = model.get_input_variables() + model_outputs = model.get_output_variables() + model_brams = [var for var in model.get_weight_variables() if var.storage.lower() == 'bram'] + + indent = ' ' + + for line in fin.readlines(): + newline = "" + if 'MYPROJECT' in line: + newline = line.replace('MYPROJECT', format(model.config.get_project_name().upper())) + + elif 'myproject' in line: + newline = line.replace('myproject', format(model.config.get_project_name())) + + elif 'PROJECT_FILE_NAME' in line: + newline = line.replace('PROJECT_FILE_NAME', 
diff --git a/hls4ml/writer/vitis_unified_writer/test_bridge_gen.py b/hls4ml/writer/vitis_unified_writer/test_bridge_gen.py
new file mode 100644
index 0000000000..999ebe9818
--- /dev/null
+++ b/hls4ml/writer/vitis_unified_writer/test_bridge_gen.py
@@ -0,0 +1,110 @@
+import os
+
+from .meta import VitisUnifiedWriterMeta
+
+
+class VitisUnified_BridgeGen:
+
+    @classmethod
+    def write_bridge(cls, meta: VitisUnifiedWriterMeta, model, mg):
+
+        filedir = os.path.dirname(os.path.abspath(__file__))
+        fin = open(os.path.join(filedir, '../../templates/vitis_unified/myproject_bridge.cpp'))
+        fout = open(f"{model.config.get_output_dir()}/{model.config.get_project_name()}_bridge.cpp", 'w')
+
+        model_inputs = model.get_input_variables()
+        model_outputs = model.get_output_variables()
+        model_brams = [var for var in model.get_weight_variables() if var.storage.lower() == 'bram']
+
+        indent = '    '
+
+        for line in fin.readlines():
+            newline = ""
+            if 'MYPROJECT' in line:
+                newline = line.replace('MYPROJECT', model.config.get_project_name().upper())
+
+            elif 'myproject' in line:
+                newline = line.replace('myproject', model.config.get_project_name())
+
+            elif 'PROJECT_FILE_NAME' in line:
+                newline = line.replace('PROJECT_FILE_NAME', mg.get_wrapper_file_name(model))
+
+            elif '// hls-fpga-machine-learning insert bram' in line:
+                newline = line
+                for bram in model_brams:
+                    newline += f'#include "firmware/weights/{bram.name}.h"\n'
+
+            elif '// hls-fpga-machine-learning insert header' in line:
+                # Write the function arguments (kernel inputs and outputs) for both the
+                # myproject_float and myproject_double entry points.
+                dtype = line.split('#', 1)[1].strip()
+
+                input_ios = []
+                output_ios = []
+
+                for idx, inp in enumerate(model_inputs):
+                    input_ios.append(f"{dtype} {mg.get_io_port_name(inp, True, idx)}[{inp.size_cpp()}]")
+                for idx, out in enumerate(model_outputs):
+                    output_ios.append(f"{dtype} {mg.get_io_port_name(out, False, idx)}[{out.size_cpp()}]")
+
+                inputs_str = ', '.join(input_ios)
+                outputs_str = ', '.join(output_ios)
+
+                newline = ''
+                newline += indent + inputs_str + ',\n'
+                newline += indent + outputs_str + '\n'
+
+            elif '// hls-fpga-machine-learning insert wrapper' in line:
+
+                # Write the call into the top-level wrapper kernel.
+
+                dtype = line.split('#', 1)[1].strip()
+                if dtype == meta.vitis_unified_config.get_input_type():
+                    input_vars = []
+                    output_vars = []
+
+                    for idx, inp in enumerate(model_inputs):
+                        input_vars.append(mg.get_io_port_name(inp, True, idx))
+                    for idx, out in enumerate(model_outputs):
+                        output_vars.append(mg.get_io_port_name(out, False, idx))
+
+                    inputs_str = ', '.join(input_vars)
+                    outputs_str = ', '.join(output_vars)
+
+                    newline = ''
+                    newline += indent + mg.get_top_wrap_func_name(model) + "(\n"
+                    newline += indent + inputs_str + ',\n'
+                    newline += indent + outputs_str + ',\n'
+                    newline += indent + "1);\n"  # the bridge always issues a single query
+
+            elif '// hls-fpga-machine-learning insert trace_outputs' in line:
+                newline = ''
+                for layer in model.get_layers():
+                    func = layer.get_attr('function_cpp', None)
+                    if func and model.config.trace_output and layer.get_attr('trace', False):
+                        vars = layer.get_variables()
+                        for var in vars:
+                            newline += (
+                                indent
+                                + 'nnet::trace_outputs->insert(std::pair<std::string, void *>('
+                                + f'"{layer.name}", (void *) malloc({var.size_cpp()} * element_size)));\n'
+                            )
+
+            elif '// hls-fpga-machine-learning insert namespace' in line:
+                newline = ''
+
+                namespace = model.config.get_writer_config().get('Namespace', None)
+                if namespace is not None:
+                    newline += indent + f'using namespace {namespace};\n'
+
+            else:
+                newline = line
+            fout.write(newline)
+
+        fin.close()
+        fout.close()
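For orientation, a sketch of the C++ fragment the header branch above emits for a model with one input and one output. The myproject_float/myproject_double entry points come from the template; the tensor names are borrowed from the PYNQ notebook later in this PR, and the size macros are hypothetical stand-ins for size_cpp():

expected = (
    "void myproject_float(\n"
    "    float gmem_in0_ptr_input_1[N_INPUT_1_1],\n"
    "    float gmem_out0_ptr_layer12_out[N_LAYER_12]\n"
    ");"
)
print(expected)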
diff --git a/hls4ml/writer/vitis_unified_writer/test_cosim_gen.py b/hls4ml/writer/vitis_unified_writer/test_cosim_gen.py
new file mode 100644
index 0000000000..f9f6fe4be5
--- /dev/null
+++ b/hls4ml/writer/vitis_unified_writer/test_cosim_gen.py
@@ -0,0 +1,140 @@
+import os
+
+
+class VitisUnified_TestGen:
+
+    @classmethod
+    def write_wrapper_test(cls, meta, model, mg):
+
+        inp_gmem_t, out_gmem_t, inps, outs = meta.vitis_unified_config.get_corrected_types()
+
+        filedir = os.path.dirname(os.path.abspath(__file__))
+        f = open(os.path.join(filedir, '../../templates/vitis_unified/myproject_test.cpp'))
+        fout = open(f'{model.config.get_output_dir()}/{mg.get_sim_file_name()}.cpp', 'w')
+
+        model_inputs = model.get_input_variables()
+        model_outputs = model.get_output_variables()
+        model_brams = [var for var in model.get_weight_variables() if var.storage.lower() == 'bram']
+
+        fout.write("//// generated by Vitis Unified Backend\n")
+
+        for line in f.readlines():
+            indent = ' ' * (len(line) - len(line.lstrip(' ')))
+
+            # Insert numbers
+            if 'myproject' in line:
+                newline = line.replace('myproject', model.config.get_project_name())
+            elif '// hls-fpga-machine-learning insert include' in line:
+                newline = line + f'#include "firmware/{mg.get_wrapper_file_name(model)}.h"\n'
+
+            elif '// hls-fpga-machine-learning insert bram' in line:
+                newline = line
+                for bram in model_brams:
+                    newline += f'#include "firmware/weights/{bram.name}.h"\n'
+
+            elif '// hls-fpga-machine-learning insert data' in line:
+                # Turn the flat testbench vector into per-input float pointers, each
+                # anchored at that input's starting offset.
+                newline = line
+                offset = 0
+                for inputIdx, inp in enumerate(model_inputs):
+                    # the type must be float: it is fixed by the template
+                    newline += indent + 'float* {inputPortName} = &in[{startIdx}];\n'.format(
+                        inputPortName=mg.get_io_port_name(inp, True, inputIdx),
+                        startIdx=str(offset),
+                    )
+                    offset += inp.size()
+                # Declare float arrays to receive the output-layer results.
+                for outputIdx, out in enumerate(model_outputs):
+                    newline += indent + f"float {mg.get_io_port_name(out, False, outputIdx)}[{out.size()}];\n"
+
+            elif '// hls-fpga-machine-learning insert top-level-function' in line:
+
+                # Invoke the *_dm.cpp wrapper, i.e. the system-level entry point.
+
+                newline = line
+
+                input_ios = []
+                output_ios = []
+                bram_ios = [b.name for b in model_brams]
+
+                for inpIdx, inp in enumerate(model_inputs):
+                    input_ios.append(mg.get_io_port_name(inp, True, inpIdx))
+
+                for outIdx, out in enumerate(model_outputs):
+                    output_ios.append(mg.get_io_port_name(out, False, outIdx))
+
+                # Concatenate the input, output, and bram variables, filtering out empty values.
+                all_vars = ', '.join(filter(None, [*input_ios, *output_ios, *bram_ios, "1"]))
+                top_level = indent + f'{mg.get_top_wrap_func_name(model)}({all_vars});\n'
+                newline += top_level
+
+            elif '// hls-fpga-machine-learning insert predictions' in line:
+                newline = line
+                for out in model_outputs:
+                    # TODO: fix this size retrieval
+
+                    newline += indent + f'for(int i = 0; i < {out.size()}; i++) {{\n'
+                    newline += indent + '    std::cout << pr[i] << " ";\n'
+                    newline += indent + '}\n'
+                    newline += indent + 'std::cout << std::endl;\n'
+            elif '// hls-fpga-machine-learning insert zero' in line:
+                newline = line
+                for inpIdx, inp in enumerate(model_inputs):
+                    newline += indent + f'float {mg.get_io_port_name(inp, True, inpIdx)}[{inp.size()}] = {{}};\n'
+
+                for outIdx, out in enumerate(model_outputs):
+                    newline += indent + f"float {mg.get_io_port_name(out, False, outIdx)}[{out.size()}] = {{}};\n"
+
+            elif '// hls-fpga-machine-learning insert tb-output' in line:
+                newline = line
+                tb_stream = model.config.get_writer_config().get('TBOutputStream', 'both')
+                if tb_stream != "stdout":  # it can be 'both' or 'file'
+                    for outIdx, out in enumerate(model_outputs):
+                        newline += (
+                            indent
+                            + 'nnet::print_result<{actualType}, {cpysize}>({portName}, {des}, {keepOutput});\n'.format(
+                                actualType="float",
+                                cpysize=out.size(),
+                                portName=mg.get_io_port_name(out, False, outIdx),
+                                des="fout",
+                                keepOutput="false",
+                            )
+                        )
+            elif (
+                '// hls-fpga-machine-learning insert output' in line
+                or '// hls-fpga-machine-learning insert quantized' in line
+            ):
+
+                newline = line
+                tb_stream = model.config.get_writer_config().get('TBOutputStream', 'both')
+                keep_output = str(tb_stream != "stdout").lower()
+
+                if tb_stream != "file":
+                    for outIdx, out in enumerate(model_outputs):
+                        newline += (
+                            indent
+                            + 'nnet::print_result<{actualType}, {cpysize}>({portName}, {des}, {keepOutput});\n'.format(
+                                actualType="float",
+                                cpysize=out.size(),
+                                portName=mg.get_io_port_name(out, False, outIdx),
+                                des="std::cout",
+                                keepOutput=keep_output,
+                            )
+                        )
+
+            elif '// hls-fpga-machine-learning insert namespace' in line:
+                newline = ''
+
+                namespace = model.config.get_writer_config().get('Namespace', None)
+                if namespace is not None:
+                    newline += indent + f'using namespace {namespace};\n'
+
+            else:
+                newline = line
+
+            fout.write(newline)
+        f.close()
+        fout.close()
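The offset bookkeeping in the "insert data" branch is easiest to see with concrete numbers; a sketch with two hypothetical inputs (port names and sizes are invented):

sizes = [("gmem_in0_ptr_input_a", 16), ("gmem_in1_ptr_input_b", 8)]  # invented
offset = 0
for name, size in sizes:
    print(f"float* {name} = &in[{offset}];")
    offset += size
# float* gmem_in0_ptr_input_a = &in[0];
# float* gmem_in1_ptr_input_b = &in[16];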
diff --git a/hls4ml/writer/vitis_unified_writer/wrap_gen.py b/hls4ml/writer/vitis_unified_writer/wrap_gen.py
new file mode 100644
index 0000000000..59b21a49c3
--- /dev/null
+++ b/hls4ml/writer/vitis_unified_writer/wrap_gen.py
@@ -0,0 +1,144 @@
+import os
+
+from .meta import VitisUnifiedWriterMeta
+
+# main function
+
+
+class VitisUnified_WrapperGen:
+
+    @classmethod
+    def gen_io_str(cls, mg, indent, inp_gmem_t, out_gmem_t, inps, outs, meta=None):
+
+        inputPtrList = []
+        outputPtrList = []
+
+        for inp_idx, inp in enumerate(inps):
+            inputPtrList.append(f"{indent} {inp_gmem_t}* {mg.get_io_port_name(inp, True, inp_idx)}")
+
+        for out_idx, out in enumerate(outs):
+            outputPtrList.append(f"{indent} {out_gmem_t}* {mg.get_io_port_name(out, False, out_idx)}")
+
+        line = ", ".join(inputPtrList) + ",\n"
+        line += ", ".join(outputPtrList) + "\n"
+
+        return line
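What gen_io_str yields, shown for one float input and one float output; the tensor names are illustrative only:

indent = '    '
ins = [f"{indent} float* gmem_in0_ptr_input_1"]
outs = [f"{indent} float* gmem_out0_ptr_layer12_out"]
print(", ".join(ins) + ",\n" + ", ".join(outs))
#      float* gmem_in0_ptr_input_1,
#      float* gmem_out0_ptr_layer12_out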
+    @classmethod
+    def write_wrapper(cls, meta: VitisUnifiedWriterMeta, model, mg):
+
+        inp_gmem_t, out_gmem_t, inps, outs = meta.vitis_unified_config.get_corrected_types()
+        indent = '    '
+
+        # start writing <project>_dm.cpp
+
+        filedir = os.path.dirname(os.path.abspath(__file__))
+        fin = open(os.path.join(filedir, '../../templates/vitis_unified/myproject_dm.cpp'))
+        fout = open(f'{model.config.get_output_dir()}/firmware/{mg.get_wrapper_file_name(model)}.cpp', 'w')
+
+        for line in fin.readlines():
+
+            if "MY_PROJECT_DM_INC" in line:
+                line = line.replace("MY_PROJECT_DM_INC", mg.get_wrapper_file_name(model))
+            elif "MY_PROJECT_TOP_FUNC" in line:
+                line = line.replace("MY_PROJECT_TOP_FUNC", mg.get_top_wrap_func_name(model))
+            elif "STREAM_BUF_IN_SZ" in line:
+                line = line.replace("VAL", str(meta.vitis_unified_config.get_in_stream_bufferSz()))
+            elif "STREAM_BUF_OUT_SZ" in line:
+                line = line.replace("VAL", str(meta.vitis_unified_config.get_out_stream_bufferSz()))
+
+            elif "// vitis-unified-wrapper-io" in line:
+                line = cls.gen_io_str(mg, indent, inp_gmem_t, out_gmem_t, inps, outs) + "\n"
+            elif "// vitis-unified-wrapper-interface" in line:
+                # Generate the interface pragmas: both the inputs (memory reads) and the
+                # outputs (memory writes) are AXI masters.
+                # Note that the gmem_in/out depth must match the co-simulation array
+                # allocation: if the allocation is larger than the depth, the result will
+                # not be correct; if it is smaller, the result is correct but the system
+                # will throw a segmentation fault. The depth does not affect resource
+                # usage in HLS generation.
+                for inp_idx, inp in enumerate(inps):
+                    line += (
+                        f"#pragma HLS INTERFACE m_axi port={mg.get_io_port_name(inp, True, inp_idx)} "
+                        f"bundle = gmem_in{inp_idx} depth={inp.size()}\n"
+                    )
+                for out_idx, out in enumerate(outs):
+                    line += (
+                        f"#pragma HLS INTERFACE m_axi port={mg.get_io_port_name(out, False, out_idx)} "
+                        f"bundle = gmem_out{out_idx} depth={out.size()}\n"
+                    )
+            elif "// vitis-unified-wrapper-stream-dec" in line:
+                # Declare the stream buffers: the AXI master read deposits the input here
+                # and the AXI master write drains the output from here.
+                for inp_idx, inp in enumerate(inps):
+                    line += f"{indent} static hls::stream<{inp.type.name}> {mg.get_local_stream_name(inp, True, inp_idx)};\n"
+                for out_idx, out in enumerate(outs):
+                    line += (
+                        f"{indent} static hls::stream<{out.type.name}> {mg.get_local_stream_name(out, False, out_idx)};\n"
+                    )
+
+            elif "// vitis-unified-wrapper-stream-config" in line:
+                for inp_idx, inp in enumerate(inps):
+                    line += (
+                        f"#pragma HLS STREAM variable={mg.get_local_stream_name(inp, True, inp_idx)} "
+                        f"depth=STREAM_BUF_IN_SZ\n"
+                    )
+                for out_idx, out in enumerate(outs):
+                    line += (
+                        f"#pragma HLS STREAM variable={mg.get_local_stream_name(out, False, out_idx)} "
+                        f"depth=STREAM_BUF_OUT_SZ\n"
+                    )
+
+            elif "// vitis-unified-wrapper-load" in line:
+                # Call load_input to convert the AXI master read into an AXI stream (buffer).
+                for inp_idx, inp in enumerate(inps):
+                    line += (
+                        f"load_input({mg.get_io_port_name(inp, True, inp_idx)}, "
+                        f"{mg.get_local_stream_name(inp, True, inp_idx)}, amtQuery, {inp.size()});\n"
+                    )
+            elif "// vitis-unified-wrapper-compute" in line:
+                poolList = []
+                for inp_idx, inp in enumerate(inps):
+                    poolList.append(f"{mg.get_local_stream_name(inp, True, inp_idx)}")
+                for out_idx, out in enumerate(outs):
+                    poolList.append(f"{mg.get_local_stream_name(out, False, out_idx)}")
+                joinedIo = f",\n{indent}{indent}{indent}".join(poolList)
+                line += f"{indent} {mg.get_top_model_name(model)}({joinedIo});\n"
+
+            elif "// vitis-unified-wrapper-store" in line:
+                # Call store_result to convert the AXI stream (buffer) back into an AXI master write.
+                for out_idx, out in enumerate(outs):
+                    line += (
+                        f"store_result({mg.get_io_port_name(out, False, out_idx)}, "
+                        f"{mg.get_local_stream_name(out, False, out_idx)}, amtQuery, {out.size()});\n"
+                    )
+
+            fout.write(line)
+
+        fin.close()
+        fout.close()
+
+        # start writing <project>_dm.h
+
+        filedir = os.path.dirname(os.path.abspath(__file__))
+        fin = open(os.path.join(filedir, '../../templates/vitis_unified/myproject_dm.h'))
+        fout = open(f'{model.config.get_output_dir()}/firmware/{mg.get_wrapper_file_name(model)}.h', 'w')
+
+        for line in fin.readlines():
+
+            if "FILENAME" in line:
+                line = line.replace("FILENAME", mg.get_wrapper_file_name(model).upper())
+            elif "MY_PROJECT_INC.h" in line:
+                line = line.replace("MY_PROJECT_INC", mg.get_main_file_name(model))
+            elif "MY_PROJECT_TOP_FUNC" in line:
+                line = line.replace("MY_PROJECT_TOP_FUNC", mg.get_top_wrap_func_name(model))
+            elif "// vitis-unified-wrapper-io" in line:
+                line += cls.gen_io_str(mg, indent, inp_gmem_t, out_gmem_t, inps, outs) + "\n"
+            fout.write(line)
+
+        fin.close()
+        fout.close()
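A concrete reading of the depth rule documented in the interface branch above, for a hypothetical (4, 4, 1) input, i.e. 16 elements per query; the port name is invented:

inp_size = 4 * 4 * 1  # elements per query, what inp.size() would return here
print(
    "#pragma HLS INTERFACE m_axi port=gmem_in0_ptr_input_1 "
    f"bundle = gmem_in0 depth={inp_size}"
)
# co-simulation must allocate exactly `depth` elements for this port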
diff --git a/hls4ml/writer/vitis_writer.py b/hls4ml/writer/vitis_writer.py
index 46c0ba3044..35b13c2201 100644
--- a/hls4ml/writer/vitis_writer.py
+++ b/hls4ml/writer/vitis_writer.py
@@ -64,11 +64,14 @@ def write_build_prj_override(self, model):
         dstpath = f'{model.config.get_output_dir()}/build_prj.tcl'
         copyfile(srcpath, dstpath)
 
-    def write_hls(self, model):
+    def write_hls(self, model, is_multigraph=False):
         """
         Write the HLS project. Calls the steps from VivadoWriter, adapted for Vitis
         """
-        super().write_hls(model)
+        if is_multigraph:
+            super().write_hls(model, is_multigraph=True)
+            return
+        super().write_hls(model, is_multigraph=False)
         self.write_nnet_utils_overrides(model)
         self.write_board_script_override(model)
         self.write_build_prj_override(model)
diff --git a/test/pytest/test_backend/cmpResult.py b/test/pytest/test_backend/cmpResult.py
new file mode 100644
index 0000000000..b6753240b3
--- /dev/null
+++ b/test/pytest/test_backend/cmpResult.py
@@ -0,0 +1,26 @@
+import os
+from pathlib import Path
+
+import numpy as np
+
+test_root_path = Path(__file__).parent
+os.environ['XILINX_VITIS'] = "/tools/Xilinx/Vitis/2023.2"
+os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH']
+
+
+def checkEqual(a, b):
+    equal = np.array_equal(a, b)
+    if equal:
+        print("Test passed: both arrays are equal \U0001f642")
+    else:
+        print("Test failed: the arrays are not equal \U0001f62c")
+
+
+bridge_result = np.load(test_root_path / "output_file/outputGenbit.npy")
+zcu_result = np.load(test_root_path / "output_file/out_hw.npy")
+zcu_flat = zcu_result.reshape(zcu_result.shape[0], -1)
+
+print(bridge_result.shape)
+print(zcu_result.shape)
+
+checkEqual(bridge_result, zcu_flat)
diff --git a/test/pytest/test_backend/pynq_example.ipynb b/test/pytest/test_backend/pynq_example.ipynb
new file mode 100644
index 0000000000..f0e62618b7
--- /dev/null
+++ b/test/pytest/test_backend/pynq_example.ipynb
@@ -0,0 +1,755 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "id": "a0e69eaf",
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "# import the library\n",
+    "from pynq import Overlay  # import the overlay\n",
+    "from pynq import allocate  # import for CMA (contiguous memory allocation)\n",
+    "from pynq import DefaultIP  # import the IP connector library for extension\n",
+    "from pynq import Interrupt\n",
+    "import pynq_driver\n",
+    "import asyncio\n",
+    "import numpy as np\n",
+    "import os\n",
+    "import subprocess\n",
+    "import re\n",
+    "import time"
+   ]
+  },
+  {
+   "cell_type": "code",
"execution_count": 2, + "id": "89082d04", + "metadata": {}, + "outputs": [ + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%microblaze/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "data": { + "application/javascript": [ + "\n", + "try {\n", + "require(['notebook/js/codecell'], function(codecell) {\n", + " codecell.CodeCell.options_default.highlight_modes[\n", + " 'magic_text/x-csrc'] = {'reg':[/^%%pybind11/]};\n", + " Jupyter.notebook.events.one('kernel_ready.Kernel', function(){\n", + " Jupyter.notebook.get_cells().map(function(cell){\n", + " if (cell.cell_type == 'code'){ cell.auto_highlight(); } }) ;\n", + " });\n", + "});\n", + "} catch (e) {};\n" + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Help on Overlay in module pynq.overlay:\n", + "\n", + "\n", + " Default documentation for overlay system.bit. The following\n", + " attributes are available on this overlay:\n", + " \n", + " IP Blocks\n", + " ----------\n", + " myproject_gem_1 : pynq_driver.MyDfxCtrl\n", + " axi_intc_0 : pynq.overlay.DefaultIP\n", + " ps_e : pynq.overlay.DefaultIP\n", + " \n", + " Hierarchies\n", + " -----------\n", + " None\n", + " \n", + " Interrupts\n", + " ----------\n", + " None\n", + " \n", + " GPIO Outputs\n", + " ------------\n", + " None\n", + " \n", + " Memories\n", + " ------------\n", + " PSDDR : Memory\n", + "\n" + ] + } + ], + "source": [ + "overlay = Overlay(\"system.bit\")\n", + "help(overlay)" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "id": "b19361e3", + "metadata": {}, + "outputs": [ + { + "data": { + "application/json": { + "axi_intc_0_intr_1_interrupt_concat/In0": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In0", + "index": 0 + }, + "axi_intc_0_intr_1_interrupt_concat/In1": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In1", + "index": 1 + }, + "axi_intc_0_intr_1_interrupt_concat/In10": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In10", + "index": 10 + }, + "axi_intc_0_intr_1_interrupt_concat/In11": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In11", + "index": 11 + }, + "axi_intc_0_intr_1_interrupt_concat/In12": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In12", + "index": 12 + }, + "axi_intc_0_intr_1_interrupt_concat/In13": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In13", + "index": 13 + }, + "axi_intc_0_intr_1_interrupt_concat/In14": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In14", + "index": 14 + }, + "axi_intc_0_intr_1_interrupt_concat/In15": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In15", + "index": 15 + }, + "axi_intc_0_intr_1_interrupt_concat/In16": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In16", + "index": 16 + }, + "axi_intc_0_intr_1_interrupt_concat/In17": { + "controller": "axi_intc_0", + 
"fullpath": "axi_intc_0_intr_1_interrupt_concat/In17", + "index": 17 + }, + "axi_intc_0_intr_1_interrupt_concat/In18": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In18", + "index": 18 + }, + "axi_intc_0_intr_1_interrupt_concat/In19": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In19", + "index": 19 + }, + "axi_intc_0_intr_1_interrupt_concat/In2": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In2", + "index": 2 + }, + "axi_intc_0_intr_1_interrupt_concat/In20": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In20", + "index": 20 + }, + "axi_intc_0_intr_1_interrupt_concat/In21": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In21", + "index": 21 + }, + "axi_intc_0_intr_1_interrupt_concat/In22": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In22", + "index": 22 + }, + "axi_intc_0_intr_1_interrupt_concat/In23": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In23", + "index": 23 + }, + "axi_intc_0_intr_1_interrupt_concat/In24": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In24", + "index": 24 + }, + "axi_intc_0_intr_1_interrupt_concat/In25": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In25", + "index": 25 + }, + "axi_intc_0_intr_1_interrupt_concat/In26": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In26", + "index": 26 + }, + "axi_intc_0_intr_1_interrupt_concat/In27": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In27", + "index": 27 + }, + "axi_intc_0_intr_1_interrupt_concat/In28": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In28", + "index": 28 + }, + "axi_intc_0_intr_1_interrupt_concat/In29": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In29", + "index": 29 + }, + "axi_intc_0_intr_1_interrupt_concat/In3": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In3", + "index": 3 + }, + "axi_intc_0_intr_1_interrupt_concat/In30": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In30", + "index": 30 + }, + "axi_intc_0_intr_1_interrupt_concat/In31": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In31", + "index": 31 + }, + "axi_intc_0_intr_1_interrupt_concat/In4": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In4", + "index": 4 + }, + "axi_intc_0_intr_1_interrupt_concat/In5": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In5", + "index": 5 + }, + "axi_intc_0_intr_1_interrupt_concat/In6": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In6", + "index": 6 + }, + "axi_intc_0_intr_1_interrupt_concat/In7": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In7", + "index": 7 + }, + "axi_intc_0_intr_1_interrupt_concat/In8": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In8", + "index": 8 + }, + "axi_intc_0_intr_1_interrupt_concat/In9": { + "controller": "axi_intc_0", + "fullpath": "axi_intc_0_intr_1_interrupt_concat/In9", + "index": 9 + }, + "irq_const_tieoff/dout": { + "controller": "axi_intc_0", + "fullpath": "irq_const_tieoff/dout", + "index": 31 + }, + 
"myproject_gem_1/interrupt": { + "controller": "axi_intc_0", + "fullpath": "myproject_gem_1/interrupt", + "index": 1 + } + }, + "text/plain": [ + "{'irq_const_tieoff/dout': {'controller': 'axi_intc_0',\n", + " 'index': 31,\n", + " 'fullpath': 'irq_const_tieoff/dout'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In0': {'controller': 'axi_intc_0',\n", + " 'index': 0,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In0'},\n", + " 'myproject_gem_1/interrupt': {'controller': 'axi_intc_0',\n", + " 'index': 1,\n", + " 'fullpath': 'myproject_gem_1/interrupt'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In1': {'controller': 'axi_intc_0',\n", + " 'index': 1,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In1'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In2': {'controller': 'axi_intc_0',\n", + " 'index': 2,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In2'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In3': {'controller': 'axi_intc_0',\n", + " 'index': 3,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In3'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In4': {'controller': 'axi_intc_0',\n", + " 'index': 4,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In4'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In5': {'controller': 'axi_intc_0',\n", + " 'index': 5,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In5'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In6': {'controller': 'axi_intc_0',\n", + " 'index': 6,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In6'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In7': {'controller': 'axi_intc_0',\n", + " 'index': 7,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In7'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In8': {'controller': 'axi_intc_0',\n", + " 'index': 8,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In8'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In9': {'controller': 'axi_intc_0',\n", + " 'index': 9,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In9'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In10': {'controller': 'axi_intc_0',\n", + " 'index': 10,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In10'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In11': {'controller': 'axi_intc_0',\n", + " 'index': 11,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In11'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In12': {'controller': 'axi_intc_0',\n", + " 'index': 12,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In12'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In13': {'controller': 'axi_intc_0',\n", + " 'index': 13,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In13'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In14': {'controller': 'axi_intc_0',\n", + " 'index': 14,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In14'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In15': {'controller': 'axi_intc_0',\n", + " 'index': 15,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In15'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In16': {'controller': 'axi_intc_0',\n", + " 'index': 16,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In16'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In17': {'controller': 'axi_intc_0',\n", + " 'index': 17,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In17'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In18': {'controller': 'axi_intc_0',\n", + " 'index': 18,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In18'},\n", + " 
'axi_intc_0_intr_1_interrupt_concat/In19': {'controller': 'axi_intc_0',\n", + " 'index': 19,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In19'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In20': {'controller': 'axi_intc_0',\n", + " 'index': 20,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In20'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In21': {'controller': 'axi_intc_0',\n", + " 'index': 21,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In21'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In22': {'controller': 'axi_intc_0',\n", + " 'index': 22,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In22'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In23': {'controller': 'axi_intc_0',\n", + " 'index': 23,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In23'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In24': {'controller': 'axi_intc_0',\n", + " 'index': 24,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In24'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In25': {'controller': 'axi_intc_0',\n", + " 'index': 25,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In25'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In26': {'controller': 'axi_intc_0',\n", + " 'index': 26,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In26'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In27': {'controller': 'axi_intc_0',\n", + " 'index': 27,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In27'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In28': {'controller': 'axi_intc_0',\n", + " 'index': 28,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In28'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In29': {'controller': 'axi_intc_0',\n", + " 'index': 29,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In29'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In30': {'controller': 'axi_intc_0',\n", + " 'index': 30,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In30'},\n", + " 'axi_intc_0_intr_1_interrupt_concat/In31': {'controller': 'axi_intc_0',\n", + " 'index': 31,\n", + " 'fullpath': 'axi_intc_0_intr_1_interrupt_concat/In31'}}" + ] + }, + "execution_count": 3, + "metadata": { + "application/json": { + "expanded": false, + "root": "interrupt_pins" + } + }, + "output_type": "execute_result" + } + ], + "source": [ + "\n", + "overlay.interrupt_pins" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "id": "0e5b0e03", + "metadata": {}, + "outputs": [], + "source": [ + "# create an instance of the interrupt\n", + "my_interrupt = Interrupt('myproject_gem_1/interrupt')" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "id": "9459e8ae", + "metadata": {}, + "outputs": [], + "source": [ + "# Load input from .npy file\n", + "input_array = np.load(\"inputGenbit.npy\").astype(np.float32) # shape (20,4,4,1)\n", + "output_array = np.zeros(input_array.shape, dtype=np.float32)\n", + "\n", + "# Allocate physically contiguous memory for input and output\n", + "input_buffer = allocate(shape=input_array.shape, dtype=np.float32)\n", + "output_buffer = allocate(shape=output_array.shape, dtype=np.float32)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "id": "625c2b1f", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input array shape (10000, 4, 4, 1)\n", + "output array shape (10000, 4, 4, 1)\n" + ] + } + ], + "source": [ + "# check input shape\n", + "print(f\"input array shape {input_array.shape}\")\n", + "print(f\"output array shape 
{output_array.shape}\")" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "id": "7de66a89", + "metadata": {}, + "outputs": [], + "source": [ + "# copy data to input buffer\n", + "np.copyto(input_buffer, input_array)\n", + "input_buffer.flush()" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "id": "d18bac75", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "input gmem_in0_ptr_input_1 will be set to addr: 0x78400000 with elements: 160000\n", + "output gmem_out0_ptr_layer12_out will be set to addr: 0x78500000 with elements: 160000\n", + "amount of queries will be set to: 10000 at address: 0x28\n", + "prepare your interrupt\n", + "global interrupt enable register\n", + "enable gie successful\n", + "ap_done interrupt enable register\n", + "enable ap_done interrupt successful\n", + "ap_done register clear\n", + "clear ap_done interrupt successful\n", + "----------------------\n" + ] + } + ], + "source": [ + "# get the ip and initialize the system\n", + "ip = overlay.myproject_gem_1 # Replace with your IP instance name\n", + "ip.set_input (0, input_buffer)\n", + "ip.set_output(0, output_buffer)\n", + "ip.set_amt_query(input_array.shape[0])\n", + "ip.prepare_intr()" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "id": "d12a031a", + "metadata": {}, + "outputs": [], + "source": [ + "async def wait_for_acc():\n", + " print(\"starting the accelerator\")\n", + " ip.ctrl_start()\n", + " print(\"waiting for the accelerator to finish\")\n", + " await my_interrupt.wait()\n", + " print(\"accelerator has finished\")\n" + ] + }, + { + "cell_type": "code", + "execution_count": 10, + "id": "81750e60", + "metadata": {}, + "outputs": [], + "source": [ + "#### get event loop from asyncio\n", + "loop = asyncio.get_event_loop()" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "id": "fe5f7eb2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "starting the accelerator\n", + "waiting for the accelerator to finish\n", + "accelerator has finished\n" + ] + } + ], + "source": [ + "task = loop.create_task(wait_for_acc())\n", + "loop.run_until_complete(task)" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "id": "372b1982", + "metadata": {}, + "outputs": [], + "source": [ + "output_buffer.invalidate()" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "id": "6a7834c2", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "[[[[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.49609375]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.4921875 ]\n", + " [0.4765625 ]]]\n", + "\n", + "\n", + " [[[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.48046875]\n", + " [0.48046875]]]\n", + "\n", + "\n", + " [[[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.49609375]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.48828125]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.4921875 ]]]\n", + "\n", + "\n", + " ...\n", + "\n", + "\n", + " [[[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " 
[0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.48046875]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.4921875 ]\n", + " [0.46875 ]]]\n", + "\n", + "\n", + " [[[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.48046875]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.49609375]\n", + " [0.484375 ]]]\n", + "\n", + "\n", + " [[[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.5 ]\n", + " [0.49609375]]\n", + "\n", + " [[0.5 ]\n", + " [0.5 ]\n", + " [0.48828125]\n", + " [0.48828125]]]]\n" + ] + } + ], + "source": [ + "print(output_buffer)" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "31e1098e", + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "we got output shape: (10000, 4, 4, 1)\n" + ] + } + ], + "source": [ + "# convert it to numpy array\n", + "print(\"we got output shape:\", output_buffer.shape)\n", + "outNp = np.array(output_buffer)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "9bf5d406", + "metadata": {}, + "outputs": [], + "source": [ + "# save it to .npy file\n", + "np.save(\"out_hw.npy\", outNp)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.4" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/test/pytest/test_backend/vitis_unified.py b/test/pytest/test_backend/vitis_unified.py new file mode 100644 index 0000000000..04f445021c --- /dev/null +++ b/test/pytest/test_backend/vitis_unified.py @@ -0,0 +1,259 @@ +import os +from pathlib import Path + +import numpy as np +import pytest +from tensorflow.keras.layers import ( + Concatenate, + Conv2D, + Input, + MaxPooling2D, + UpSampling2D, +) +from tensorflow.keras.models import Model, load_model + +import hls4ml +import hls4ml.model + +test_root_path = Path(__file__).parent + +os.environ['XILINX_VITIS'] = "/tools/Xilinx/Vitis/2023.2" +os.environ['PATH'] = os.environ['XILINX_VITIS'] + '/bin:' + os.environ['PATH'] + +XPFM_PATH = "/tools/Xilinx/Vitis/2023.2/base_platforms/" "xilinx_zcu102_base_202320_1/xilinx_zcu102_base_202320_1.xpfm" +LOG_STD = True + + +def create_io_file_dir(): + os.makedirs(test_root_path / "input_file", exist_ok=True) + os.makedirs(test_root_path / "output_file", exist_ok=True) + + +def checkEqual(a, b): + + equal = np.array_equal(a, b) + if equal: + print("Test pass both are equal \U0001f642") + else: + print("Test Fail both are not equal \U0001f62c") + return equal + + +def create_simple_testcase(inputShape=(4, 4, 1), fileName="inputX.npy"): + n_in = np.random.rand(*inputShape).astype(np.float32) + os.makedirs(test_root_path / "input_file", exist_ok=True) + np.save(test_root_path / "input_file" / fileName, n_in) + + +def create_simple_unet(input_shape=(4, 4, 1), modelName="simpleSkip.keras"): + inputs = Input(input_shape) + # Encoder + c1 = Conv2D(2, (3, 3), activation='relu', padding='same')(inputs) + p1 = 
MaxPooling2D((2, 2))(c1) + # Bottleneck + bn = Conv2D(4, (3, 3), activation='relu', padding='same')(p1) + # Decoder + u1 = UpSampling2D((2, 2))(bn) + concat1 = Concatenate()([u1, c1]) + c2 = Conv2D(2, (3, 3), activation='relu', padding='same')(concat1) + # Output layer (1 channel) + outputs = Conv2D(1, (1, 1), activation='sigmoid')(c2) + model = Model(inputs, outputs) + model.compile(optimizer='adam', loss='binary_crossentropy') + model.save(test_root_path / "input_file" / modelName) + + +def gen_prj_dir(backend, io_type, strategy, granularity, prefix): + return str(test_root_path / f"hls4mlprj_{prefix}_{backend}_{strategy}_{io_type}_{granularity}") + + +def create_hls_model(model, config, backend, io_type, strategy, granularity, prefix): + output_dir = gen_prj_dir(backend, io_type, strategy, granularity, prefix) + # mono model build + hls_model = hls4ml.converters.convert_from_keras_model( + model, + hls_config=config, + output_dir=output_dir, + backend=backend, + io_type=io_type, + board='zcu102', + part='xczu9eg-ffvb1156-2-e', + clock_period='10ns', + input_type="float", + output_type="float", + xpfmPath=XPFM_PATH, + ) + hls_model.compile() + return hls_model + + +def create_hls_model4_cosim(model, config, backend, io_type, strategy, granularity, input_data_tb, output_data_tb, prefix): + output_dir = gen_prj_dir(backend, io_type, strategy, granularity, prefix) + # mono model build + hls_model = hls4ml.converters.convert_from_keras_model( + model, + hls_config=config, + output_dir=output_dir, + backend=backend, + io_type=io_type, + board='zcu102', + part='xczu9eg-ffvb1156-2-e', + clock_period='10ns', + input_type="float", + output_type="float", + input_data_tb=input_data_tb, + output_data_tb=output_data_tb, + ) + hls_model.compile() + return hls_model + + +def predict_hls_model(hls_model, input_data): + y_hls4ml = hls_model.predict(input_data) + return y_hls4ml + + +@pytest.mark.parametrize('io_type', ['io_stream']) +@pytest.mark.parametrize('strategy', ['latency']) +@pytest.mark.parametrize('granularity', ['name']) +@pytest.mark.parametrize('amt_query', [10]) +def test_backend_predict(io_type, strategy, granularity, amt_query): + create_io_file_dir() + # create and load data set + create_simple_testcase(inputShape=(amt_query, 4, 4, 1), fileName="inputX.npy") + input_data = np.load(test_root_path / "input_file" / "inputX.npy") + # create and load model + model_name = "simpleSkip.keras" + create_simple_unet(modelName=model_name) + model = load_model(test_root_path / "input_file" / model_name) + # config the keras model + config = hls4ml.utils.config_from_keras_model(model, granularity=granularity) + + # create hls4ml model + vitis_unified_model = create_hls_model(model, config, "VitisUnified", io_type, strategy, granularity, "bridge") + vitis_model = create_hls_model(model, config, "Vitis", io_type, strategy, granularity, "bridge") + + # predict test + + y_hls4ml_unified = predict_hls_model(vitis_unified_model, input_data) + y_hls4ml = predict_hls_model(vitis_model, input_data) + + assert checkEqual(y_hls4ml_unified, y_hls4ml), "the result from vitis unified and vitis are not equal!" 
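Note: exact equality in the assertion above is a deliberate choice, since both backends compile the same fixed-point kernels and their emulated outputs should match bit for bit. If the backends ever diverge in float-to-fixed handling, a tolerance-based check is the usual fallback; a minimal sketch (the atol value is illustrative):

import numpy as np

def check_close(a, b, atol=1e-4):
    # tolerance-based variant of checkEqual()
    return np.allclose(a, b, rtol=0.0, atol=atol)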
+ + +# test_backend_predict("io_stream", 'latency', 'name', 10) + + +@pytest.mark.parametrize('io_type', ['io_stream']) +@pytest.mark.parametrize('strategy', ['latency']) +@pytest.mark.parametrize('granularity', ['name']) +@pytest.mark.parametrize('amt_query', [10]) +def test_co_simulation(io_type, strategy, granularity, amt_query): + create_io_file_dir() + # create and load data set + create_simple_testcase(inputShape=(amt_query, 4, 4, 1), fileName="inputCosim.npy") + input_data = np.load(test_root_path / "input_file" / "inputCosim.npy") + # create and load model + model_name = "simpleSkipCosim.keras" + create_simple_unet(modelName=model_name) + model = load_model(test_root_path / "input_file" / model_name) + # config the keras model + config = hls4ml.utils.config_from_keras_model(model, granularity=granularity) + + # predict it first + vitis_unified_model = create_hls_model(model, config, "VitisUnified", io_type, strategy, granularity, "precosim") + y_hls4ml_unified = predict_hls_model(vitis_unified_model, input_data) + np.save(test_root_path / "output_file" / "outputCosim.npy", y_hls4ml_unified) + + input_data_tb = str(test_root_path / "input_file" / "inputCosim.npy") + output_data_tb = str(test_root_path / "output_file" / "outputCosim.npy") + + # create hls4ml model + vitis_unified_model_cosim = create_hls_model4_cosim( + model, config, "VitisUnified", io_type, strategy, granularity, input_data_tb, output_data_tb, "cosim" + ) + # do cosim + vitis_unified_model_cosim.compile() + vitis_unified_model_cosim.build(synth=True, cosim=True, log_to_stdout=LOG_STD) + + bridge_result_path = ( + gen_prj_dir("VitisUnified", io_type, strategy, granularity, "cosim") + "/tb_data/tb_output_predictions.dat" + ) + cosim_result_path = ( + gen_prj_dir("VitisUnified", io_type, strategy, granularity, "cosim") + "/tb_data/rtl_cosim_results.log" + ) + + bridge_result = np.loadtxt(bridge_result_path) + cosim_result = np.loadtxt(cosim_result_path) + + assert np.allclose(bridge_result, cosim_result, rtol=0.0, atol=1e-4), "the result from bridge and cosim are not equal!" 
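When the allclose assertion above fails, it helps to inspect the two testbench dumps side by side. A small hedged helper, assuming the file layout used above (tb_output_predictions.dat and rtl_cosim_results.log in the project's tb_data directory, loadable with np.loadtxt into equal shapes):

import numpy as np

def dump_mismatches(pred_path, cosim_path, atol=1e-4, limit=10):
    pred = np.loadtxt(pred_path)
    cosim = np.loadtxt(cosim_path)
    # report up to `limit` positions where the two results disagree
    bad = np.argwhere(~np.isclose(pred, cosim, rtol=0.0, atol=atol))
    for idx in bad[:limit]:
        i = tuple(idx)
        print(i, pred[i], cosim[i])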
+
+
+# test_co_simulation("io_stream", 'latency', 'name', 10)
+
+
+@pytest.mark.parametrize('io_type', ['io_stream'])
+@pytest.mark.parametrize('strategy', ['latency'])
+@pytest.mark.parametrize('granularity', ['name'])
+@pytest.mark.parametrize('amt_query', [10])
+def test_fifo_depth(io_type, strategy, granularity, amt_query):
+    create_io_file_dir()
+    # create and load data set
+    create_simple_testcase(inputShape=(amt_query, 4, 4, 1), fileName="inputFifoDepth.npy")
+    input_data = np.load(test_root_path / "input_file" / "inputFifoDepth.npy")
+    # create and load model
+    model_name = "simpleSkipFifoDepth.keras"
+    create_simple_unet(modelName=model_name)
+    model = load_model(test_root_path / "input_file" / model_name)
+    # config the keras model
+    config = hls4ml.utils.config_from_keras_model(model, granularity=granularity)
+
+    # predict it first
+    vitis_unified_model = create_hls_model(model, config, "VitisUnified", io_type, strategy, granularity, "fifodepth")
+    y_hls4ml_unified = predict_hls_model(vitis_unified_model, input_data)
+    np.save(test_root_path / "output_file" / "outputFifoDepth.npy", y_hls4ml_unified)
+
+    input_data_tb = str(test_root_path / "input_file" / "inputFifoDepth.npy")
+    output_data_tb = str(test_root_path / "output_file" / "outputFifoDepth.npy")
+
+    # create the hls4ml model with the FIFO depth optimization flow enabled
+    config['Flows'] = ['vitisunified:fifo_depth_optimization']
+    vitis_unified_model_fifo = create_hls_model4_cosim(
+        model, config, "VitisUnified", io_type, strategy, granularity, input_data_tb, output_data_tb, "fifodepth"
+    )
+    # compile the model
+    vitis_unified_model_fifo.compile()
+
+    fifodepth_result_path = gen_prj_dir("VitisUnified", io_type, strategy, granularity, "fifodepth") + "/fifo_depths.json"
+    assert os.path.exists(fifodepth_result_path), "the fifo_depths.json file does not exist"
+
+
+# test_fifo_depth("io_stream", 'latency', 'name', 10)
+
+
+@pytest.mark.parametrize('io_type', ['io_stream'])
+@pytest.mark.parametrize('strategy', ['latency'])
+@pytest.mark.parametrize('granularity', ['name'])
+@pytest.mark.parametrize('amt_query', [10000])
+def test_gen_unified(io_type, strategy, granularity, amt_query):
+    create_io_file_dir()
+    # create and load data set
+    create_simple_testcase(inputShape=(amt_query, 4, 4, 1), fileName="inputGenbit.npy")
+    input_data = np.load(test_root_path / "input_file" / "inputGenbit.npy")
+    # create and load model
+    model_name = "simpleSkipGenBit.keras"
+    create_simple_unet(modelName=model_name)
+    model = load_model(test_root_path / "input_file" / model_name)
+    # config the keras model
+    config = hls4ml.utils.config_from_keras_model(model, granularity=granularity)
+
+    # predict it first
+    vitis_unified_model = create_hls_model(model, config, "VitisUnified", io_type, strategy, granularity, "gen_unified")
+    y_hls4ml_unified = predict_hls_model(vitis_unified_model, input_data)
+    np.save(test_root_path / "output_file" / "outputGenbit.npy", y_hls4ml_unified)
+
+    vitis_unified_model.compile()
+    vitis_unified_model.build(synth=True, bitfile=True, log_to_stdout=LOG_STD)
+
+
+# test_gen_unified("io_stream", 'latency', 'name', 10000)
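A quick sanity check one can run on the fifo_depths.json artifact asserted in test_fifo_depth above. The project path follows gen_prj_dir(), and the JSON schema (FIFO name mapped to an integer depth) is an assumption about what the optimizer writes, so treat this as a sketch:

import json
from pathlib import Path

prj = Path("hls4mlprj_fifodepth_VitisUnified_latency_io_stream_name")
depths = json.loads((prj / "fifo_depths.json").read_text())

# assumed schema: {fifo_name: depth}; verify depths are positive integers
assert all(isinstance(d, int) and d > 0 for d in depths.values())
print(sorted(depths.items(), key=lambda kv: -kv[1])[:5])  # five deepest FIFOs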