Skip to content

Commit 8409d0c

Browse files
Update host code for clarity & better data handling
1 parent 361f698 commit 8409d0c

File tree

11 files changed

+630
-441
lines changed

11 files changed

+630
-441
lines changed

hls4ml/backends/vitis_accelerator/vitis_accelerator_backend.py

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ def create_initial_config(
2222
clock_uncertainty='27%',
2323
io_type="io_parallel",
2424
num_kernel=1,
25-
num_thread=1,
25+
num_worker=1,
2626
batchsize=8192,
2727
hw_quant=False,
2828
vivado_directives=None,
@@ -36,7 +36,7 @@ def create_initial_config(
3636
clock_period: clock period passed to hls project
3737
io_type: io_parallel or io_stream
3838
num_kernel: how many compute units to create on the fpga
39-
num_thread: how many threads the host cpu uses to drive the fpga
39+
num_worker: how many threads the host cpu uses to drive each CU on the fpga
4040
batchsize: how many samples to process within a single buffer on the fpga
4141
vivado_directives: Directives passed down to Vivado that controls the hardware synthesis and implementation steps
4242
Returns:
@@ -47,7 +47,7 @@ def create_initial_config(
4747
config["AcceleratorConfig"] = {}
4848
config["AcceleratorConfig"]["Board"] = board
4949
config["AcceleratorConfig"]["Num_Kernel"] = num_kernel
50-
config["AcceleratorConfig"]["Num_Thread"] = num_thread
50+
config["AcceleratorConfig"]["Num_Worker"] = num_worker
5151
config["AcceleratorConfig"]["Batchsize"] = batchsize
5252
config["AcceleratorConfig"]["HW_Quant"] = hw_quant
5353
config["AcceleratorConfig"]["Vivado_Directives"] = vivado_directives
@@ -112,11 +112,15 @@ def dat_to_numpy(self, model):
112112
y = np.loadtxt(output_file, dtype=float).reshape(-1, expected_shape)
113113
return y
114114

115-
def hardware_predict(self, model, x, target="hw", debug=False):
115+
def hardware_predict(self, model, x, target="hw", debug=False, profilingRepeat=-1):
116116
if debug:
117117
command = "DEBUG=1 "
118+
if isinstance(profilingRepeat, int) and profilingRepeat > 0:
119+
command += "PROFILING_DATA_REPEAT_COUNT=" + str(profilingRepeat) + " "
118120
self._validate_target(target)
121+
119122
self.numpy_to_dat(model, x)
123+
120124
currdir = os.getcwd()
121125
os.chdir(model.config.get_output_dir())
122126
command += "TARGET=" + target + " make run"

hls4ml/backends/vitis_accelerator/vitis_accelerator_config.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ def __init__(self, config):
3030
self.config["Part"] = self.part
3131

3232
self.num_kernel = accel_config.get("Num_Kernel", 1)
33-
self.num_thread = accel_config.get("Num_Thread", 1)
33+
self.num_worker = accel_config.get("Num_Worker", 1)
3434
self.batchsize = accel_config.get("Batchsize", 8192)
3535
self.hw_quant = accel_config.get("HW_Quant", False)
3636

@@ -42,8 +42,8 @@ def get_board_type(self):
4242
def get_platform(self):
4343
return self.platform
4444

45-
def get_num_thread(self):
46-
return self.num_thread
45+
def get_num_worker(self):
46+
return self.num_worker
4747

4848
def get_num_kernel(self):
4949
return self.num_kernel

hls4ml/templates/vitis_accelerator/Makefile

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@ CARD_CFG ?= accelerator_card.cfg
3232
PLATFORM ?= $(shell awk -F '=' '/platform=/ {print $$2}' $(CARD_CFG))
3333

3434
# Board Type (determines whether design will go through packaging step)
35-
BOARD_TYPE :=
35+
BOARD_TYPE := #BOARDTYPE
3636

3737
# Kernel name
3838
KERNEL_NAME := #PRJNAME
@@ -115,8 +115,8 @@ xclbin: $(BUILD_DIR)/$(WRAPPER_NAME).xclbin
115115
INCLUDES := -I$(XILINX_XRT)/include/ -I$(XILINX_VIVADO)/include/ -I$(XILINX_HLS)/include/
116116
INCLUDES += -I$(PWD)/libs/ -I$(PWD)/firmware/ -I$(PWD)/firmware/nnet_utils/
117117

118-
host: $(KERNEL_NAME)_host_cl.cpp libs/xcl2.cpp
119-
$(CXX) $(CXXFLAGS) $^ -o $@ $(INCLUDES) $(LDFLAGS)
118+
host: $(KERNEL_NAME)_host_cl.cpp libs/xcl2.cpp $(wildcard libs/*.hpp)
119+
$(CXX) $(CXXFLAGS) $(KERNEL_NAME)_host_cl.cpp libs/xcl2.cpp -o $@ $(INCLUDES) $(LDFLAGS)
120120

121121
# Execute program #############################################################
122122

@@ -128,7 +128,7 @@ else
128128
@echo "Setting XCL_EMULATION_MODE to $(TARGET)"
129129
$(eval EMULATION_MODE := XCL_EMULATION_MODE=$(TARGET))
130130
endif
131-
@cd firmware && $(EMULATION_MODE) ../host ../$(BUILD_DIR)/$(WRAPPER_NAME).xclbin
131+
@cd firmware && $(EMULATION_MODE) ../host ../$(BUILD_DIR)/$(WRAPPER_NAME).xclbin $(PROFILING_DATA_REPEAT_COUNT)
132132

133133
# Cleanup #####################################################################
134134

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
#pragma once

#include <cstdint>
#include <fstream>
#include <iostream>
#include <list>
#include <stdexcept>
#include <sstream>
#include <string>
#include <vector>

#include "Types.hpp"

/**
 * \brief Reads whitespace-delimited sample data from a text file, zero-pads it
 * to a whole number of batches, and distributes the batches round-robin across
 * Workers. Also owns the result buffers and writes evaluation results back out.
 * \tparam T Element type of the input samples
 * \tparam U Element type of the evaluation results
 */
template <class T, class U> class DataBatcher {
  public:
    /**
     * \brief Constructor
     * \param batchsize Number of samples
     * \param sampleInputSize Flattened length of a single input to the model
     * \param sampleOutputSize Flattened length of a single output from the model
     * \param numWorkers Total number of workers
     * \param profiling If true, the given data will be iterated over multiple times,
     * for more accurate throughput testing.
     * \param profilingDataRepeat Only used if profiling is set to True. Additional number of
     * times the given data is iterated over.
     */
    DataBatcher(int batchsize, int sampleInputSize, int sampleOutputSize, int numWorkers,
                bool profiling, int profilingDataRepeat)
        : _batchsize(batchsize), _sampleInputSize(sampleInputSize), _sampleOutputSize(sampleOutputSize),
          _numWorkers(numWorkers), _profiling(profiling), _profilingDataRepeat(profilingDataRepeat) {}

    /**
     * \brief Read in data to a buffer and zero-pad it to a whole number of batches.
     * \param filename Name of a text file with one whitespace-separated sample per
     * line (the format used by the VitisAccelerator backend).
     * \throws std::runtime_error If the file cannot be opened, a value fails to
     * parse, or any line does not contain exactly sampleInputSize values.
     */
    void read(const std::string& filename) {
        std::cout << "\nReading data from text file " << filename << std::endl;

        // Read in text file
        std::ifstream fin(filename);
        if (!fin.is_open()) {
            throw std::runtime_error("Error opening file " + filename);
        }

        std::string line;
        while (std::getline(fin, line)) {
            originalSampleCount++;
            std::istringstream parser(line);
            T val;
            while (parser >> val) {
                inputData.push_back(val);
            }
            // parser stops on either EOF (line fully consumed) or a parse failure;
            // anything short of EOF means a token could not be converted to T
            if (!parser.eof()) {
                throw std::runtime_error("Failed to parse value on line " + std::to_string(originalSampleCount));
            }
            // Guard against ragged lines: each sample must contribute exactly
            // _sampleInputSize values, otherwise every later batch is misaligned
            if (inputData.size() != originalSampleCount * static_cast<uint64_t>(_sampleInputSize)) {
                throw std::runtime_error("Incorrect number of values on line " + std::to_string(originalSampleCount));
            }
        }
        std::cout << "Read in " << originalSampleCount << " lines" << std::endl;
        fin.close();

        // Zero-pad the final partial batch. Integer ceiling division is exact,
        // unlike std::ceil on a double, for arbitrarily large sample counts.
        numBatches = (originalSampleCount + _batchsize - 1) / _batchsize;
        if (numBatches * _batchsize > originalSampleCount) {
            inputData.resize(numBatches * _batchsize * _sampleInputSize, (T)0);
        }
    }

    /**
     * \brief Allocate space for writing results to. Must be called after read()
     * and before batch().
     */
    void createResultBuffers() {
        storedEvalResults.resize(numBatches * _batchsize * _sampleOutputSize, (U)0);

        // Allocate space to dump the extra arbitrary data used during profiling;
        // one scratch batch per worker is enough because profiling results are discarded
        if (_profiling) {
            profilingResultsDump.resize(_numWorkers * _batchsize * _sampleOutputSize, (U)0);
        }
    }

    /**
     * \brief Splits data into batches and distributes batches evenly amongst Workers.
     * \param batchedData A vector of containers for each Worker's batches/workload.
     * Size must be equal to _numWorkers.
     * \throws std::runtime_error If read() or createResultBuffers() has not been called.
     */
    void batch(std::vector<std::list<Batch<T, U>>>& batchedData) {
        if (inputData.size() == 0 || originalSampleCount == 0) {
            throw std::runtime_error("No data to batch");
        }
        if (storedEvalResults.size() == 0) {
            throw std::runtime_error("Create result buffers first");
        }

        batchedData.reserve(_numWorkers);
        for (int i = 0; i < _numWorkers; i++) {
            batchedData.emplace_back();
        }

        // Round-robin assignment: batch i goes to worker i % _numWorkers.
        // Each batch carries raw pointers into inputData/storedEvalResults,
        // so those vectors must not be resized while workers run.
        uint64_t batchIndex = 0;
        while (batchIndex < numBatches) {
            int worker = batchIndex % _numWorkers;
            uint64_t inputLocation = batchIndex * _batchsize * _sampleInputSize;
            uint64_t outputLocation = batchIndex * _batchsize * _sampleOutputSize;

            const T* in = &inputData[inputLocation];
            U* out = &storedEvalResults[outputLocation];
            Batch<T, U> newBatch = {in, out};

            batchedData[worker].push_back(newBatch);
            batchIndex++;
        }

        if (_profiling) {
            std::cout << "Creating profiling batches" << std::endl;
            profilingBatchCount = numBatches * (_profilingDataRepeat + 1);
            while (batchIndex < profilingBatchCount) {
                int worker = batchIndex % _numWorkers;
                // Inputs re-read the real data cyclically; outputs all land in the
                // worker's scratch region of profilingResultsDump and are discarded
                uint64_t inputLocation = (batchIndex % numBatches) * _batchsize * _sampleInputSize;
                uint64_t outputLocation = worker * _batchsize * _sampleOutputSize;

                const T* in = &inputData[inputLocation];
                U* out = &profilingResultsDump[outputLocation];
                Batch<T, U> newBatch = {in, out};

                batchedData[worker].push_back(newBatch);
                batchIndex++;
            }
        }
    }

    /**
     * \brief Releases resources used when reading from input files. Note: Data from those files
     * will be cleared and will no longer be accessible. Call write() before this,
     * since the sample/batch counters are reset here.
     */
    void closeFile() {
        inputData.clear();

        originalSampleCount = 0;
        numBatches = 0;
        profilingBatchCount = 0;
    }

    /**
     * \brief Write the stored evaluation results (excluding padding samples) to a
     * text file, one sample per line. Clears the result buffers afterwards.
     * \param filename Output filename.
     * \throws std::runtime_error If the file cannot be opened for writing.
     */
    void write(const std::string& filename) {
        std::cout << "\nWriting HW results to file " << filename << std::endl;
        std::ofstream fout;
        fout.open(filename, std::ios::trunc);

        if (fout.is_open()) {
            // Only the original (unpadded) samples are written out
            for (uint64_t i = 0; i < originalSampleCount; i++) {
                std::stringstream line;
                for (int n = 0; n < _sampleOutputSize; n++) {
                    line << (float)storedEvalResults[(i * _sampleOutputSize) + n] << " ";
                }
                fout << line.str() << "\n";
            }
            fout.close();
        } else {
            throw std::runtime_error("Error writing to file " + filename);
        }

        storedEvalResults.clear();
        profilingResultsDump.clear();
    }

    /// \brief Number of samples read in (not including padding).
    uint64_t getSampleCount() {
        return originalSampleCount;
    }

    /// \brief Number of samples after zero-padding to a whole number of batches.
    uint64_t getPaddedSampleCount() {
        return numBatches * _batchsize;
    }

    /// \brief Effective number of samples evaluated when profiling (data repeats included).
    uint64_t getProfilingSampleCount() {
        return profilingBatchCount * _batchsize;
    }

    /// \brief Whether profiling mode was requested at construction.
    bool isProfilingMode() {
        return _profiling;
    }

  private:
    int _batchsize;
    int _sampleInputSize;
    int _sampleOutputSize;
    int _numWorkers;
    bool _profiling;
    int _profilingDataRepeat;

    /// @brief Number of samples read in. (Not including padding).
    uint64_t originalSampleCount = 0;
    /// @brief Number of batches of data. (After padding).
    uint64_t numBatches = 0;
    /// @brief Effective number of batches of data being evaluated.
    uint64_t profilingBatchCount = 0;
    /// @brief Vector with values.
    std::vector<T> inputData;
    /// @brief Vector to store evaluation results.
    std::vector<U> storedEvalResults;
    /// @brief Vector for dumping results from extra arbitrary data used during profiling.
    std::vector<U> profilingResultsDump;
};

hls4ml/templates/vitis_accelerator/libs/DdrFpga.hpp

Lines changed: 0 additions & 33 deletions
This file was deleted.

0 commit comments

Comments
 (0)