From d0b536c8d056a1382054442092669eee67207268 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Sun, 12 Oct 2025 22:41:11 +0530 Subject: [PATCH 01/34] python AST to backend flattened AST --- .vscode/settings.json | 37 + build/lib/charmnumeric/__init__.py | 1 + build/lib/charmnumeric/array.py | 355 ++++++++ build/lib/charmnumeric/ast.py | 127 +++ build/lib/charmnumeric/ccs.py | 124 +++ build/lib/charmnumeric/linalg.py | 22 + charmnumeric.egg-info/PKG-INFO | 143 ++++ charmnumeric.egg-info/SOURCES.txt | 12 + charmnumeric.egg-info/dependency_links.txt | 1 + charmnumeric.egg-info/requires.txt | 12 + charmnumeric.egg-info/top_level.txt | 1 + charmnumeric/array.py | 14 +- charmnumeric/ast.py | 58 +- config.cmake | 12 + dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 0 -> 22101 bytes examples/graph.py | 23 +- examples/run.sh | 4 + playground/ha.py | 12 + src/.gitignore | 1 + src/CMakeLists.txt | 51 ++ src/ast.hpp | 267 ++++-- src/server.cpp | 7 +- src/server.decl.h | 148 ++++ src/server.def.h | 306 +++++++ src/server.hpp | 926 +-------------------- 25 files changed, 1605 insertions(+), 1059 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 build/lib/charmnumeric/__init__.py create mode 100644 build/lib/charmnumeric/array.py create mode 100644 build/lib/charmnumeric/ast.py create mode 100644 build/lib/charmnumeric/ccs.py create mode 100644 build/lib/charmnumeric/linalg.py create mode 100644 charmnumeric.egg-info/PKG-INFO create mode 100644 charmnumeric.egg-info/SOURCES.txt create mode 100644 charmnumeric.egg-info/dependency_links.txt create mode 100644 charmnumeric.egg-info/requires.txt create mode 100644 charmnumeric.egg-info/top_level.txt create mode 100644 config.cmake create mode 100644 dist/charmnumeric-0.1.dev0-py3.12.egg create mode 100755 examples/run.sh create mode 100644 playground/ha.py create mode 100644 src/.gitignore create mode 100644 src/CMakeLists.txt create mode 100644 src/server.decl.h create mode 100644 src/server.def.h diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..8c99cd8 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,37 @@ +{ + "files.associations": { + "*.sage": "python", + "type_traits": "cpp", + "ostream": "cpp", + "__node_handle": "cpp", + "cstdint": "cpp", + "array": "cpp", + "deque": "cpp", + "forward_list": "cpp", + "list": "cpp", + "string": "cpp", + "unordered_map": "cpp", + "vector": "cpp", + "string_view": "cpp", + "initializer_list": "cpp", + "ranges": "cpp", + "span": "cpp", + "chrono": "cpp", + "format": "cpp", + "text_encoding": "cpp", + "__bit_reference": "cpp", + "__hash_table": "cpp", + "__split_buffer": "cpp", + "__tree": "cpp", + "iterator": "cpp", + "map": "cpp", + "bitset": "cpp", + "utility": "cpp", + "queue": "cpp", + "random": "cpp", + "set": "cpp", + "stack": "cpp", + "tuple": "cpp", + "iosfwd": "cpp" + } +} \ No newline at end of file diff --git a/build/lib/charmnumeric/__init__.py b/build/lib/charmnumeric/__init__.py new file mode 100644 index 0000000..a1c1976 --- /dev/null +++ b/build/lib/charmnumeric/__init__.py @@ -0,0 +1 @@ +__version__ = '0.1.dev' diff --git a/build/lib/charmnumeric/array.py b/build/lib/charmnumeric/array.py new file mode 100644 index 0000000..47162f7 --- /dev/null +++ b/build/lib/charmnumeric/array.py @@ -0,0 +1,355 @@ +import sys +import warnings +import numpy as np +import weakref +import sys +from charmnumeric.ast import get_max_depth, ASTNode +from charmnumeric.ccs import to_bytes, from_bytes, send_command_raw, send_command, \ + send_command_async, 
connect, get_creation_command, \ + get_epoch, get_name, get_fetch_command, Handlers, OPCODES, is_debug + + +deletion_buffer = b'' +deletion_buffer_size = 0 + + +def create_ndarray(ndim, dtype, shape=None, name=None, command_buffer=None): + z = ndarray(ndim, dtype=dtype, shape=shape, name=name, + command_buffer=command_buffer) + return z + + +def from_numpy(nparr): + return ndarray(nparr.ndim, dtype=nparr.dtype, shape=nparr.shape, + nparr=nparr) + + +class ndarray: + def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, + nparr=None, name=None, command_buffer=None): + """ + This is the wrapper class for AUM array objects. + The argument 'name' should be None except when wrapping + an array that already exists on the AUM backend server + """ + + if ndim > 2: + raise NotImplementedError("Arrays of dimensionality greater than" + "2 not supported yet") + self.dtype = dtype + self.ndim = ndim + self.itemsize = np.dtype(dtype).itemsize + self.init_value = init_value + self.command_buffer = command_buffer + if isinstance(shape, np.ndarray) or isinstance(shape, list) or \ + isinstance(shape, tuple): + self.shape = np.asarray(shape, dtype=np.int32) + elif shape is not None: + self.shape = np.asarray([shape], dtype=np.int32) + else: + self.shape = np.zeros(self.ndim, dtype=np.int32) + self.valid = False + if command_buffer is None: + self.valid = True + if name: + self.name = name + #self.command_buffer = None + self.command_buffer = ASTNode(self.name, 0, [weakref.proxy(self)]) + else: + self.name = get_name() + if nparr is not None: + buf = nparr.tobytes() + else: + buf = None + cmd = get_creation_command(self, self.name, self.shape, buf=buf) + send_command_async(Handlers.creation_handler, cmd) + #self.command_buffer = None + self.command_buffer = ASTNode(self.name, 0, [weakref.proxy(self)]) + else: + self.name = name + max_depth = get_max_depth() + if self.command_buffer.depth >= max_depth: + if is_debug(): + print("Maximum AST depth exceeded for %i, " + "flushing buffer" % self.name) + self._flush_command_buffer() + + def __del__(self): + global deletion_buffer, deletion_buffer_size + if self.valid: + deletion_buffer += to_bytes(self.name, 'L') + deletion_buffer_size += 1 + + def __len__(self): + return self.shape[0] + + #def __str__(self): + # print(self.get()) + + #def __repr__(self): + # #self._flush_command_buffer() + # # FIXME add repr + # pass + + def __setitem__(self, key, value): + if not isinstance(key, slice) or key.start != None or \ + key.stop != None or key.step != None: + raise ValueError("Can't set items or slices") + self.cmd_buffer = ASTNode(res, OPCODES.get('setitem'), [self, value]) + + def __neg__(self): + return self * -1 + + def __add__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('+'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __radd__(self, other): + return self + other + + def __sub__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('-'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rsub__(self, other): + return -1 * (self - other) + + def __lt__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('<'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rlt__(self, other): + return self >= other + + def __gt__(self, 
other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('>'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rgt__(self, other): + return self <= other + + def __le__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('<='), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rle__(self, other): + return self > other + + def __ge__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('>='), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rge__(self, other): + return self < other + + def __eq__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('=='), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __req__(self, other): + return self == other + + def __ne__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('!='), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rne__(self, other): + return self != other + + def __and__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('&'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rand__(self, other): + return self & other + + def __or__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('|'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __ror__(self, other): + return self | other + + def __invert__(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('!'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __mul__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('*'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rmul__(self, other): + return self * other + + def __truediv__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('/'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __rtruediv__(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('/'), [1., self/other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def __matmul__(self, other): + if self.ndim == 2 and other.ndim == 2: + res_ndim = 2 + shape = np.array([self.shape[0], other.shape[1]], dtype=np.int32) + elif self.ndim == 2 and other.ndim == 1: + res_ndim = 1 + shape = np.array([self.shape[0]], dtype=np.int32) + elif self.ndim == 1 and other.ndim == 1: + res_ndim = 0 + shape = np.array([1], dtype=np.int32) + else: + raise RuntimeError("Dimension mismatch") + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('@'), [self, other]) + return create_ndarray(res_ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer) + + def _flush_command_buffer(self): + # send the command to server + # finally set command buffer to array name + global 
deletion_buffer, deletion_buffer_size + debug = is_debug() + if debug: + self.command_buffer.plot_graph() + if self.valid: + return + validated_arrays = {self.name : self} + cmd = self.command_buffer.get_command(validated_arrays) + reply_size = 0 + for name, arr in validated_arrays.items(): + reply_size += 8 + 8 * arr.ndim + if not debug: + cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + cmd + cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd + send_command_async(Handlers.operation_handler, cmd) + deletion_buffer = b'' + deletion_buffer_size = 0 + for i in range(len(validated_arrays)): + arr = validated_arrays[name] + arr.validate() + else: + for name, arr in validated_arrays.items(): + arr.validate() + self.validate() + + def get(self): + self._flush_command_buffer() + cmd = get_fetch_command(self) + if self.ndim == 0: + total_size = self.itemsize + data_bytes = send_command_raw(Handlers.fetch_handler, cmd, reply_size=total_size) + return from_bytes(data_bytes, np.dtype(self.dtype).char) + else: + total_size = self.itemsize + for i in self.shape: + total_size*=i + data_ptr = send_command_raw(Handlers.fetch_handler, cmd, reply_size=int(total_size)) + return np.frombuffer(data_ptr, np.dtype(self.dtype)).copy().reshape(self.shape) + + def evaluate(self): + self._flush_command_buffer() + + def validate(self): + self.valid = True + self.command_buffer = ASTNode(self.name, 0, [self]) + + def copy(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('copy'), [self]) + return create_ndarray(self.ndim, self.dtype,shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + def sqrt(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=0.5) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def cbrt(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=1/3) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def pow(self, exponent): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=exponent) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def log(self, base=np.e): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=base) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def log10(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=10) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def log2(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=2) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def exp(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('exp'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def absolute(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('abs'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def where(self, other, third): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('where'), [other, third, self]) + return create_ndarray(self.ndim, self.dtype, 
shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def any(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('any'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + def all(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('all'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + diff --git a/build/lib/charmnumeric/ast.py b/build/lib/charmnumeric/ast.py new file mode 100644 index 0000000..54011be --- /dev/null +++ b/build/lib/charmnumeric/ast.py @@ -0,0 +1,127 @@ +import numpy as np +import networkx as nx +import matplotlib.pyplot as plt +from ctypes import c_long +from networkx.drawing.nx_pydot import graphviz_layout +from charmnumeric.ccs import OPCODES, INV_OPCODES, to_bytes + + +max_depth = 10 + + +def set_max_depth(d): + global max_depth + max_depth = d + + +def get_max_depth(): + global max_depth + return max_depth + + +class ASTNode(object): + def __init__(self, name, opcode, operands, arg=0.0): + from charmtiles.array import ndarray + # contains opcode, operands + # operands are ndarrays + self.name = name + self.opcode = opcode + self.operands = operands + self.depth = 0 + self.arg = arg + if self.opcode != 0: + for op in self.operands: + if isinstance(op, ndarray): + self.depth = max(self.depth, 1 + op.command_buffer.depth) + + def get_command(self, validated_arrays, save=True): + from charmnumeric.array import ndarray + if self.opcode == 0: + cmd = to_bytes(self.opcode, 'L') + cmd += to_bytes(False, '?') + cmd += to_bytes(self.operands[0].name, 'L') + return cmd + cmd = to_bytes(self.opcode, 'L') + to_bytes(self.name, 'L') + cmd += to_bytes(save, '?') + to_bytes(len(self.operands), 'B') + for op in self.operands: + # an operand can also be a double + if isinstance(op, ndarray): + if op.name in validated_arrays: + opcmd = to_bytes(0, 'L') + opcmd += to_bytes(False, '?') + opcmd += to_bytes(op.name, 'L') + cmd += to_bytes(len(opcmd), 'I') + cmd += opcmd + else: + save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False + if save_op: + print("SAVING THIS LIL BISH") + print(op.name) + else: + print("NO SAVE") + print(op.name) + opcmd = op.command_buffer.get_command(validated_arrays, + save=save_op) + if not op.valid and save_op: + validated_arrays[op.name] = op + cmd += to_bytes(len(opcmd), 'I') + cmd += opcmd + elif isinstance(op, float) or isinstance(op, int): + opcmd = to_bytes(0, 'L') + opcmd += to_bytes(True, '?') + opcmd += to_bytes(op, 'd') + cmd += to_bytes(len(opcmd), 'I') + cmd += opcmd + cmd += to_bytes(self.arg, 'd') + return cmd + + def plot_graph(self, validated_arrays={}, G=None, node_map={}, + color_map={}, next_id=0, parent=None, save=True): + from charmnumeric.array import ndarray + if G is None: + G = nx.Graph() + if self.opcode == 0: + node_map[next_id] = 'a' + str(self.operands[0].name) + G.add_node(next_id) + if parent is not None: + G.add_edge(parent, next_id) + return next_id + 1 + opnode = next_id + G.add_node(next_id) + if parent is not None: + G.add_edge(parent, next_id) + node_map[next_id] = INV_OPCODES.get(self.opcode, '?') + if save: + color_map[next_id] = 'tab:red' + node_map[next_id] += (': a%i' % self.name) + next_id += 1 + for op in self.operands: + # an operand can also be a double + if isinstance(op, ndarray): + if op.name in validated_arrays: + G.add_node(next_id) + G.add_edge(opnode, next_id) + node_map[next_id] = 'a' + str(op.name) + 
color_map[next_id] = 'tab:green' + next_id += 1 + else: + save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False + if not op.valid and save_op: + #color_map[next_id] = 'tab:red' + validated_arrays[op.name] = op + next_id = op.command_buffer.plot_graph( + validated_arrays, G, node_map, color_map, next_id, + opnode, save_op) + elif isinstance(op, float) or isinstance(op, int): + G.add_node(next_id) + G.add_edge(opnode, next_id) + node_map[next_id] = op + next_id += 1 + if parent is None: + pos = graphviz_layout(G, prog='dot') + color_map_list = [color_map.get(node, 'tab:blue') for node in G] + nx.draw(G, pos, labels=node_map, node_color=color_map_list, + node_size=600, font_size=10) + plt.show() + return next_id + diff --git a/build/lib/charmnumeric/ccs.py b/build/lib/charmnumeric/ccs.py new file mode 100644 index 0000000..3e4639f --- /dev/null +++ b/build/lib/charmnumeric/ccs.py @@ -0,0 +1,124 @@ +import struct +import atexit +from pyccs import Server +from charmnumeric import array + +debug = False +server = None +client_id = 0 +next_name = 0 +epoch = 0 + +OPCODES = {'+': 1, '-': 2, '*': 3 ,'/': 4, '@': 5, 'copy': 6, 'axpy': 7, + 'axpy_multiplier': 8, 'setitem': 9, 'pow': 10, '>': 11, + '<': 12, '>=': 13, '<=': 14, '==': 15, '!=': 16, '&': 17, + '|': 18, '!':19, 'where':20, 'log': 21, 'exp': 22, 'abs': 23, 'any':24, 'all':25} + +INV_OPCODES = {v: k for k, v in OPCODES.items()} + +def enable_debug(): + global debug + debug = True + +def disable_debug(): + global debug + debug = False + +def is_debug(): + global debug + return debug + +def get_name(): + global next_name + curr_name = next_name + next_name += 1 + return (client_id << 56) + curr_name + +def to_bytes(value, dtype='I'): + return struct.pack(dtype, value) + +def from_bytes(bvalue, dtype='I'): + return struct.unpack(dtype, bvalue)[0] + +def send_command_raw(handler, msg, reply_size): + if server is not None: + server.send_request(handler, 0, msg) + return server.receive_response(reply_size) + +def send_command(handler, msg, reply_size=1, reply_type='B'): + global server + if server is not None: + return from_bytes(send_command_raw(handler, msg, reply_size), reply_type) + +def send_command_async(handler, msg): + global server + if server is not None: + server.send_request(handler, 0, msg) + +def get_epoch(): + global epoch + curr_epoch, epoch = epoch, epoch + 1 + return curr_epoch + +def connect(server_ip, server_port): + global server, client_id, debug + if not debug: + server = Server(server_ip, server_port) + server.connect() + client_id = send_command(Handlers.connection_handler, "") + atexit.register(disconnect) + +def disconnect(): + from charmnumeric.array import deletion_buffer, deletion_buffer_size + global client_id, deletion_buffer, deletion_buffer_size + if deletion_buffer_size > 0: + cmd = to_bytes(len(deletion_buffer), 'I') + deletion_buffer + cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd + send_command_async(Handlers.delete_handler, cmd) + deletion_buffer = b'' + deletion_buffer_size = b'' + cmd = to_bytes(client_id, 'B') + cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd + send_command_async(Handlers.disconnection_handler, cmd) + +def get_creation_command(arr, name, shape, buf=None): + """ + Generate array creation CCS command + """ + cmd = to_bytes(name, 'L') + cmd += to_bytes(arr.ndim, 'I') + cmd += to_bytes(buf is not None, '?') + cmd += to_bytes(arr.init_value is not None, '?') + for s in shape: + cmd += to_bytes(int(s), 'L') + if buf is not None: + cmd += buf + 
elif arr.init_value is not None: + cmd += to_bytes(arr.init_value, 'd') + print(cmd) + cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd + return cmd + +def get_fetch_command(arr): + """ + Generate CCS command to fetch entire array data + """ + cmd = to_bytes(arr.name, 'L') + cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd + return cmd + +def sync(): + # FIXME remove the size from the cmd + cmd = to_bytes(get_epoch(), 'i') + to_bytes(0, 'I') + send_command_raw(Handlers.sync_handler, cmd, 1) + +class Handlers(object): + connection_handler = b'aum_connect' + disconnection_handler = b'aum_disconnect' + creation_handler = b'aum_creation' + operation_handler = b'aum_operation' + fetch_handler = b'aum_fetch' + delete_handler = b'aum_delete' + sync_handler = b'aum_sync' + exit_handler = b'aum_exit' + diff --git a/build/lib/charmnumeric/linalg.py b/build/lib/charmnumeric/linalg.py new file mode 100644 index 0000000..823b7d5 --- /dev/null +++ b/build/lib/charmnumeric/linalg.py @@ -0,0 +1,22 @@ +import sys +import struct +import numpy as np +from pyccs import Server +from charmnumeric.ccs import OPCODES, get_name, send_command, Handlers +from charmnumeric.array import create_ndarray +from charmnumeric.ast import ASTNode + + +def axpy(a, x, y, multiplier=None): + operands = [a, x, y] + if multiplier is not None: + operands.append(multiplier) + operation = 'axpy_multiplier' + else: + operation = 'axpy' + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get(operation), operands) + return create_ndarray(x.ndim, x.dtype, + name=res, command_buffer=cmd_buffer) + + diff --git a/charmnumeric.egg-info/PKG-INFO b/charmnumeric.egg-info/PKG-INFO new file mode 100644 index 0000000..ec960a2 --- /dev/null +++ b/charmnumeric.egg-info/PKG-INFO @@ -0,0 +1,143 @@ +Metadata-Version: 2.4 +Name: charmnumeric +Version: 0.1.dev0 +Summary: A python library for distributed array computations +Home-page: https://github.com/UIUC-PPL/PyProject +Author: Aditya Bhosale +Author-email: adityapb1546@gmail.com +License: BSD +Classifier: Development Status :: 4 - Beta +Classifier: Intended Audience :: Developers +Classifier: Intended Audience :: Science/Research +Classifier: License :: OSI Approved :: BSD License +Classifier: Natural Language :: English +Classifier: Operating System :: MacOS :: MacOS X +Classifier: Operating System :: POSIX +Classifier: Operating System :: Unix +Classifier: Programming Language :: Python +Classifier: Programming Language :: Python :: 3 +Classifier: Topic :: Software Development :: Libraries +Classifier: Topic :: Utilities +Requires-Dist: numpy +Requires-Dist: charm4py +Provides-Extra: docs +Requires-Dist: sphinx; extra == "docs" +Provides-Extra: tests +Requires-Dist: pytest; extra == "tests" +Provides-Extra: dev +Requires-Dist: sphinx; extra == "dev" +Requires-Dist: pytest; extra == "dev" +Dynamic: author +Dynamic: author-email +Dynamic: classifier +Dynamic: description +Dynamic: home-page +Dynamic: license +Dynamic: provides-extra +Dynamic: requires-dist +Dynamic: summary + +charmnumeric +========== + +:code:`charmnumeric` is a python interface to a C++ distributed array library +implemented using Charm++ [#charm]_. +charmnumeric uses a client-server model with a client-side python +interface and a Charm++ server on the backend. The client and server +are connected using CCS [#ccs]_. +The server maintains a symbol table of distributed arrays which +are then looked up for computation when a CCS message is +received. 
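+
+A minimal client session, shown here only as an illustrative sketch (the
+server address, port and array sizes below are placeholders and assume a
+backend server is already running), looks like::
+
+    import numpy as np
+    from charmnumeric.ccs import connect
+    from charmnumeric.array import ndarray
+
+    # attach this client to the backend server over CCS
+    connect('127.0.0.1', 1234)
+
+    a = ndarray(1, 10, np.float64, init_value=1.0)
+    b = ndarray(1, 10, np.float64, init_value=2.0)
+    c = 2 * a + b     # only extends the AST buffer, no CCS message is sent yet
+    print(c.get())    # flushes the buffered AST to the server and fetches the data
+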
+ +:code:`charmnumeric.array` +---------------------- + +.. highlight:: python + +:code:`charmnumeric.array.ndarray`, analogous to :code:`numpy.ndarray`, is a proxy +object that wraps the name of the corresponding array on the server. +We use a lazy evaluation scheme for array computations. +The array operations incrementally build an AST which is stored in a buffer in the +:code:`ndarray` object. This AST is encoded into a CCS message when +either the data from the array is requested on the frontend or +when the size of the AST grows beyond a user configurable +threshold. +The server side Charm++ program decodes the CCS message and +rebuilds the AST which is then executed. + +The lazy evaluation scheme reduces the number of CCS messages required to +be sent from the client to the server. +It also helps in reducing the number of temporary arrays created on the +server side by accessing the reference counts of the frontend arrays in +the python runtime. For example:: + + v = ndarray(1, 10, np.float64) + b = ndarray(1, 10, np.float64) + c = ndarray(1, 10, np.float64) + w = c + for i in range(2): + y = v + b + w + z = v - y + w = 2 * (c - z) + b + w.evaluate() + +The above code snippet generates the following AST. Nodes with labels +starting with the letter :code:`a` are arrays. Nodes with an operation +label that are colored blue are operations that generate a temporary +array. Nodes with an operation label that are colored red are operations +that generate arrays that are to be stored on the server side. +The red node labels also show the name of the resulting array. +Note that the arrays :code:`y`, :code:`z` and :code:`w` for the first iteration +of the loop are considered to be temporary because they are overwritten +in the next iteration. These operations will be executed inplace +on the server side. + +.. figure:: docs/images/simple_ast.png + :alt: simple_ast + + *AST generated by the above code snippet* + +Here's another example of a conjugate gradient solver:: + + def solve(A, b): + x = ndarray(1, 1000, np.float64) + r = b - A @ x + p = r.copy() + rsold = r @ r + + for i in range(1000): + Ap = A @ p + alpha = rsold / (p @ Ap) + + x = lg.axpy(alpha, p, x) + r = lg.axpy(alpha, Ap, r, multiplier=-1.) + + rsnew = r @ r + + if np.sqrt(rsnew.get()) < 1e-8: + print("Converged in %i iterations" % (i + 1)) + break + + p = lg.axpy(rsnew / rsold, p, r) + rsold = rsnew + + return x + +This generates the following AST, + +.. figure:: docs/images/conj_ast.png + :alt: conj_ast + + *AST generated by the conjugate gradient example* + +Here the green nodes are arrays that do not exist on the server when the AST is +sent, but will be created and stored as a result of an operation in the current +AST before being referenced. + + +References +---------- + +.. [#charm] Charm++ Documentation - https://charm.readthedocs.io/en/latest/ +.. 
[#ccs] CCS Documentation - https://charm.readthedocs.io/en/latest/converse/manual.html?converse-client-server-interface#converse-client-server-interface + diff --git a/charmnumeric.egg-info/SOURCES.txt b/charmnumeric.egg-info/SOURCES.txt new file mode 100644 index 0000000..88cd83e --- /dev/null +++ b/charmnumeric.egg-info/SOURCES.txt @@ -0,0 +1,12 @@ +README.rst +setup.py +charmnumeric/__init__.py +charmnumeric/array.py +charmnumeric/ast.py +charmnumeric/ccs.py +charmnumeric/linalg.py +charmnumeric.egg-info/PKG-INFO +charmnumeric.egg-info/SOURCES.txt +charmnumeric.egg-info/dependency_links.txt +charmnumeric.egg-info/requires.txt +charmnumeric.egg-info/top_level.txt \ No newline at end of file diff --git a/charmnumeric.egg-info/dependency_links.txt b/charmnumeric.egg-info/dependency_links.txt new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/charmnumeric.egg-info/dependency_links.txt @@ -0,0 +1 @@ + diff --git a/charmnumeric.egg-info/requires.txt b/charmnumeric.egg-info/requires.txt new file mode 100644 index 0000000..33d7698 --- /dev/null +++ b/charmnumeric.egg-info/requires.txt @@ -0,0 +1,12 @@ +numpy +charm4py + +[dev] +sphinx +pytest + +[docs] +sphinx + +[tests] +pytest diff --git a/charmnumeric.egg-info/top_level.txt b/charmnumeric.egg-info/top_level.txt new file mode 100644 index 0000000..590bcaf --- /dev/null +++ b/charmnumeric.egg-info/top_level.txt @@ -0,0 +1 @@ +charmnumeric diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 47162f7..77c9e58 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -244,7 +244,7 @@ def _flush_command_buffer(self): if self.valid: return validated_arrays = {self.name : self} - cmd = self.command_buffer.get_command(validated_arrays) + cmd = self.command_buffer.get_command(validated_arrays, self.ndim, self.shape) reply_size = 0 for name, arr in validated_arrays.items(): reply_size += 8 + 8 * arr.ndim @@ -290,37 +290,37 @@ def copy(self): name=res, command_buffer=cmd_buffer) def sqrt(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=0.5) + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[0.5]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def cbrt(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=1/3) + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[1/3]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def pow(self, exponent): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=exponent) + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[exponent]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def log(self, base=np.e): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=base) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[base]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def log10(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=10) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[10]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def log2(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=2) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[2]) return 
create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index f67a98b..e581f91 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -20,7 +20,7 @@ def get_max_depth(): class ASTNode(object): - def __init__(self, name, opcode, operands, arg=0.0): + def __init__(self, name, opcode, operands, args=[]): from charmtiles.array import ndarray # contains opcode, operands # operands are ndarrays @@ -28,45 +28,53 @@ def __init__(self, name, opcode, operands, arg=0.0): self.opcode = opcode self.operands = operands self.depth = 0 - self.arg = arg + self.args = args if self.opcode != 0: for op in self.operands: if isinstance(op, ndarray): self.depth = max(self.depth, 1 + op.command_buffer.depth) - def get_command(self, validated_arrays, save=True): + ###################################################################################################################################### + # Encoding = | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # + # | 8 | 64 | 32 | 1 | 32 | 32 | 64 | 8 | 32 | ........................ | # + # NB: If opcode is 0, the encoding is limited to ID # + # Encoding = | dim | val | # + # | 8 | 64 | # + # NB: Latter encoding for double constants # + ###################################################################################################################################### + def get_command(self, validated_arrays, ndim, shape, save=True): from charmnumeric.array import ndarray + + # Ndims and Shape setup + cmd = to_bytes(ndim, 'B') + for _shape in shape: + cmd += to_bytes(_shape, 'L') + if self.opcode == 0: - cmd = to_bytes(self.opcode, 'L') - cmd += to_bytes(False, '?') - cmd += to_bytes(self.operands[0].name, 'L') + cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'I') return cmd - cmd = to_bytes(self.opcode, 'L') + to_bytes(self.name, 'L') - cmd += to_bytes(save, '?') + to_bytes(len(self.operands), 'B') + + cmd += to_bytes(self.opcode, 'I') + to_bytes(save, '?') + to_bytes(self.name, 'I') + to_bytes(len(self.operands), 'B') + cmd += to_bytes(len(self.args), 'I') + for arg in self.args: + cmd += to_bytes(arg, 'd') + for op in self.operands: - # an operand can also be a double if isinstance(op, ndarray): if op.name in validated_arrays: - opcmd = to_bytes(0, 'L') - opcmd += to_bytes(False, '?') - opcmd += to_bytes(op.name, 'L') - cmd += to_bytes(len(opcmd), 'I') - cmd += opcmd + opcmd = to_bytes(op.ndim, 'B') + for _shape in op.shape: + opcmd += to_bytes(_shape, 'L') + opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - opcmd = op.command_buffer.get_command(validated_arrays, - save=save_op) + opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op) if not op.valid and save_op: validated_arrays[op.name] = op - cmd += to_bytes(len(opcmd), 'I') - cmd += opcmd - elif isinstance(op, float) or isinstance(op, int): - opcmd = to_bytes(0, 'L') - opcmd += to_bytes(True, '?') - opcmd += to_bytes(op, 'd') - cmd += to_bytes(len(opcmd), 'I') - cmd += opcmd - cmd += to_bytes(self.arg, 'd') + elif isinstance(op, float): + opcmd = to_bytes(0, 'B') + to_bytes(op, 'd') + cmd += to_bytes(len(opcmd), 'I') + cmd += opcmd return cmd def plot_graph(self, validated_arrays={}, G=None, node_map={}, diff --git a/config.cmake b/config.cmake new file mode 100644 
index 0000000..973ccd8 --- /dev/null +++ b/config.cmake @@ -0,0 +1,12 @@ +set(CHARM_DIR "/home/shogo/master/Kale/charm/netlrts-linux-x86_64") +set(BASE_DIR "/home/shogo/master/Kale/LibCharmtyles") +set(EIGEN_DIR "/usr/include/eigen3") +set(CUDA_DIR "/path/to/CUDA/directory") +set(KOKKOS_DIR "/home/shogo/master/Kale/LibCharmtyles/kokkos/install") + +set(CHARMC "${CHARM_DIR}/bin/charmc") +set(CPU_OPTS "-c++-option -std=c++20 -O3 -march=native -DNDEBUG") +set(GPU_OPTS "-std=c++20 -O3 -march=native -DNDEBUG") +set(GPU_LINK_OPTS -O3 -language charm++ -L${KOKKOS_DIR}/lib64 -lkokkoscore -L${CUDA_DIR} -lcuda -lcudart) +set(LD_OPTS "") +set(INCS "-I${BASE_DIR}") diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg new file mode 100644 index 0000000000000000000000000000000000000000..8662739553bb9d0b8a2a62ec550bca57b1ee7ee5 GIT binary patch literal 22101 zcmagFbF64h(>-`>+qP}rW81cE+xOre+qP}nwrzXvGv8#s_jxnlov!I&FC^J;sBhy2 z7A)GcH|2OZc38|&iA#cUe0DqPY?hI=t7pCwT&5~5+V~86`4Lb^=#Yd=4 z2;H7AT_cRI{P@+tvW}nr9ex}4Xez<3g|9s2$v<&_Fm=6`Gh^E-syt)mWBYh406C<^ zrLj`kjDO-XIPsG^F6YNL`(v!7Gu*i@qApbe7n$ZHi5ac=Hqzn)w|?3?(Nns^eU+G_ z16a=qczgAn$I#r5c?j_!s}}cD3n_c7ymmhtXs6b`_8EdwG5f~ZrC)_E<`s0mXU?n% zx&Jk#aOKYKdD~-Od9BNHLujdEgROCaB!i-LzUO6Gm(_@q(4IipK7W?X?M-$N?`R+!qAxJ&9XjQ{v_QBvdsY5Boa?scuHhn{@ zzwRS-kdACk8Ati1a3`j8KvIElf?hu*ns1L(BjQ~`%AP#2*`hbC!<^oZV}9N#v(pAb zyr1-&L%F94+KOVdpQ1kMz}j82hQ&MzmjJG|t4VKF1pCc5b}KL31aoV7-0Rx-DnzMh zo}lS+?%sHAtSXiF9hqptyeNe_s3-2^*LZewBCAThd*G$GQ3~2&jOGirVam-Ji8V*Z z8kE*;ND%Y5fEBqG{sP?TQ0J|a1+Ku_dhz}xS>@xBMB3CKKZ+3m+wabuKkW&{Qf+rf zQ1X$K+}&u*kw<5=WT7D#=CnE>xh$qqA|&T$sFupiV zD{j;(uL42_xf^Tml2ykWwXY7t`&>>TU#zVT!k1b|^a)=S>7Eyq1S*6ZeQOU;1Q18! z=UruPf8h=bJ&C#yt8X$F1(jE;i1Gu~TN0;1KWIY`wHG*@E3k6%<}Iv{FE+0At7CTa zX4H$`Rf@h<4r5`F^4rgxrs7V5Ti%aw|a?O2)g5kFt5V_u|0wYlw$(y4FT=VX?tX^0iq>(`1uTl@6J8ZDWUrSWhQ=?9 z8c;yFM=JnY^~uHekeV!@+ye)jT{b$1GN)x!*mptp>SV}@4CIXrm;9A@T8KzRG`FY3 zcM6G5cmLfDeb*}jG|`HfYT8PU#-%?1F|yd*xooeil#O&gw|zkzBeLQ~7X6tX0nb)? 
zXj6#GUzmW|>DSrwyT#zAx%+o%1na^y_`Cr}%PH<2meofBJC6BI2ggM929%0qvD2xY zlB#`W(*gyh-Ug-nBpvs|o#!(+s-~6E>SZshfTNZDM%2ikp%)zz!ADITrjvG4-?R^o z=uax2(RI_Y*QuhtHM_)-WpB)}rW%zYZ3H~fXDLG8{`j9@MHV(i@11>Z@G;J}DlNyh zzFL(-Iy>0@OUzuwD3Vm|O!+zX!(#)f9zI)3M|rB*F!V=WoBYC%!9B04Em==g^=fqp zRP-(0LIAcdBS3F9!3@~u0`YWL(lyaW))I-RqbOG@QP`Me8s@fuol+m6D@dE-fGw*`su=oc#dJfocH*_)uVy$w9G}*Hn=W^4Xt7*iBJ$qC+ zUF~(SHQnNvAU*^-5zEYRycg{)y5MK{xlf3O7s6J5N(Kj|>-e8N8cPD|2dq~?`b?ogJNJoHGYjvYj{q@t zjK4C_;fpu-5)5(ds;$?C*67>Qe*fqpEir*;^wE>^EVx#ux9vfNc-TSz^23q;mZLTq z%u##LWXob%qgp(+3$k~;^D(d+h<6%79YkpEYH_5s3Z4gQ;#SpWe5{>j?^Z+=#i zQ&IdSqD1HH?(8!q2-i=55VGfUqv#1%U>-fkW}}TPBt|k%2^SQS!=L*x&i<>WUw_w%OG^Bjeh+G&aSj zg8}}kX=hyUHUm8p5oi~}y#C#ncVANSRyszXc8`++p8mGy82I0)6TnuOfd8WX3mO3T zKT#W-*qhiIo7ft8=viCXS~>j#{{ZN}d}efx?PK~o>(^gH{mpoyRzRlKd?m#R!?s9hCZ-3fRwybd#V9F(z_+m>dxdKAIEpyrYWn$M++l*Jv|Fs3uiq&I(v_(Sb8a1s_Ds@s93dH z6?yhq=0zz9TBVc!L;!6iuC*+?aNq<10Km`oKPUCC^$Z*x4LtrXwxszJyU~pL^Ysli z9JZ_|>oC1ygAqujK3i<=Uxd4CjS+_jqnU3*i&XT-`8u=d#|ytFypTk5>}fiUc6S=- zt=o6|fSngrMTt4!H6{o3{TYl|bGFH+c%yuca1zmm2D0x4L!&A7kH^qClnM4ERW5nu zsA(#+F~qVk4V8Dts01+^r03m22L5>_h3s1f-ja%R?h(!H!HgYCzJPr zjl}GX8ALT4>M?gZDMbY(X_|Nit3d9BB4cG7Qv{894CX*b z__0V*uJ$EXt-qc9uX%h+sF!&%rwz|i(DLEewEF{K2sm*`?)ybQVtJ2pbuTqJu@^>F zoak`cg3}-e;SecO8DVUOzEtDVN|k!6S>^NWK=!BR$+PU<2$O<}QGsNIIAI{Q(G0_uEkPJ8(6fU3aa+mM!j~C< zZLm2l;&86QQwNhZM72v$U?840t`vt$#=~sI2*pjnC=NsRqZfZxU7s+lhXJCzPLW(1 zfFU|y`6@x_DbQ0*Ni0V!n3>CPiAUoI1tAfYb@<_Xv5}%^2T8BNWY$XGn>JD}>MYeS z87NC%&&LC#C2Q3h1oJ|<5f$(uDm;t_+Vm2M0!mRh8GOS8YvJAL z*>=b9=rtkjQqgsZg=2=zfFGM|W=UZ~a{&81IJgS|4Ge%48wy2LRRZadAD3ws8$y>_ zxR)^XMsVYQd4Hg_KpUXIFIX0x^nE$l(K9HENKK#Qt44VU=&oXn;DeAXCh!XbeagSf$ zs=@y`+bOxj$k|o^df0r)C2>9tSc`7kyM4abWX`gpqf<}0*bI?$5bPFq*xWNHgk;Q$ zHM|;~b*uS)cqi@mh$$qIqqdIpj^*HhwW{5k?IP{~DO*~8ke8KVAohUHAv!RLM{E-B zKY;PvExZ$lpS&F}8#^B^nYS5i4r%85Nq$bnoncRvHHa1J#c6YkBTB^W>`@A6Gv)wh z*pI=4dM*Rh#~KLEC6wRRb7M;R^JKm`XY^Q@KmpJh(ikd3aB+KgD|s7%rsv*FwYFVo zVttM!C-k6`qpqRsUdb&&MypAqIFi&kRHUcBF4#Ob&TMK>#=HJY!xGCN(2Q z9`VoXjF{JLq9z>ZX-Y}>2?qh(Ed7A{@}<80w`Zh%%>Lf&Cu_8+;0`jG5O|pvoD1|3 zqv0u_yv;<=(ZYe!C^>)1aNlk0a*$GSezYDMP|fx==(o30myV0!7Si}HoN6eDTIVLyEH#Eg@-C-eAYrOWi2CM#d` zj%(wQPgz%O-@%x-EGU(CkzM{4WfYYUs@sj`!-(?0UzH70B)+xIKO0#m{>v}=2 zL}2_V%8Ld1iqt49#x-e{Vc0Y1KDs^qW0GtXQ4pHE16?1 zh?tY=x~2IQ(j8CuWGymGU`101mAhi$D(7A9eB(zFT;ub5@}Xb7qSxj4y%`QS-T|`L zakbzeL8%27)D|#=;m;}lcuA*!ZGs;=u+SzVY-k{1jqPWn?x3@&l2>B)^Qlt6_>r*) zF-A*brwNII$ii9WPvQx)%X=mmNP{bQQyY2X5@k0P&aGy?fG-#ehhdYnZ^@E5j$g>& z;#aY>;b3?#7MWb>nA3NFMQQHV{|+O<@kYzP90a;>Ry5wSThVSd|Me*?(HWn>W#XWp zecK%ymDxS$)3RUu>`vCxj2*Q@{%+zlT{Su2&!rK&U(H*=cK2$@6ldwC*ys%jn1sq#gp^n0Wq7LByS?{$V|}i`?11E45*TAgvxPa zF;U(@UA!`xc%-Ou1uTX1_}uyz9s)+(Hn?axC!qXs3OuhZkoqLO96&_H48Y)0-2uSB zTb=jA&+efuVy8~qC?;9)i#1Uq(r_>UuY7Y+XAQ$3I6%8W1g|FXdue+jKY7@E&45C8xiWB`D_55T`!ij(s{97SEr zZi5ZQcco?*3nf`W#{RiB3}AiMHp_~@9*pBu3IQTfoO|6^G52)vy7s60S7f4aM7J?D zn>2jHq|=dGwQF5fYh3tM^Ag}5Rvi_^Bw`Z1_GkA>iI-RVP(4)}O%zQv+__AAg(8?$ zUkgP_Smg5P?Xkt!T5+Db4I|=fk$y(%)xx=ez&3*&NOg906um_RytmmCuR$H?PASDb z$BfpLThF)i%fpCDFxte7#fRWM3%9r?xM1zjN8+p`Uml3g%&z||Yk8TpmXLTcJ7A!h z(mhN{De37zHxJ`Jw%BK4W3D5)`V9lbJ<#V3JY8#!rQ+7!SL@@CcV5&<_KujSmzh_ zOGNboRS_{YxZ!br0nslf$lsa-tn|O~6*8lzEWx5)CwW7)1_(cGVxyXjuCWo$2T9xCw?wlvN?~#FUT)`3BMQ?g7Gas(dDvQ=^yNdG(WrNt3 zsbXQGAYYj2paV%sEr4BvEkC6byy^2P0`5qUw z6J^yPm@`%>!rc7B3!;glA$CkiU0U@L+4DQ7J;OEl{YcBpM2VR9(~(?Djfi0T=c@ z(!Rre`I~A|&jYxtRP=L5`Ds?pUhSj#s8{lJ-R(3aFbTY` z#q6}2xq5{4-jDd1j*-o_<~?Drmb&1wdVJ5x%m`e(K<1j&GWkob((aOkuExFU0op^ag5BZfq_6kmjiEYO&@V7yZ;d<{D{O*j|h8O3U{Bol?BJR1TN?@ zn3a`jvMTnJ;9+7(;Qx-E<|lFTd+!UR#)7Qt6bdUnHh;+oSMP}&pX8y-Eu>Ffm3Fo9g` 
z{z6%xqn(5EJOc2dvIh%9f*EsO6w zf0bf!wliJwg1$DfU$dUo=xP)^%qR5iU(aQ>cPe{0fLvNQy`FugmP zX$y@-BxC$_N>W#U0!oC z7X4(Ut$DTJ&X<2XHaze(%uH&Uh}dS?ePzXoB1T7S5a`qtYDVN5V@twX>);Xgwon2IRCDwSP$sf~P%m_=G{BtP(F9hd zz74(V?eE)rF!-DP1A8i2VnpAF9p+EpfSnQ|sGk0t758tvo|9LZd&8(hW|AY`gqZ?? zM*ibrjWjM5eKauCZb2PrS*4c)`i9?<;_SF+d&%=HX#dzG1*dOZmgaXElsFd$84cVbT%wG&JmUL85641 zk?40WKUPxrhLC!E{S$-U(DCos-+*dZI*hBvpm>D931FO)(vn7&I8UUs{V`_yjo3zu zLU;Q@nf%yM2g|XsSNHDSF8X5FX~_r?Asc86&@}^8DdPz_*$F~qEySvQ*$I8CUU?2> zpOg4UrHeXh+oqo*hjKU5fx3ed*(OLmqT9ch#S8U`;@4Dyt0SGUwVvRWyHV3_A*iUG zuifEFU~#%GgS)U^J3uZpW076CY!fedx_$?MEP~BWqO~LdD+CBLM zldNDNvEOqJm4}x<8fu5Zg2~e4P*>#c1)7X}p+fs#BCJ224E2UWGk4AYeZ#2JU5EzS@Wm@T67ZeyY7c zy~@%u*q7Hu=s1r7$24GlY~U__-Z5JaGk^1#jyBGh6kBoIca8<|q(3Uc2AqWvM^i%> zof`?l%f*3WHAdZ|J#B8L%)!IOwSN{W znnqmV&7LhR=-GzhH6KbBW4r|LnjBE2E9-7`*BFvc!%ff;0ygVnL_YB9HYZD|Sae*7 z>kWbF_1talZfs)TqHO$Cy0iWJ>m;Q#;RqYS7B-MPJ%ujIk{tifSRwFsR!I-0l4JpU zRy@ZLpp2qLRcVvx+b6vzj(2m1(Nu+4vW9lVC)#=kAl8zq=2fL9>g8)Ch%PPtY5vO@ zD^krVA+xffZIZ6tSF;D>Mul_60tLcC)EVLyA~G{f;C~8)|42$GYk2RZe|0t+p#Raa z{zb0-YHSA9W`84+P5hK)Fat`+3*XSzZ=qX2C^|{4Q%lp{Ysq+)wr0SG0U4U(E9{>u z=X!)E-;={bVGA=#|5!ey39DS);HQblb&*}X=n2hwENztj0j8>W-?-y|*W*Ium_lI> zeY%yBi3h2vUFcs%t)gIfzpaCp(y_;OS$Vlh{6<2N?^>}Sh45+_9pAlTpAAg#FIpOs zQccnZ(O!2Ar!iq*h=8};1lwc@Rm`kv-A4&%FX9vt;88p2Pe)kfuLJ?9SPg)q`!(Da zlQ-L(8@*4WdZgu-3t?rAa!pHQkwdb2o_Ow=g*nKyD<;kS@=PAqT5QJEs>EiH$CmFW zNJiO50&OCqhFBhrZK0;a=EaMYP>W#Z2siS-^o)`%*X`SuNM)PIspoY*f&Mqn|8p}M zK;qPr`j@TN|24+{X03X9_8vwCM&>4ZdjG528QFU{o7>saGBYy$J#r)Nsfmdp01&_o zd=qhivk@3^e?&>YV*sQDOht@*3@S|4wDMKhN0=u?#feWAOgo2>k(44dRI}rgp^+Nn zR~W_b36?SzW^xgJO45c_VR~}rnnt0P{(hS6Nnw6gULAf;;yMcU8W#2@3T6#{K0app z1`9YpNKd&9U5J_cqcrtMv1yF)rdKu5rTyIF9>8TRs50QHpx z0HFInx%oeG;(z&grEO)6t>&7&Hz|%H5`jWFPV|(o+f1RAe4u7L63g-DK+JH>in{|g zD_H6*hGN{CVnD&#;>^|+QUA!gIewjn#%@z{8_Wt)!2opC)|jQV>ogy`(OfW8aww~` zGzv^aDc65Rw$A*!@nv%lCT#z7&pI7+zo?3t`J{V>^JUZSn)h%gB{7xavrXac=Ig^v zaj{1s9X_LN`-hP%g+iE+OW5Ox`l@JbY1d>l>8U>5`BykJx;u1#$hHtxc3K>x_8~1) zMrW~oT?>cxbNT@3_8c(VxC3)+vX(w zFOiCzEX&llzKj^ZOGP66C*VcBK1hi-ML!L6se(HAbT6;e9x&mJu=YbzmEAvuPwuLN zN=S?tLb29C(S&r)TZVB(-<R$ z!Up#Qb@vUwe7y}M>?D%_pJ1Mz?gGP-p@>K6A-)ad0H*^34f1f*h&lX~0W`uQgkuMx zZkJ^Ea%`?Jp+e=o;Gu>Gr}FfTPrWzZVzRokHa6Y_a=ZTCu?G|XGSG#1ZcZp?hle`| z71ZpcUr3k#UD2@LgR$8uO^ZcqbUD~ZIdXaCJdiAh*=~L@0e+2=p~pwFDnQUEN&p%u zkUfDs>K`!vp-)#aK|6b3s9@?iVQ5@`AJ;lcaNQY(N)>Pfl27Joo`)6+oaF$m5~tJ5 zMibFzkO#*skXc9O(K^&zhnwOWdz0;b{GMiIcHF8p#2&RUt;u8G@IfcyWb3iI-c7mH z^&^87I;F&b+XW{4FzL_eM-iU<^MpzOoGprDc*{a1KvNvzZm6J3oTe_qP}WIiJlKeV zYG5f!CM_k;SX?w3T?U$@Zz)ognoI~?%w{~a7&Fd5E5;^`I~rpFN;Tq)>!`08L8ZT+ z?~>XsPg<-p8e#GG%0)A%oS{l&DRnsyOM0smZNXDH%mUcaXg&O#(MAY1m90QjdZmC& znmyN4T5Hx*n%5k-xT+L%;kP=HUmSe|AC^Zg$htiBbOWZ%U?Z+gzcaMuz$0QK=0a5| z?t+aubn!-=kw-8^TZbC8h6`EuP$1a8db?Ri?RQiISvZWH*>#W9L7K!{{jkIV`4Z2N z(2?wlF=i1O@6^7ha!^detHcx9ozQ1tdXu@r8<@fooce>6vbN=9meO!?itUsHy}u&*2zL*zG6h@3mwL^L+fXFX<3{v4ansE}<*CB;j_QQ*My%5g zWt0;z_JftGqaK4OOn8?*%0|7-`3tI(4wv>pw1PtmdgP#M2)(VsNRvv5ay*gVyHwm9 zQr4yXwK8U^{lxKSI=Jh3qbXO^3XMTVtCa<Tb-FSeVf0)<8!`0pj*l&dpHigim!&?5GPkF{0&B#$;JHwlJu|-rq3|ONc8}me zhYZ~O0iXdbW364p^7FabSU50Hh4I?LJRH)=qMTn6N@?VF&OKn(}Rr=G?>-W$8buOcJ|3`YEF6)l^^CpP@ksxQ3R z%nHmY>Q0E2z@aULFy;?)9Hs0Y^OA%5$`^r+l7A8wg4B;v0I~r!6juWQ(TYA+PPF8U z=?(=`mI0$hLHrT~WUJ!%I-2SuC2v?aIQ%M~1LFa~LSkeK2vkZQg#y~kjxM(S;L@tt*@Ltjg;uq_8jBvA9M?`0jjDx&x<%hqVtpx?KH2%ISiyB$y-EEN_QMtPdsE!OpG*Svi& znW9W(K0A%)aiQwQwR3^-W)rv!-^lmqXnd^rBHId2X530_Aq@_V8+qu-!VLD-*QBnb zV$0|fTLizxE<=btaG|fQ{-xXD;w@vQr-!%cMc8m=Me#x|rVd{do4?xD^MT?}dV?DI zxwx_7+H&KlnSEn~`=-@$gbSTov!r<@x#z-rgmcB^;&iIu#%)?H6T=9t3f9hrhUHp< z?KMH<(AfXPUH*ZJ?Qs3TdL 
z=W>~|DXqL_J!WT>krI@vRhJRXZhadWO)dUL+jcEs4Y%W8t^yPwoUrmBoC?5i)lo=n z;TqnFLoDqAxO3xJ41qE`a#2*WLSX#{VPyv61I|*93poOEJedF!QO1Ja;p9$&%{Bo_ zmuggce-48*qPIp0lq|pa;PfhL@pfogdUpg;o@-e$1f-ibUy6S<}tIwMf1mnRrq&m zBm7K3tjcJO>G-3QBDNla?^roMg=@_}V3eVNwHq*$O|EM_m2je7&0MEV2~ z-K3$N2p=Tnc}P7ZeuX-%b4KyqUdn1+r9>YLM^wXCUXM**gw^0?oA8^jcbiwpE;<)l zcOV_>L{-1HVC`_ZeQB+9vvyE78Qj=Bv+=tT9&xdafA`i0b^*Zwd5JtifH!m6oPHnr z-I-~w&=7kQtk6IL3yLx)F;3KFM-4z46>Szjt4R1@e)x~GN}#4<3NHoUMTMQqCp(a9 zIOR4R+Xo;G6m9s)rFHy+Oe1EIBwoq z_i=iu`eJ`6!SmwUYV~zJR0?f&op(p2JTH7;O*a#9@C+U`Out5x9oU66W&pbU7=$qf zph`qI{I<#so`j@E9bX>Qizbg&fiGcgf~@5v;(AJl`ZNR zZkjL0(fQl2GN5*-Gc;|CJ}WJi5pHb{VQ+8orF-cKS3Td(-50oi0IQMh31gWBH>DgL zgFhe7^664Xhxaq8?L6RwzX(s9m)1%F-a@PbPydtiN#1MXeQ60}0N~ zl)T>g8NeviGkoa*v&&b71s=P#&Ca>ERgf2A6(dh=0p8;!^03Pb@PxiBY_-QnbVMzS zUxCo=yY~U-N#LDQd*YMu%KQYL;X8eNF22eLDS_8`;Qg!K`PT@n zM|;`KxvA#1{lew%L+h5bUHN@dT>u`N1w7@^&YWKX2aA!|Bn@)KII&;Lv$qxeCM4}H zDkW%mn)MH|7(In}=JWPe4wjBmnkAwfKLWgqvQ<7Nu&5v2`3!>vyHaf#3slHF<4oy_ zLn+1G;&+6=a~_N$U(>;(=E0+`&?$F10;Zc=J@q_OtC#gav?~||<6qj#fF(Gd*}w#D+y8_`=rz}>{)g+PN75C{w`h8K#4 z;+uQ=G?(aJAN&RQf-x{`YMiKz>Kng-VWu32ThI&I!$5?cTt9w;&`j# zqJi)q;%_w-)zGYr2P{=u13HLnc(GdZZqf24f8>p8npW@MtD`!|PSsvyZq>l);H7b2 z6fu+KtL+#Tr?KV{j*ogoI1Dly4 zKsf~XRHY9&Q9g^h9-I8)j>oD@g*^%W<6s*6D5+$+HUCYpgl+D~xl~ssSh>dLXj!c| zQ5ui}-1KG?!MtYAKlmmS{)`^+#C26xuUxIJ+d6{|br5Rj6bCy|sV@MFG3ooa&1z~) zF+N$PX?ND#pb2ZGPM@0k7<0H=C}!^!#9A^xR@^ymwQWSSzkzAbC@8P=6dK~Y&lhQe zNURDap@zt~TcbA~0dy@OzBlCH9JSmkw8Oj}9%lt2ghwQrUIGBw_7Xjd7qIna;A2rdiSuWWW&EprHJwchebPn3X)Cae)xtK5-A;1Cfz6_UR_ChQ^YA{Nl1*VL znL)K!$_BdVX>9&IWWL$hLd)=>Tlwr;f$XXjgS99t8`R8eel6RP&k>$G={I_8wCiuy z;we<*C*<$3;S1UoO}el@bmj;9a~&|#cQwiPA{kcQV^;hmQe;RQ;dl)Ua+14Z0 z)LgsXR#6h_ZPGb^@R(QL)P^^HmpzjwG?VYNklV-|&>XK*pP-++l3ut@*<`Ta$gM2L z;>oeIkxpKVgk6j`3+s55TS|=wa2rvSVHYf*UXTwN4;3?l39(Kd9OEZ{rw>rN{Z_s@ z4oSBeOSdUY$89|!bvY0*x>T=8hka=(ypEas^c)*P>JorOZFixbmB~mqOT}uflBK$o ze(DQ?NuECKkiNR;F73C`38(F;HVg1X<6@xx71m0p_PdaNMPh;@S!sgf9eM%)_7VuH zug;L5KW3F+$4Z2z=^62yvkH@TaV$oO-ZK5e<}f$fBUh9>R?#ZUwu(fU>DtI9S_r#92a)BFYIh5Ffd_xh*1jo%RoGgI6-Bhp?| zgT2xNzmw0B*RF1^ea+m;U3P@5%<7w%O>QJpJFY2;7T``uZ(7mGTxg#GR96NA?*2F+ zOP{XCuGpk9(luU6HD7j2P3CYUaeN7#2iy5>u`_PhbJJbn?r>{xHav-rIFDGbbcpS6 zrmwg1oA`P6`7&7Wr`F?Z*5b>p$CArt>T>MKWZxEFnjsxCQ-e4NLfo8p8>nv!^qrG= z`eu&9-Z0%Gw1=WcUcyE8Yk<$wP9mp;j~e~%)}{615ANNDYr zX5pZ&DFhiX3rbCa5q*VbKybksM>-93XgzY^$V52nP~YH*s0ObAHJcc%*1Iw)xD1nQ zof~NElRPdpH3NprcWPFV2t{llQiM>cC{kOOxRPSMOf@poljM9pveReTvg}&(toSnZ zvV2jpayO7K;+ON8n->ek6#WvNNT~o+EdZ?6G6X9)VakwpdzM@99|fYe5H>WThcZ+p zynw-X*CPa;&jTZx(;L@ijeDLRllS1p^%;tY3t zH;0S2$X@y2Zjq5lD-?OV#!IjhK9G4Mji$Wz)ohMLbt3{1_b zI$wTc4Csk3+e$t@lSPOSwd6%k8P4AU{h|0o12B=>E;^JqpW`1akzWYl@CGRKaBsgH zBfT57R?#Ehhq5+8w&dRd;$#WP{ds^7JqUOO-u)keIiW$imB>0FNWhFCMm-4ZYe@$K zJ_+Me0^;c1I)M(o1n7V~U`O!*-;4~rzkbfY8g;#q$Nd=4{(hdj5!*37W$>Wk}W+g*vW&bl`pwkcS>l#%6xU`v-HWthIsNra8p&NIXOS z2=+DG4u9>KnE(9|hsj4+gD>A2U!myrTd&lnhY{m+;L)y>=F3j#CY{1-XaWH!E_eI} zoxypGw;{mwXV^Kspl+AKk+A9;|KAcijknpk91H+}+uyccn*XDO{EC@&oHW>m02tu3DX2gndrtCh(r-}nYK+8hqb9gbK$TQcF zhH}IboixwZwwqW^dt)nL8XkXX?w`yoFQ7o z<#Mrlcu}*_avB7xq!oG*QhR~F#Ptci1tW|ccw#&t(imlXk<+}0_9O`T=fauES*!iY zRjwg3rhh;1WBJvff8L?VmnohDJYQSDXvsSiXQHTRd-$oPV04=w(}3PLnH(lKEHySc zcjMgfd`{FuU#VZJ34~7>*bZStjUrf_GhCTy+-x#CpW9ZMC-;X#gQTg43F}t`5K#4R zBM_GpDd4wi6%r|gK82p8mL|$-lXO+;ErJ;a*hfEv`b3Khv4o52k)Qyv%^XfHq71Yz z}ifUY5z z+h~QTERvu^Vdc#OX33Vi3Yj5LQROW_u`MbDPCH)=P9c|t+PO3d9d#egDxPwRVhrDs z)R|OB$rNYcZ)mn)rO&7E=e#-_PSv|wG?>NRU~XPw9_`gwu`JjVa^9xI|4`25+aN7) z7mjvc5C;~A3`gbv9?-qpnYi#&q|FGuY7*$)J8|>L_2)4|jx2&cn<|=X 
zI;}yx8hn|^(+DyP0CQU{QgCX*D30w@_YMr}ODEuJj7z4dd!wNsQEPM9n%7W42>ouZ zPw!5N$X9eQEGO@5;^Fyj!45tamXkgQ8_z8UE|zNsCN?KSEmp@FXpcspL}b$Iw?TZr zZH75*un?#Ujh0iK?^=J*d7L;{8bg$STN@pLYsIusq zK&sK4MmSfnn`=~tu$Z+{Uw8_##9%O0dWIo9fo_BFvm?w^JWE8KO{(^dxL&wve5^33 z@-hN#c|0awX&{9OC-ek?7l_9snOZLpC|tfjhlFvIzL8FjXs==&iCwtnXjC8HN&)a;?6IK^vZUpOWmKy`-XT{CZ&z3jgEvbhO~1H zES+<|bIT29+!LLV9$RnsGyLl4i^pQCa5q?9%Uzule2*R%olcpW9|t1NM<10hRhKPK zpBGUVco@Y5x388jKW4rV1O0+_Qg)XS3XXGj+^fz4p9P0ZHyphiY(!aDAehpNjlSD< zC-?iy*d`aZ*srJ6a?Mk|cHOOd!>@eHrpTIW&$M#<7ds70C9`WvcA9GNDyx+4W}g*L z$BOl|X8(l>tv`}|+72aLTOi$K76(%{_rgZ&DPivG227}iRsf_Fj0;i69Xeppn)~v%y`5`SFL(E9wLNiiFJ1a#IX+(_PfjB> zfZU^0#iTqfcT#1z`4q;+EfajmgdESsJ%0#w~Je{d)6}WQcD_%vnY;AfnYL+6showx&4( zmCgf}&I6TJSPorjSWYTSU6?9zA_xJH>Jz$6y8gDE0)-CgciPuHbb#Yjk36j+b@skX z**RJ4RdlTR0VHk&5S8D0MPA9)5}ADm=ajC^jYS4tr45R`HR9XX;hx0Uk31`9>cBKF z=(LPbuT<8D9tQm-$ahZB5E_0VNKI^$KLtDloMjyp9L6kC6!FgkhKQa3-&R6oHudf@F{Y3*Te}FJc4ts! zn4-Osv8i_pDo1rrcF~dTZ9)k*-*#l#>2m+rAMZs!QJdj!G{GT9R<4gMvFu(+3l3c# zw4a{|qUhp%lNHrVu>~96TQcab3tPbtfec_G?XW#ZKF2vPrx4J=_L5K0uTjU-zzX<% zHFf{0iIvS2lKjj61gppLyZ+5L&hAVqTdHed10L%7`p|xSWs=VF)vtDgqutOa27#^khn=P0z6*&^ zo3LAORWOxJeAWDIy6ou~L8el6ovhAI6dqidQutoxDCgL!bYIf@q17dx3sp|+Gqf=f z#pA(uDJNK!-#~c3Ue4y`(h-IBhQA!q91c>ps2&-ieIy>-g{wx8BWIZfS(d$cZf4wr zG@zmYxSLsFRzn z!ye@h4NO#2fizE=qa@wgBc}z+!~3(0TQ3lwyEK~dbr_^ZC-SI~V5D7zPi7&mixwMB zim1;Hj`O8yKB|?8Ypw*)`4OJog3Wm6I7hA5y!Gik;UC@%X~w4&+sC>?2zv>A5|jBb z2YhleO}(|32Mrtiigaf@FW@gplD~v;x#6bRA`V{RQ=h;q_aU2qAXOhgF5b{x14m@7 zD;R>#LXX=bd#h%eKy>dCo~dHgNQ-@k<>@27`Tt#*Ov{|5nf@(I4*vfNlYiP)|En!_&KwhuE(r=;|e7)nAyLOMmdhDIcYkdTrNM>?eA1z})d=o(sT zknWc5kd_byk^1m`zjNU5`JVMX`;S@skNaBl?6voL)_(5&yN;)k)n>v?t5NBUs3CM` zOcJ_8x|sE6A$Z`5x$0eeqfTc!{A%L0Y1>q$;uy6Ad3kfR5l;hn&XzV_wg$r&vDFwP zXiL%1A7QUYnqL{akZFFZ2KBVqjd~>?HktH%Y8@(c=r)l2Y9O`N@8CoP6DrPsX7fW- zTB=+RcGBJsWrs*=|!FZ}ab|Xw}k(y9}A=%A%Srn+t`Hg6g141F+TCYgY zwa4}B?A*1xc~!&3g`^eSp3VP>$$>`%l%- z(E=d^v~SWwc;b;iQHslhxCn5w%^b`9LfSrkLb|gj(=G_lJ2QI2y_%f9Z(VFS1Te*J z21uC=pAYBNy<*P-v2=H-8DbrpY;cnYP{4Thz{7`OJ@5C1vOYG&i0zHo-y33xK&Vs( zpG;u%Oy&sUmwTR)fJ)O^26N^0Nc}eew380BLOGP0sc%f3J;LtK_w2NP*Th^S5iM2X z2Cra2QBY;n6cCIpsNYg#hz*l71SKdo6l|k2GHT;`+}Tni7)knR(At2SOXL8-`YdaO zT0?M76*-$R=v&7(T#_2@gqlq@9e#^J@i^}akO0P;{JdV!pkWXA>wF|IUs|AD1CzF! 
z51ej~&Hqqtr#K=w5EA@$Q+Yb~;jWe@AC0p0h%TciUQZ-djJrO-J)6fYtS6=BUkbT$ zt9g&XtQT|pw=lDS8#{Js5OQ$R9cf8_Iyl4iz3{z<)y8Zy6i4ND_w$j#)cI=p~+o2 zG1$W01KPF3yLASJNTALN@6@H1{XlPjZH05v)#r~+>lG4nqaO#nU#!o{&^cH=6RX^j zaEjaI?6wH*b(sj&nFFPlr^c64KkC-jf0V|@n%JDoy~sM!yT4r6`5I5*+uaVoq!@x^ z<)AX^qFDPP6T84Ueabt8yc%}viO3-iJU%h|X;D8Hm|t1`kjvVzJe`BbGh_q1{`l0e zHDu|FPO;4o*B4#7;p`hnjirZ));2E;8!g7@Uouf-{~_6Kq7_L=RXjsnJ8BVP^v5ut z`TE14A}qhzGgeGU9Q!Pm=La*4N*;bn8uX4pN;^*srjf3o?o(vbC!C>urc6LN-W}!wd2Hb;0ls1T<8v8RuJy+E0H8&u_LCtv>N=n@G z8uwog8cC;K#n0EMW5;a#^_T?6eeNVP3rFl!{QRM84<{=2>air5C8Z|B^=sx&Mk;os?Mk_8%NPd@+f!-$ZR9UZ-RYGc!*Llt%G_dF}&}A*ZJe8%@3C$oDWr>6HncD zRaVp#i6am)01YExw={sjApTKuIzdTMpr;YVWVIbiZ3(&ZJ%bQfE0Yk8-)nqskxQc| z$1P*2;b&u2Uy}VsMZaHkOPxbcn%`b7>Lh)=xUi0S#_Q(CGhw{%b_QE7ypTQUbq*hH zfsT4yZa($*xD>LY7ns}gnD%B_@V0_omd$F~PV>;P*xMMy3Wy(SEeI!ulZn_(`3@|I ziZPRkd@+vj19E=zW972v+IbRloa(L>cbp1g8p-imQQan3-zMP7Q-!aT3O&5PKOJF3 zl;l4c&&N9TlD{u8$EruIXTLn_Q5f2o=HopgLUXMzD1aI@^SXe&#WA`&*mFHF{wbE zK6E;b_`ohT=bXfR%9{t(wiMwt9X4{b-0m9(9=pynFkkds*80(Ux~zRxnM{oJ@;oiZ zd4>2A_T2C2EpnnLLAH8yliw3uYS)m}b4oCC&-8OKS!4j7gVsF34?h`OsD4=~&l?8o zaJv=H4K3!U-Rt=^$Y||doB3s*9aTecp?2@TcMw-4%P{did>z4T_cu!TYyR_O<#l?>?wVx*xF!W&DJh3u2EBcTY#$O7LiLoxj^?21rA z3axZHavC#obD|o$r5|ddYvgBr+Tg6Nq8e=Wr!;T8CGZdTD3=gxSi!_uj5LTMb3zQQ^Jgpj{$oJA0VvN(s{*=kl3i-UTR+v^JG2Ra>NswERZ7o&%HDTK(+?;RdKxS@S>;c~X!Ro{ z=+(Zv!y?M@gkr6ZaWV0?`{?*%>30IH-W@DN6Zjfu1(gnuua|Rev()+MS(ddVz225_ zwWR5d-8d&#b?aXSw|$r@Np1dIw}xw9Vl%(ER!@d&ztpuXnP$-Nl_k?zc5ltlTyNEO zd-p^Wy}rrIr0d1PmI3_9>FjnoX(CN(v0Yac#m;zj?I`-pu#mVpA~=#g#m-ArtYCZ! z%v?~=jpdxXzG*s>b$qFmSeEYQ+s{eCkW=z9qh28{b;%KRRJ)V5xz2 z2`O@uC2|-GV_$qJ_a@H4q3X^GO`%G}agm_xY}FRw!Kpji#&bFTV)xV!KG`GTUOMmD z9A9M5VZk-p<`&$|8|;2|G!ZsjRQ!w#xr1>MbCD~J^j$r>vdrC-(Y+iijQVhuW4U`3 zujBfsM#}%@T5i3v*lElulBkn3xgp&Q-Nw8TdzBhYLqMX?5JvVG+N(s@8kowmWV^)K zGFZ5x>}~}@l0bprBvI;xC}`4uD1BJ*MVk{S%yUb5h&jE^ZI%>B&?gk^ynB-7?CXql zb=`W?-Iuh0ql>agAV??GyDM(Ch3D|*uF(rxJ*$>B?3}KmMFm}caF=st;fpb?GK-gj z;2ebg)-yvVVTWlYX5^0RazwHTa*Krvmi#UQcK*5XD>}S~oqmvzo3TJ;P)SMLQ$(?f z9vcT5NT`g_<&5o$X4dPR$KPHVF8{HIhhI`V1RH8bXe!H|DVFM!QYzS$sA8LqP+c`R zm#+?!#5IGljA!e54^t_EF)!kDXRTzX+7O&xZy7#)RUcdc5F79iTIZSgi-jAA9&Nh^HPa3QZ| z`pH6583or!Fh`Bx^3wp#I2PYVfN23B#c1wfjZRT^9`~45eN%qYd;N*+9Vb`NWr@+N zqj^J>^Q}fi{PeKb^SPHwJ1)n?P%i^RV?%>8D_bLqIm5{<+E*e|n}SW#O=2JTf0SId z4xPf*#vuJ(81za}djOWGJ!fCMe+ks(k;?!;Z=Neq~*yxwO>CaIK(Y}%m{YChl%;`4jw#(@n z6@L?T)9iE`aof~%jX1f9_?x%sHsj`ueyN)N9F-6etQ#}>Q`~g3{CD1_>vH?6^1nsy zZdjadQhsM^x~3p+Q~q=}-9-FOrErZfzY=)-Ex>=W%r^nQ z%Nt$;nyGF9{z@O-Wc)5#cg>)@y50UZi~mp1?l%5*r0p6Hp#2B@Zv$?(*|&TB*KC=` z|HS^ko&Ve9+naUQ#U=8u~B5y{n`6YMqW% H|8@31p1vN+ literal 0 HcmV?d00001 diff --git a/examples/graph.py b/examples/graph.py index c414995..e21b971 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -5,18 +5,23 @@ import numpy as np #enable_debug() -set_max_depth(100) +set_max_depth(1) def f(): - v = ndarray(1, 10, np.float64) + v = ndarray(1, 10, np.float64, init_value=20) b = ndarray(1, 10, np.float64, init_value=10) - c = ndarray(1, 10, np.float64) - w = c - for i in range(5): - y = v + b + w - z = v - y - w = 2 * (c - z) + b - w.evaluate() + c = ndarray(1, 10, np.float64, init_value=30) + g = v + c + k = g + b + # k = g + 2 * c - 3 * v + l = k.get() + print(l) + # w = c + # for i in range(5): + # y = v + b + w + # z = v - y + # w = 2 * (c - z) + b + # w.evaluate() if __name__ == '__main__': diff --git a/examples/run.sh b/examples/run.sh new file mode 100755 index 0000000..b887cdb --- /dev/null +++ b/examples/run.sh @@ -0,0 +1,4 @@ +cd .. 
+python setup.py install +cd examples +python graph.py diff --git a/playground/ha.py b/playground/ha.py new file mode 100644 index 0000000..550d8a2 --- /dev/null +++ b/playground/ha.py @@ -0,0 +1,12 @@ +def bazuka(kay): + for i in kay: + print(i) + +# a = 3 +# bazuka(a) + +b = [3] +bazuka(b) + +# c = (3) +# bazuka(c) \ No newline at end of file diff --git a/src/.gitignore b/src/.gitignore new file mode 100644 index 0000000..378eac2 --- /dev/null +++ b/src/.gitignore @@ -0,0 +1 @@ +build diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt new file mode 100644 index 0000000..67d384a --- /dev/null +++ b/src/CMakeLists.txt @@ -0,0 +1,51 @@ +cmake_minimum_required(VERSION 3.16) +project(charmTyles) + +include(${CMAKE_SOURCE_DIR}/../config.cmake) + +set(Kokkos_ROOT ${KOKKOS_DIR}) +find_package(Kokkos 4.5 REQUIRED CONFIG) + +if(Charm_ENABLE_GPU) + message(STATUS "Building for a GPU backend") + add_definitions(-DGPU_BACKEND) + add_definitions(-DCUDA_DIR=\"${CUDA_DIR}\") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${GPU_OPTS} ${LD_OPTS}") +else() + message(STATUS "Building for a CPU backend") + set(CMAKE_CXX_COMPILER "${CHARMC}") + set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${CPU_OPTS} ${LD_OPTS}") +endif() + +add_definitions(-DKOKKOS_DIR=\"${KOKKOS_DIR}\") + +add_custom_command( + OUTPUT ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h + COMMAND ${CMAKE_COMMAND} -E chdir ${BASE_DIR}/charmtyles/backend + ${CHARMC} ${BASE_DIR}/charmtyles/backend/charmtyles.ci + DEPENDS ${BASE_DIR}/charmtyles/backend/charmtyles.ci + COMMENT "Processing charmtyles ci files" +) + +add_custom_command( + OUTPUT ${CMAKE_SOURCE_DIR}/server.decl.h + COMMAND ${CMAKE_COMMAND} -E chdir ${CMAKE_SOURCE_DIR} + ${CHARMC} ${CMAKE_SOURCE_DIR}/server.ci + DEPENDS ${CMAKE_SOURCE_DIR}/server.ci + COMMENT "Processing server ci files" +) + +if(Charm_ENABLE_GPU) + add_library(server OBJECT server.cpp ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h ${CMAKE_SOURCE_DIR}/server.decl.h) + target_include_directories(server PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}/include ${CHARM_DIR}/include) + target_link_libraries(server Kokkos::kokkos) + + add_custom_command(result ALL + COMMAND ${CHARMC} ${GPU_LINK_OPTS} $:server> -o ${CMAKE_BINARY_DIR}/server.out + DEPENDS server + COMMENT "Linking charm build against kokkos and cuda") +else() + add_executable(server.out server.cpp ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h ${CMAKE_SOURCE_DIR}/server.decl.h) + target_include_directories(server.out PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}) + target_link_libraries(server.out Kokkos::kokkos) +endif() \ No newline at end of file diff --git a/src/ast.hpp b/src/ast.hpp index 093c019..8cd59b3 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -1,106 +1,197 @@ +#include +#include +#include +#include +#include +#include +#include #include #include -template -inline T extract(char *&msg, bool increment = true) -{ +template +inline T extract(char *&msg) noexcept { T arg = *(reinterpret_cast(msg)); - if (increment) - msg += sizeof(T); + msg += sizeof(T); return arg; } -enum class operation : uint64_t -{ - noop = 0, - add = 1, - sub = 2, - mul = 3, - div = 4, - matmul = 5, - copy = 6, - axpy = 7, - axpy_multiplier = 8, - pow = 10, - greater = 11, - lesser = 12, - geq = 13, - leq = 14, - eq = 15, - neq = 16, - logical_and = 17, - logical_or = 18, - logical_not = 19, - where = 20, - log = 21, - exp = 22, - abs = 23, - any = 24, - all = 25 -}; - -class astnode -{ -public: - bool store; - bool is_scalar; - double arg; - 
// FIXME double scalars fit into name, but should probably - // handle this better - uint64_t name; - operation oper; - std::vector operands; -}; - -operation lookup_operation(uint64_t opcode) -{ - return static_cast(opcode); +template +inline T peek(char* &msg) noexcept { + return *(reinterpret_cast(msg)); +} + +ct::util::Operation inline to_ctop(uint64_t opcode) noexcept { + using ctop = ct::util::Operation; + switch (opcode) { + case 0: return ctop::noop; + case 1: return ctop::add; + case 2: return ctop::sub; + case 3: return ctop::multiply; + case 4: return ctop::divide; + case 11: return ctop::greater; + case 12: return ctop::lesser; + case 13: return ctop::geq; + case 14: return ctop::leq; + case 15: return ctop::eq; + case 16: return ctop::neq; + case 17: return ctop::logical_and; + case 18: return ctop::logical_or; + case 19: return ctop::logical_not; + case 20: return ctop::where; + default: return ctop::noop; + } } -astnode *decode(char *cmd) +template +std::vector faster_tortoise(char *cmd) { - uint64_t opcode = extract(cmd); - astnode *node = new astnode; - node->oper = lookup_operation(opcode); - if (opcode == 0) - { - node->is_scalar = extract(cmd); - // if leaf is a scalar - if (node->is_scalar) - { - double value = extract(cmd); - memcpy(&(node->name), &value, sizeof(double)); - } - else - node->name = extract(cmd); - return node; + uint8_t dims = extract(cmd); + + if (dims == 0) { + double value = extract(cmd); + tensorAstNodeType temp_node{0, ct::util::Operation::broadcast, value, shape}; + return {temp_node}; + } + + std::vector shape; shape.reserve(2); + for(uint8_t i = 0; i < dims; i++) + shape.push_back(extract(cmd)); + + ct::util::Operation opcode = to_ctop(extract(cmd)); + if (opcode == ct::util::Operation::noop) { + const auto& tmp = std::get<1>(Server::lookup(extract(cmd))); + return tmp(); } - node->is_scalar = false; - node->name = extract(cmd); - node->store = extract(cmd); - uint8_t num_operands = extract(cmd); - for (uint8_t i = 0; i < num_operands; i++) - { + bool store = extract(cmd); + uint32_t tensorID = extract(cmd); + + // Args for custom unops/binops + uint32_t numArgs = extract(cmd); + std::vector args; + for(uint32_t i = 0; i < numArgs; i++) + args.push_back(extract(cmd)); + + tensorAstNodeType rootNode(opcode, shape); + std::vector ast; + + uint8_t numOperands = extract(cmd); + + if(numOperands <= 2) { uint32_t operand_size = extract(cmd); - astnode *opnode = decode(cmd); - node->operands.push_back(opnode); + std::vector left = faster_tortoise(cmd); + cmd += operand_size; + operand_size = extract(cmd); + std::vector right = faster_tortoise(cmd); cmd += operand_size; - } - node->arg = extract(cmd); - return node; -} -void delete_ast(astnode *node) -{ - if (node->oper == operation::noop) - { - delete node; - return; - } + rootNode.left_ = 1; + size_t right_size; + if (op == ct::util::Operation::unary_expr || + op == ct::util::Operation::logical_not || + op == ct::util::Operation::custom_expr) { + rootNode.right_ = -1; + right_size = 0; + } else { + rootNode.right_ = left.size() + 1; + right_size = right.size(); + } + ast.reserve(left.size() + right_size + 1); + ast.emplace_back(rootNode); + std::copy(left.begin(), left.end(), std::back_inserter(ast)); + + if (right_size) + std::copy(right.begin(), right.end(), std::back_inserter(ast)); - for (astnode *n : node->operands) - delete_ast(n); + for (int i = 1; i != left.size(); ++i) { + if (ast[i].left_ != -1) { + ast[i].left_ += 1; + } + + if (ast[i].right_ != -1) { + ast[i].right_ += 1; + } + + if 
(ast[i].ter_ != -1) { + ast[i].ter_ += 1; + } + } - delete node; + for (int i = 1 + left.size(); i != ast.size(); ++i) { + if (ast[i].left_ != -1) + { + ast[i].left_ += 1 + left.size(); + } + + if (ast[i].right_ != -1) + { + ast[i].right_ += 1 + left.size(); + } + + if (ast[i].ter_ != -1) + { + ast[i].ter_ += 1 + left.size(); + } + } + } else { + uint32_t operand_size = extract(cmd); + std::vector left = faster_tortoise(cmd); + cmd += operand_size; + operand_size = extract(cmd); + std::vector right = faster_tortoise(cmd); + cmd += operand_size; + operand_size = extract(cmd); + std::vector ter = faster_tortoise(cmd); + cmd += operand_size; + + rootNode.left_ = 1; + rootNode.right_ = left.size() + 1; + rootNode.ter_ = left.size() + right.size() + 1; + + ast.reserve(left.size() + right.size() + ter.size() + 1); + + ast.emplace_back(rootNode); + std::copy(left.begin(), left.end(), std::back_inserter(ast)); + std::copy(right.begin(), right.end(), std::back_inserter(ast)); + std::copy(ter.begin(), ter.end(), std::back_inserter(ast)); + + for (int i = 1; i != left.size(); ++i) { + if (ast[i].left_ != -1) + ast[i].left_ += 1; + + if (ast[i].right_ != -1) + ast[i].right_ += 1; + + if (ast[i].ter_ != -1) + ast[i].ter_ += 1; + } + + for (int i = 1 + left.size(); i != left.size() + right.size(); ++i) { + if (ast[i].left_ != -1) + ast[i].left_ += 1 + left.size(); + + if (ast[i].right_ != -1) + ast[i].right_ += 1 + left.size(); + + if (ast[i].ter_ != -1) + ast[i].ter_ += 1 + left.size(); + } + + for (int i = 1 + left.size() + right.size(); i != ast.size(); ++i) { + if (ast[i].left_ != -1) + ast[i].left_ += 1 + left.size() + right.size(); + + if (ast[i].right_ != -1) + ast[i].right_ += 1 + left.size() + right.size(); + + if (ast[i].ter_ != -1) + ast[i].ter_ += 1 + left.size() + right.size(); + } + } + + if (store) { + tensorType tensor(ast); + Server::insert(tensorID, std::move(tensor)); + } + return ast; } diff --git a/src/server.cpp b/src/server.cpp index 9e88a39..176374f 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -1,4 +1,3 @@ -#include #include "server.hpp" #include "converse.h" #include "conv-ccs.h" @@ -144,10 +143,8 @@ void Main::execute_operation(int epoch, int size, char *cmd) } CkPrintf("Memory usage after %u deletions is %f MB\n", num_deletions, CmiMemoryUsage() / (1024. 
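// Illustration only (not part of src/ast.hpp or src/server.cpp in this patch): the
// child-index fix-ups above follow one rule. A flattened AST stores the root at index 0,
// the left subtree immediately after it, and the right subtree after that, so every index
// inside a copied subtree is shifted by 1 (left) or by 1 + left.size() (right). A minimal
// sketch with an assumed ToyNode type, binary operators only (the real code also handles
// unary and ternary nodes):
#include <cstdio>
#include <vector>

struct ToyNode {
    const char *op;
    int left_ = -1;   // -1 means "no child", as in the code above
    int right_ = -1;
};

// Flatten "lhs <op> rhs" from operand subtrees that are already flattened.
std::vector<ToyNode> flatten(const char *op,
                             std::vector<ToyNode> lhs,
                             std::vector<ToyNode> rhs)
{
    std::vector<ToyNode> ast;
    ast.reserve(lhs.size() + rhs.size() + 1);
    ast.push_back({op, 1, static_cast<int>(lhs.size()) + 1});
    for (ToyNode n : lhs) {               // left subtree: shift children by 1
        if (n.left_ != -1)  n.left_  += 1;
        if (n.right_ != -1) n.right_ += 1;
        ast.push_back(n);
    }
    const int off = 1 + static_cast<int>(lhs.size());
    for (ToyNode n : rhs) {               // right subtree: shift children by 1 + lhs.size()
        if (n.left_ != -1)  n.left_  += off;
        if (n.right_ != -1) n.right_ += off;
        ast.push_back(n);
    }
    return ast;
}

int main()
{
    // (a + b) * c flattens to: mul(1,4), add(2,3), a, b, c
    auto sum  = flatten("add", {{"a"}}, {{"b"}});
    auto prod = flatten("mul", sum, {{"c"}});
    for (const auto &n : prod)
        std::printf("%-3s left=%d right=%d\n", n.op, n.left_, n.right_);
    return 0;
}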
* 1024.)); - astnode *head = decode(cmd); - std::vector metadata; - calculate(head, metadata); - delete_ast(head); + if(peek(cmd) == 1) faster_tortoise(cmd); + else if(peek(cmd) == 2) faster_tortoise(cmd); } void Main::execute_command(int epoch, uint8_t kind, int size, char *cmd) diff --git a/src/server.decl.h b/src/server.decl.h new file mode 100644 index 0000000..7bf7a1e --- /dev/null +++ b/src/server.decl.h @@ -0,0 +1,148 @@ +#ifndef _DECL_server_H_ +#define _DECL_server_H_ +#include "charm++.h" +#include "envelope.h" +#include +#include "sdag.h" +#include "libcharmtyles.decl.h" + + + + + +/* DECLS: mainchare Main: Chare{ +Main(CkArgMsg* impl_msg); +void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); +}; + */ + class Main; + class CkIndex_Main; + class CProxy_Main; +/* --------------- index object ------------------ */ +class CkIndex_Main:public CkIndex_Chare{ + public: + typedef Main local_t; + typedef CkIndex_Main index_t; + typedef CProxy_Main proxy_t; + typedef CProxy_Main element_t; + + static int __idx; + static void __register(const char *s, size_t size); + /* DECLS: Main(CkArgMsg* impl_msg); + */ + // Entry point registration at startup + + static int reg_Main_CkArgMsg(); + // Entry point index lookup + + inline static int idx_Main_CkArgMsg() { + static int epidx = reg_Main_CkArgMsg(); + return epidx; + } + + + static int ckNew(CkArgMsg* impl_msg) { return idx_Main_CkArgMsg(); } + + static void _call_Main_CkArgMsg(void* impl_msg, void* impl_obj); + + static void _call_sdag_Main_CkArgMsg(void* impl_msg, void* impl_obj); + /* DECLS: void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); + */ + // Entry point registration at startup + + static int reg_handle_command_marshall2(); + // Entry point index lookup + + inline static int idx_handle_command_marshall2() { + static int epidx = reg_handle_command_marshall2(); + return epidx; + } + + + inline static int idx_handle_command(void (Main::*)(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd) ) { + return idx_handle_command_marshall2(); + } + + + + static int handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd) { return idx_handle_command_marshall2(); } + + static void _call_handle_command_marshall2(void* impl_msg, void* impl_obj); + + static void _call_sdag_handle_command_marshall2(void* impl_msg, void* impl_obj); + + static int _callmarshall_handle_command_marshall2(char* impl_buf, void* impl_obj_void); + + static void _marshallmessagepup_handle_command_marshall2(PUP::er &p,void *msg); +}; +/* --------------- element proxy ------------------ */ +class CProxy_Main:public CProxy_Chare{ + public: + typedef Main local_t; + typedef CkIndex_Main index_t; + typedef CProxy_Main proxy_t; + typedef CProxy_Main element_t; + + CProxy_Main(void) {}; + CProxy_Main(CkChareID __cid) : CProxy_Chare(__cid){ } + CProxy_Main(const Chare *c) : CProxy_Chare(c){ } + + int ckIsDelegated(void) const + { return CProxy_Chare::ckIsDelegated(); } + inline CkDelegateMgr *ckDelegatedTo(void) const + { return CProxy_Chare::ckDelegatedTo(); } + inline CkDelegateData *ckDelegatedPtr(void) const + { return CProxy_Chare::ckDelegatedPtr(); } + CkGroupID ckDelegatedIdx(void) const + { return CProxy_Chare::ckDelegatedIdx(); } + + inline void ckCheck(void) const + { CProxy_Chare::ckCheck(); } + const CkChareID &ckGetChareID(void) const + { return CProxy_Chare::ckGetChareID(); } + operator const CkChareID &(void) const + { return 
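// Illustration only (not part of the generated server.decl.h around this point): the byte
// string that execute_operation() hands to faster_tortoise() is a flat encoding of one AST
// node followed by its size-prefixed operands. Field widths below follow the layout comment
// added to charmnumeric/ast.py in patch 02; take<>() is a hypothetical stand-in for the
// extract<>() helper in src/ast.hpp, and native byte order and a 64-bit unsigned long are
// assumed. Double constants use the shorter | dim | shape | val | form and are not shown.
#include <cstdint>
#include <cstring>
#include <vector>

template <typename T>
T take(const char *&p)
{
    T v;
    std::memcpy(&v, p, sizeof(T));   // memcpy rather than a cast to avoid unaligned reads
    p += sizeof(T);
    return v;
}

struct NodeHeader {
    uint8_t dims;                    // 0 would mean "inline double constant"
    std::vector<uint64_t> shape;     // one 64-bit extent per dimension
    uint32_t opcode;                 // 0 means "reference to an existing array"
    bool save;                       // should the server store the result?
    uint64_t id;                     // array name (64 bits per the layout table)
    std::vector<double> args;        // extra scalar args (pow exponent, log base, ...)
    uint8_t num_operands;            // each operand follows as a 32-bit size + encoding
};

// Walks one node header the way the writer in charmnumeric/ast.py lays it out
// (dims > 0 assumed here).
NodeHeader read_node_header(const char *&p)
{
    NodeHeader h{};
    h.dims = take<uint8_t>(p);
    for (uint8_t i = 0; i < h.dims; ++i)
        h.shape.push_back(take<uint64_t>(p));
    h.opcode = take<uint32_t>(p);
    if (h.opcode == 0) {             // leaf: the writer emits a False byte, then the name
        take<bool>(p);
        h.id = take<uint64_t>(p);
        return h;
    }
    h.save = take<bool>(p);
    h.id = take<uint64_t>(p);
    uint32_t nargs = take<uint32_t>(p);
    for (uint32_t i = 0; i < nargs; ++i)
        h.args.push_back(take<double>(p));
    h.num_operands = take<uint8_t>(p);
    return h;
}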
ckGetChareID(); } + + void ckDelegate(CkDelegateMgr *dTo,CkDelegateData *dPtr=NULL) + { CProxy_Chare::ckDelegate(dTo,dPtr); } + void ckUndelegate(void) + { CProxy_Chare::ckUndelegate(); } + void pup(PUP::er &p) + { CProxy_Chare::pup(p); + } + + void ckSetChareID(const CkChareID &c) + { CProxy_Chare::ckSetChareID(c); } + Main *ckLocal(void) const + { return (Main *)CkLocalChare(&ckGetChareID()); } +/* DECLS: Main(CkArgMsg* impl_msg); + */ + static CkChareID ckNew(CkArgMsg* impl_msg, int onPE=CK_PE_ANY); + static void ckNew(CkArgMsg* impl_msg, CkChareID* pcid, int onPE=CK_PE_ANY); + +/* DECLS: void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); + */ + + void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd, const CkEntryOptions *impl_e_opts=NULL); + +}; +#define Main_SDAG_CODE +typedef CBaseT1CBase_Main; + + + + + + +/* ---------------- method closures -------------- */ +class Closure_Main { + public: + + + struct handle_command_2_closure; + +}; + +extern void _registerserver(void); +extern "C" void CkRegisterMainModule(void); +#endif diff --git a/src/server.def.h b/src/server.def.h new file mode 100644 index 0000000..a8b7cfe --- /dev/null +++ b/src/server.def.h @@ -0,0 +1,306 @@ + + + + + +/* ---------------- method closures -------------- */ +#ifndef CK_TEMPLATES_ONLY +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + + struct Closure_Main::handle_command_2_closure : public SDAG::Closure { + int epoch; + uint8_t kind; + uint32_t size; + char *cmd; + + CkMarshallMsg* _impl_marshall; + char* _impl_buf_in; + int _impl_buf_size; + + handle_command_2_closure() { + init(); + _impl_marshall = 0; + _impl_buf_in = 0; + _impl_buf_size = 0; + } + handle_command_2_closure(CkMigrateMessage*) { + init(); + _impl_marshall = 0; + _impl_buf_in = 0; + _impl_buf_size = 0; + } + int & getP0() { return epoch;} + uint8_t & getP1() { return kind;} + uint32_t & getP2() { return size;} + char *& getP3() { return cmd;} + void pup(PUP::er& __p) { + __p | epoch; + __p | kind; + __p | size; + packClosure(__p); + __p | _impl_buf_size; + bool hasMsg = (_impl_marshall != 0); __p | hasMsg; + if (hasMsg) CkPupMessage(__p, (void**)&_impl_marshall); + else PUParray(__p, _impl_buf_in, _impl_buf_size); + if (__p.isUnpacking()) { + char *impl_buf = _impl_marshall ? 
_impl_marshall->msgBuf : _impl_buf_in; + PUP::fromMem implP(impl_buf); + PUP::detail::TemporaryObjectHolder epoch; + implP|epoch; + PUP::detail::TemporaryObjectHolder kind; + implP|kind; + PUP::detail::TemporaryObjectHolder size; + implP|size; + int impl_off_cmd, impl_cnt_cmd; + implP|impl_off_cmd; + implP|impl_cnt_cmd; + impl_buf+=CK_ALIGN(implP.size(),16); + cmd = (char *)(impl_buf+impl_off_cmd); + } + } + virtual ~handle_command_2_closure() { + if (_impl_marshall) CmiFree(UsrToEnv(_impl_marshall)); + } + PUPable_decl(SINGLE_ARG(handle_command_2_closure)); + }; +#endif /* CK_TEMPLATES_ONLY */ + + + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(pow_t) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(log_t) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(exp_t) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(abs_t) +#endif /* CK_TEMPLATES_ONLY */ + +/* DEFS: mainchare Main: Chare{ +Main(CkArgMsg* impl_msg); +void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); +}; + */ +#ifndef CK_TEMPLATES_ONLY + int CkIndex_Main::__idx=0; +#endif /* CK_TEMPLATES_ONLY */ +#ifndef CK_TEMPLATES_ONLY +#endif /* CK_TEMPLATES_ONLY */ +#ifndef CK_TEMPLATES_ONLY +/* DEFS: Main(CkArgMsg* impl_msg); + */ +CkChareID CProxy_Main::ckNew(CkArgMsg* impl_msg, int impl_onPE) +{ + CkChareID impl_ret; + CkCreateChare(CkIndex_Main::__idx, CkIndex_Main::idx_Main_CkArgMsg(), impl_msg, &impl_ret, impl_onPE); + return impl_ret; +} +void CProxy_Main::ckNew(CkArgMsg* impl_msg, CkChareID* pcid, int impl_onPE) +{ + CkCreateChare(CkIndex_Main::__idx, CkIndex_Main::idx_Main_CkArgMsg(), impl_msg, pcid, impl_onPE); +} + +// Entry point registration function +int CkIndex_Main::reg_Main_CkArgMsg() { + int epidx = CkRegisterEp("Main(CkArgMsg* impl_msg)", + reinterpret_cast(_call_Main_CkArgMsg), CMessage_CkArgMsg::__idx, __idx, 0); + CkRegisterMessagePupFn(epidx, (CkMessagePupFn)CkArgMsg::ckDebugPup); + return epidx; +} + +void CkIndex_Main::_call_Main_CkArgMsg(void* impl_msg, void* impl_obj_void) +{ + Main* impl_obj = static_cast(impl_obj_void); + new (impl_obj_void) Main((CkArgMsg*)impl_msg); +} +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY +/* DEFS: void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); + */ +void CProxy_Main::handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd, const CkEntryOptions *impl_e_opts) +{ + ckCheck(); + //Marshall: int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd + int impl_off=0; + int impl_arrstart=0; + int impl_off_cmd, impl_cnt_cmd; + impl_off_cmd=impl_off=CK_ALIGN(impl_off,sizeof(char)); + impl_off+=(impl_cnt_cmd=sizeof(char)*(size)); + { //Find the size of the PUP'd data + PUP::sizer implP; + implP|epoch; + //Have to cast away const-ness to get pup routine + implP|(typename std::remove_cv::type>::type &)kind; + //Have to cast away const-ness to get pup routine + implP|(typename std::remove_cv::type>::type &)size; + implP|impl_off_cmd; + implP|impl_cnt_cmd; + impl_arrstart=CK_ALIGN(implP.size(),16); + impl_off+=impl_arrstart; + } + CkMarshallMsg *impl_msg=CkAllocateMarshallMsg(impl_off,impl_e_opts); + { //Copy over the PUP'd data + PUP::toMem implP((void *)impl_msg->msgBuf); + implP|epoch; + //Have to cast away const-ness to get pup routine + implP|(typename std::remove_cv::type>::type &)kind; + //Have to cast away const-ness to get pup routine + implP|(typename 
std::remove_cv::type>::type &)size; + implP|impl_off_cmd; + implP|impl_cnt_cmd; + } + char *impl_buf=impl_msg->msgBuf+impl_arrstart; + memcpy(impl_buf+impl_off_cmd,cmd,impl_cnt_cmd); + if (ckIsDelegated()) { + int destPE=CkChareMsgPrep(CkIndex_Main::idx_handle_command_marshall2(), impl_msg, &ckGetChareID()); + if (destPE!=-1) ckDelegatedTo()->ChareSend(ckDelegatedPtr(),CkIndex_Main::idx_handle_command_marshall2(), impl_msg, &ckGetChareID(),destPE); + } else { + CkSendMsg(CkIndex_Main::idx_handle_command_marshall2(), impl_msg, &ckGetChareID(),0); + } +} + +// Entry point registration function +int CkIndex_Main::reg_handle_command_marshall2() { + int epidx = CkRegisterEp("handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd)", + reinterpret_cast(_call_handle_command_marshall2), CkMarshallMsg::__idx, __idx, 0+CK_EP_NOKEEP); + CkRegisterMarshallUnpackFn(epidx, _callmarshall_handle_command_marshall2); + CkRegisterMessagePupFn(epidx, _marshallmessagepup_handle_command_marshall2); + + return epidx; +} + +void CkIndex_Main::_call_handle_command_marshall2(void* impl_msg, void* impl_obj_void) +{ + Main* impl_obj = static_cast(impl_obj_void); + CkMarshallMsg *impl_msg_typed=(CkMarshallMsg *)impl_msg; + char *impl_buf=impl_msg_typed->msgBuf; + envelope *env = UsrToEnv(impl_msg_typed); + /*Unmarshall pup'd fields: int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd*/ + PUP::fromMem implP(impl_buf); + PUP::detail::TemporaryObjectHolder epoch; + implP|epoch; + PUP::detail::TemporaryObjectHolder kind; + implP|kind; + PUP::detail::TemporaryObjectHolder size; + implP|size; + int impl_off_cmd, impl_cnt_cmd; + implP|impl_off_cmd; + implP|impl_cnt_cmd; + impl_buf+=CK_ALIGN(implP.size(),16); + /*Unmarshall arrays:*/ + char *cmd=(char *)(impl_buf+impl_off_cmd); + impl_obj->handle_command(std::move(epoch.t), std::move(kind.t), std::move(size.t), cmd); +} +int CkIndex_Main::_callmarshall_handle_command_marshall2(char* impl_buf, void* impl_obj_void) { + Main* impl_obj = static_cast(impl_obj_void); + envelope *env = UsrToEnv(impl_buf); + /*Unmarshall pup'd fields: int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd*/ + PUP::fromMem implP(impl_buf); + PUP::detail::TemporaryObjectHolder epoch; + implP|epoch; + PUP::detail::TemporaryObjectHolder kind; + implP|kind; + PUP::detail::TemporaryObjectHolder size; + implP|size; + int impl_off_cmd, impl_cnt_cmd; + implP|impl_off_cmd; + implP|impl_cnt_cmd; + impl_buf+=CK_ALIGN(implP.size(),16); + /*Unmarshall arrays:*/ + char *cmd=(char *)(impl_buf+impl_off_cmd); + impl_obj->handle_command(std::move(epoch.t), std::move(kind.t), std::move(size.t), cmd); + return implP.size(); +} +void CkIndex_Main::_marshallmessagepup_handle_command_marshall2(PUP::er &implDestP,void *impl_msg) { + CkMarshallMsg *impl_msg_typed=(CkMarshallMsg *)impl_msg; + char *impl_buf=impl_msg_typed->msgBuf; + envelope *env = UsrToEnv(impl_msg_typed); + /*Unmarshall pup'd fields: int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd*/ + PUP::fromMem implP(impl_buf); + PUP::detail::TemporaryObjectHolder epoch; + implP|epoch; + PUP::detail::TemporaryObjectHolder kind; + implP|kind; + PUP::detail::TemporaryObjectHolder size; + implP|size; + int impl_off_cmd, impl_cnt_cmd; + implP|impl_off_cmd; + implP|impl_cnt_cmd; + impl_buf+=CK_ALIGN(implP.size(),16); + /*Unmarshall arrays:*/ + char *cmd=(char *)(impl_buf+impl_off_cmd); + if (implDestP.hasComments()) implDestP.comment("epoch"); + implDestP|epoch; + if (implDestP.hasComments()) 
implDestP.comment("kind"); + implDestP|kind; + if (implDestP.hasComments()) implDestP.comment("size"); + implDestP|size; + if (implDestP.hasComments()) implDestP.comment("cmd"); + implDestP.synchronize(PUP::sync_begin_array); + for (int impl_i=0;impl_i*(sizeof(*cmd)) +void CBase_Main::virtual_pup(PUP::er &p) { + recursive_pup
(dynamic_cast(this), p); +} +#endif /* CK_TEMPLATES_ONLY */ diff --git a/src/server.hpp b/src/server.hpp index 211ec84..d11b33d 100644 --- a/src/server.hpp +++ b/src/server.hpp @@ -1,10 +1,3 @@ -#include -#include -#include -#include -#include -#include -#include #include "ast.hpp" #include "server.decl.h" @@ -17,8 +10,6 @@ std::stack client_ids; CProxy_Main main_proxy; -ct_array_t calculate(astnode *node, std::vector &metadata); - enum class opkind : uint8_t { creation = 0, @@ -199,10 +190,8 @@ class Server inline static void insert(ct_name_t name, ct_array_t arr) { -#ifndef NDEBUG CkPrintf("Created array %" PRIu64 " on server\n", name); -#endif - symbol_table[name] = arr; + symbol_table[name] = std::move(arr); } inline static void remove(ct_name_t name) @@ -239,917 +228,4 @@ class Server } }; -ct_array_t calculate(astnode *node, std::vector &metadata) -{ - switch (node->oper) - { - case operation::noop: - { - if (node->is_scalar) - return *reinterpret_cast(&(node->name)); - else - return Server::lookup(node->name); - } - case operation::add: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v)) - { - res = x.get() + y.get(); - } - else if constexpr ((std::is_same_v)) - { - res = x.get() + y; - } - else if constexpr ((std::is_same_v)) - { - res = x + y.get(); - } - else if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else - { - // Everything else should work with the normal + operator - res = x + y; - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - } - case operation::sub: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v)) - { - res = x.get() - y.get(); - } - else if constexpr ((std::is_same_v)) - { - res = x.get() - y; - } - else if constexpr ((std::is_same_v)) - { - res = x - y.get(); - } - else if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else - { - // Everything else should work with the normal + operator - res = x - y; - } - }, - s1, s2); - - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - case operation::mul: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v)) - { - res = x.get() * y.get(); - } - else if constexpr ((std::is_same_v)) - { - res = x.get() * y; - } - else if constexpr ((std::is_same_v)) - { - res = x * y.get(); - } - else if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else - { - res = x * y; - } - }, - s1, s2); - - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - case operation::div: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 
= calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v)) - { - res = x.get() / y.get(); - } - else if constexpr ((std::is_same_v)) - { - res = x.get() / y; - } - else if constexpr ((std::is_same_v)) - { - res = x / y.get(); - } - else if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else - { - res = x / y; - } - }, - s1, s2); - - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - case operation::matmul: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr (std::is_same_v && - std::is_same_v) - CmiAbort("Matrix multiplication not yet implemented"); - else if constexpr (std::is_same_v && - std::is_same_v) - res = ct::dot(x, y); - else if constexpr ((std::is_same_v || - std::is_same_v) && - std::is_same_v) - res = ct::dot(x, y); - else - CmiAbort("Operation not permitted5"); - }, - s1, s2); - - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - case operation::copy: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t res; - - std::visit( - [&](auto &x) - { - using T = std::decay_t; - if constexpr (std::is_same_v || - std::is_same_v || std::is_same_v) - res = x; - else - CmiAbort("Matrix copy not implemented"); - }, - s1); - - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - case operation::axpy: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t s3 = calculate(node->operands[2], metadata); - ct_array_t res; - - std::visit( - [&](auto &a, auto &x, auto &y) - { - using S = std::decay_t; - using T = std::decay_t; - using V = std::decay_t; - if constexpr (std::is_same_v && - std::is_same_v && - std::is_same_v) - res = ct::axpy(a, x, y); - else - CmiAbort("Operation not permitted6"); - }, - s1, s2, s3); - - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - - case operation::where: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t s3 = calculate(node->operands[2], metadata); - ct_array_t res; - - std::visit( - [&](auto &a, auto &x, auto &y) - { - using S = std::decay_t; - using T = std::decay_t; - using V = std::decay_t; - if constexpr (std::is_same_v && - std::is_same_v && - std::is_same_v || - std::is_same_v && - std::is_same_v && - std::is_same_v) - res = ct::where(a, x, y); - else - CmiAbort("All where operations must be of the same type"); - }, - s1, s2, s3); - - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - - case operation::pow: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t res; - std::visit( - [&](auto &a) - { - using T = std::decay_t; - if constexpr (std::is_same_v) - { - std::shared_ptr pow_ = std::make_shared(node->arg); - ct::vector vec = ct::unary_expr(a, pow_); - res = ct_array_t{vec}; - } - else if constexpr (std::is_same_v) - { - std::shared_ptr pow_ = std::make_shared(node->arg); - ct::matrix mat = ct::unary_expr(a, pow_); - res = ct_array_t{mat}; - } - else if constexpr 
(std::is_same_v) - { - res = std::pow(a.get(), node->arg); - } - else if constexpr (std::is_same_v) - { - res = std::pow(a, node->arg); - } - }, - s1); - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - - case operation::log: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t res; - std::visit( - [&](auto &a) - { - using T = std::decay_t; - if constexpr (std::is_same_v) - { - std::shared_ptr log_ = std::make_shared(node->arg); - ct::vector vec = ct::unary_expr(a, log_); - res = ct_array_t{vec}; - } - else if constexpr (std::is_same_v) - { - std::shared_ptr log_ = std::make_shared(node->arg); - ct::matrix mat = ct::unary_expr(a, log_); - res = ct_array_t{mat}; - } - else if constexpr (std::is_same_v) - { - res = std::log(a.get()) / std::log(node->arg); - } - else if constexpr (std::is_same_v) - { - res = std::log(a) / std::log(node->arg); - } - }, - s1); - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - - case operation::exp: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t res; - std::visit( - [&](auto &a) - { - using T = std::decay_t; - if constexpr (std::is_same_v) - { - std::shared_ptr exp_ = std::make_shared(); - ct::vector vec = ct::unary_expr(a, exp_); - res = ct_array_t{vec}; - } - else if constexpr (std::is_same_v) - { - std::shared_ptr exp_ = std::make_shared(); - ct::matrix mat = ct::unary_expr(a, exp_); - res = ct_array_t{mat}; - } - else if constexpr (std::is_same_v) - { - res = std::exp(a.get()); - } - else if constexpr (std::is_same_v) - { - res = std::exp(a); - } - }, - s1); - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - - case operation::abs: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t res; - std::visit( - [&](auto &a) - { - using T = std::decay_t; - if constexpr (std::is_same_v) - { - std::shared_ptr abs_ = std::make_shared(); - ct::vector vec = ct::unary_expr(a, abs_); - res = ct_array_t{vec}; - } - else if constexpr (std::is_same_v) - { - std::shared_ptr abs_ = std::make_shared(); - ct::matrix mat = ct::unary_expr(a, abs_); - res = ct_array_t{mat}; - } - else if constexpr (std::is_same_v) - { - res = std::abs(a.get()); - } - else if constexpr (std::is_same_v) - { - res = std::abs(a); - } - }, - s1); - if (node->store) - { - Server::insert(node->name, res); - } - return res; - } - - case operation::axpy_multiplier: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t s3 = calculate(node->operands[2], metadata); - ct_array_t multiplier = calculate(node->operands[3], metadata); - ct_array_t res; - - std::visit( - [&](auto &multiplier, auto &a, auto &x, auto &y) - { - using S = std::decay_t; - using T = std::decay_t; - using V = std::decay_t; - using M = std::decay_t; - if constexpr (std::is_same_v && - std::is_same_v && - std::is_same_v && - std::is_same_v) - res = ct::axpy(multiplier * a, x, y); - else - CmiAbort("Operation not permitted7"); - }, - multiplier, s1, s2, s3); - - if (node->store) - Server::insert(node->name, res); - return res; - } - case operation::greater: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + 
Matrix operations not supported"); - } - else if constexpr ((std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v)) - { - res = x > y; - } - else if constexpr ((std::is_same_v && std::is_same_v)) - { - res = static_cast(x.get() > y.get()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get() > y); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x > y.get()); - } - else - { - res = static_cast(x > y); - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - } - case operation::lesser: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else if constexpr ((std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v)) - { - res = x < y; - } - else if constexpr ((std::is_same_v && std::is_same_v)) - { - res = static_cast(x.get() < y.get()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get() < y); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x < y.get()); - } - else - { - res = static_cast(x < y); - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - case operation::geq: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else if constexpr ((std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v)) - { - res = x >= y; - } - else if constexpr ((std::is_same_v && std::is_same_v)) - { - res = static_cast(x.get() >= y.get()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get() >= y); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x >= y.get()); - } - else - { - res = static_cast(x >= y); - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - case operation::leq: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else if constexpr ((std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v)) - { - res = x <= y; - } - else if constexpr ((std::is_same_v && std::is_same_v)) - { - res = static_cast(x.get() <= y.get()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get() <= y); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x <= y.get()); - } - else - { - res = static_cast(x <= y); - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - case operation::eq: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto 
&y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else if constexpr ((std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v)) - { - res = x == y; - } - else if constexpr ((std::is_same_v && std::is_same_v)) - { - res = static_cast(x.get() == y.get()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get() == y); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x == y.get()); - } - else - { - res = static_cast(x == y); - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - case operation::neq: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else if constexpr ((std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v)) - { - res = x != y; - } - else if constexpr ((std::is_same_v && std::is_same_v)) - { - res = static_cast(x.get() != y.get()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get() != y); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x != y.get()); - } - else - { - res = static_cast(x != y); - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - case operation::logical_and: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else if constexpr ((std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v)) - { - res = x && y; - } - else if constexpr ((std::is_same_v && std::is_same_v)) - { - res = static_cast(x.get() && y.get()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get() && y); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x && y.get()); - } - else - { - res = static_cast(x && y); - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - case operation::logical_or: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t s2 = calculate(node->operands[1], metadata); - ct_array_t res; - - std::visit( - [&](auto &x, auto &y) - { - using T = std::decay_t; - using V = std::decay_t; - if constexpr ((std::is_same_v && std::is_same_v) || - (std::is_same_v && std::is_same_v)) - { - CkAbort("Vector + Matrix operations not supported"); - } - else if constexpr ((std::is_same_v || std::is_same_v || std::is_same_v || std::is_same_v)) - { - res = x || y; - } - else if constexpr ((std::is_same_v && std::is_same_v)) - { - res = static_cast(x.get() || y.get()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get() || y); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x || y.get()); - } - else - { - res = static_cast(x || y); - } - }, - s1, s2); - - if (node->store) - Server::insert(node->name, res); - return res; - 
} - - case operation::logical_not: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t res; - - std::visit( - [&](auto &x) - { - using T = std::decay_t; - if constexpr ((std::is_same_v || std::is_same_v)) - { - res = !x; - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(!x.get()); - } - else - { - res = static_cast(!x); - } - }, - s1); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - case operation::any: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t res; - - std::visit( - [&](auto &x) - { - using T = std::decay_t; - if constexpr ((std::is_same_v || std::is_same_v)) - { - res = static_cast(x.any()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get()); - } - else - { - res = static_cast(x); - } - }, - s1); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - case operation::all: - { - ct_array_t s1 = calculate(node->operands[0], metadata); - ct_array_t res; - - std::visit( - [&](auto &x) - { - using T = std::decay_t; - if constexpr ((std::is_same_v || std::is_same_v)) - { - res = static_cast(x.all()); - } - else if constexpr ((std::is_same_v)) - { - res = static_cast(x.get()); - } - else - { - res = static_cast(x); - } - }, - s1); - - if (node->store) - Server::insert(node->name, res); - return res; - } - - default: - { - CmiAbort("Operation not implemented8"); - } - } -}; - #include "server.def.h" From 636f14d60022301ebdd8b1f7a581d96315e03356 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Sun, 12 Oct 2025 23:44:34 +0530 Subject: [PATCH 02/34] fix and test flattened AST -> AST functionality --- .vscode/settings.json | 37 --------------- build/lib/charmnumeric/array.py | 14 +++--- build/lib/charmnumeric/ast.py | 66 ++++++++++++++------------ charmnumeric/ast.py | 16 ++++--- dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 22101 -> 22469 bytes examples/graph.py | 4 +- src/ast.hpp | 63 ++++++++++++++++-------- src/server.cpp | 12 ++--- src/server.hpp | 33 ------------- 9 files changed, 103 insertions(+), 142 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 8c99cd8..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,37 +0,0 @@ -{ - "files.associations": { - "*.sage": "python", - "type_traits": "cpp", - "ostream": "cpp", - "__node_handle": "cpp", - "cstdint": "cpp", - "array": "cpp", - "deque": "cpp", - "forward_list": "cpp", - "list": "cpp", - "string": "cpp", - "unordered_map": "cpp", - "vector": "cpp", - "string_view": "cpp", - "initializer_list": "cpp", - "ranges": "cpp", - "span": "cpp", - "chrono": "cpp", - "format": "cpp", - "text_encoding": "cpp", - "__bit_reference": "cpp", - "__hash_table": "cpp", - "__split_buffer": "cpp", - "__tree": "cpp", - "iterator": "cpp", - "map": "cpp", - "bitset": "cpp", - "utility": "cpp", - "queue": "cpp", - "random": "cpp", - "set": "cpp", - "stack": "cpp", - "tuple": "cpp", - "iosfwd": "cpp" - } -} \ No newline at end of file diff --git a/build/lib/charmnumeric/array.py b/build/lib/charmnumeric/array.py index 47162f7..77c9e58 100644 --- a/build/lib/charmnumeric/array.py +++ b/build/lib/charmnumeric/array.py @@ -244,7 +244,7 @@ def _flush_command_buffer(self): if self.valid: return validated_arrays = {self.name : self} - cmd = self.command_buffer.get_command(validated_arrays) + cmd = self.command_buffer.get_command(validated_arrays, self.ndim, self.shape) reply_size = 0 for name, arr in validated_arrays.items(): 
reply_size += 8 + 8 * arr.ndim @@ -290,37 +290,37 @@ def copy(self): name=res, command_buffer=cmd_buffer) def sqrt(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=0.5) + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[0.5]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def cbrt(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=1/3) + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[1/3]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def pow(self, exponent): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], arg=exponent) + cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[exponent]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def log(self, base=np.e): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=base) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[base]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def log10(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=10) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[10]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) def log2(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], arg=2) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[2]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) diff --git a/build/lib/charmnumeric/ast.py b/build/lib/charmnumeric/ast.py index 54011be..35c13a6 100644 --- a/build/lib/charmnumeric/ast.py +++ b/build/lib/charmnumeric/ast.py @@ -20,7 +20,7 @@ def get_max_depth(): class ASTNode(object): - def __init__(self, name, opcode, operands, arg=0.0): + def __init__(self, name, opcode, operands, args=[]): from charmtiles.array import ndarray # contains opcode, operands # operands are ndarrays @@ -28,51 +28,57 @@ def __init__(self, name, opcode, operands, arg=0.0): self.opcode = opcode self.operands = operands self.depth = 0 - self.arg = arg + self.args = args if self.opcode != 0: for op in self.operands: if isinstance(op, ndarray): self.depth = max(self.depth, 1 + op.command_buffer.depth) - def get_command(self, validated_arrays, save=True): + ###################################################################################################################################### + # Encoding = | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # + # | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ 
| # + # NB: If opcode is 0, the encoding is limited to ID # + # Encoding = | dim | shape | val | # + # | 8 | 64 | 64 | # + # NB: Latter encoding for double constants # + ###################################################################################################################################### + def get_command(self, validated_arrays, ndim, shape, save=True): from charmnumeric.array import ndarray + + # Ndims and Shape setup + cmd = to_bytes(ndim, 'B') + for _shape in shape: + cmd += to_bytes(_shape, 'L') + if self.opcode == 0: - cmd = to_bytes(self.opcode, 'L') - cmd += to_bytes(False, '?') - cmd += to_bytes(self.operands[0].name, 'L') + cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'L') return cmd - cmd = to_bytes(self.opcode, 'L') + to_bytes(self.name, 'L') - cmd += to_bytes(save, '?') + to_bytes(len(self.operands), 'B') + + cmd += to_bytes(self.opcode, 'I') + to_bytes(save, '?') + to_bytes(self.name, 'L') + cmd += to_bytes(len(self.args), 'I') + for arg in self.args: + cmd += to_bytes(arg, 'd') + + cmd += to_bytes(len(self.operands), 'B') for op in self.operands: - # an operand can also be a double if isinstance(op, ndarray): if op.name in validated_arrays: - opcmd = to_bytes(0, 'L') - opcmd += to_bytes(False, '?') - opcmd += to_bytes(op.name, 'L') - cmd += to_bytes(len(opcmd), 'I') - cmd += opcmd + opcmd = to_bytes(op.ndim, 'B') + for _shape in op.shape: + opcmd += to_bytes(_shape, 'L') + opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - if save_op: - print("SAVING THIS LIL BISH") - print(op.name) - else: - print("NO SAVE") - print(op.name) - opcmd = op.command_buffer.get_command(validated_arrays, - save=save_op) + opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op) if not op.valid and save_op: validated_arrays[op.name] = op - cmd += to_bytes(len(opcmd), 'I') - cmd += opcmd - elif isinstance(op, float) or isinstance(op, int): - opcmd = to_bytes(0, 'L') - opcmd += to_bytes(True, '?') + elif isinstance(op, float): + opcmd = to_bytes(0, 'B') + for _shape in shape: + opcmd += to_bytes(_shape, 'L') opcmd += to_bytes(op, 'd') - cmd += to_bytes(len(opcmd), 'I') - cmd += opcmd - cmd += to_bytes(self.arg, 'd') + cmd += to_bytes(len(opcmd), 'I') + cmd += opcmd return cmd def plot_graph(self, validated_arrays={}, G=None, node_map={}, diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index e581f91..35c13a6 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -36,10 +36,10 @@ def __init__(self, name, opcode, operands, args=[]): ###################################################################################################################################### # Encoding = | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # - # | 8 | 64 | 32 | 1 | 32 | 32 | 64 | 8 | 32 | ........................ | # + # | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ 
| # # NB: If opcode is 0, the encoding is limited to ID # - # Encoding = | dim | val | # - # | 8 | 64 | # + # Encoding = | dim | shape | val | # + # | 8 | 64 | 64 | # # NB: Latter encoding for double constants # ###################################################################################################################################### def get_command(self, validated_arrays, ndim, shape, save=True): @@ -51,14 +51,15 @@ def get_command(self, validated_arrays, ndim, shape, save=True): cmd += to_bytes(_shape, 'L') if self.opcode == 0: - cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'I') + cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'L') return cmd - cmd += to_bytes(self.opcode, 'I') + to_bytes(save, '?') + to_bytes(self.name, 'I') + to_bytes(len(self.operands), 'B') + cmd += to_bytes(self.opcode, 'I') + to_bytes(save, '?') + to_bytes(self.name, 'L') cmd += to_bytes(len(self.args), 'I') for arg in self.args: cmd += to_bytes(arg, 'd') + cmd += to_bytes(len(self.operands), 'B') for op in self.operands: if isinstance(op, ndarray): if op.name in validated_arrays: @@ -72,7 +73,10 @@ if not op.valid and save_op: validated_arrays[op.name] = op elif isinstance(op, float): - opcmd = to_bytes(0, 'B') + to_bytes(op, 'd') + opcmd = to_bytes(0, 'B') + for _shape in shape: + opcmd += to_bytes(_shape, 'L') + opcmd += to_bytes(op, 'd') cmd += to_bytes(len(opcmd), 'I') cmd += opcmd return cmd diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg index 8662739553bb9d0b8a2a62ec550bca57b1ee7ee5..e7f2b565c3a10a783f732d7995c8a73befd5b377 100644 GIT binary patch delta 13319
[base85 payload of the binary deltas (delta 13319 and delta 12982) for dist/charmnumeric-0.1.dev0-py3.12.egg omitted]
zb+bS56V?=Hh-(MM#QARq$DQY9Kjr%RotXO(80=60RTv#jtZ#Bm3%mJgcNdX9cczE= zD;udxAj^AidMiJ(zP|P0*KxEbsa8gD3|gs{bO1qB{6P$)Y29G#X1fcgvk~bq0vzpz zJuwPyB|hvd#riL#z-+>AA=JQCxA4~rwCi(ZVFsJaICQbOx=?y?W62QsSfZWdsMCMR z?uXTuc`wwsaLm%hL6uH~+@+sjSAT=xi@co8&u1YE?~Qyp|8hD=-=cnGg7uSn@D!~X zMU9$c5n^5T;k}vl4Az2)2I>ZT0hG*{&3F2I>(J;BaX@hlA>BxJMNSZy4@Dqw=;@jKODN$L&#lHM1>X`gh6C)NvYQrT!y|43Xag|5J<1$em@H z|3`})U^bbF6vF+-*#}95?;lW55&Q=dRiyrbO_hJ=F9=kH|A|agga5BF zj0aVAg8!fQ@jv#*e^+y$X|kl6AP6%?G6N=ha=schNIy<;4=yHxq6{zy8t8uwfh6Cl z;R0hPBd7}!{=bH!|9T7vs5uZA|6lu2q&m^R#Q&+S{!gMP+rPvkb%KA1|DE_hBJ96c hbKsWHzeE8If`3vp)W8LamH+o9Q-d5LN$DTz{{k29>5l*a diff --git a/examples/graph.py b/examples/graph.py index e21b971..cf86be8 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -11,8 +11,8 @@ def f(): v = ndarray(1, 10, np.float64, init_value=20) b = ndarray(1, 10, np.float64, init_value=10) c = ndarray(1, 10, np.float64, init_value=30) - g = v + c - k = g + b + g = v + b + k = g + c # k = g + 2 * c - 3 * v l = k.get() print(l) diff --git a/src/ast.hpp b/src/ast.hpp index 8cd59b3..7857161 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -8,6 +8,27 @@ #include #include +using ctop = ct::util::Operation; +using ct_name_t = uint64_t; +using ct_array_t = std::variant; +std::unordered_map symbol_table; + +inline static void insert(ct_name_t name, ct_array_t arr) noexcept { + CkPrintf("Created array %" PRIu64 " on server\n", name); + symbol_table[name] = std::move(arr); +} + +inline static void remove(ct_name_t name) noexcept { + symbol_table.erase(name); +} + +static ct_array_t &lookup(ct_name_t name) { + auto find = symbol_table.find(name); + if (find == std::end(symbol_table)) + CmiAbort("Symbol %i not found", name); + return find->second; +} + template inline T extract(char *&msg) noexcept { T arg = *(reinterpret_cast(msg)); @@ -20,8 +41,7 @@ inline T peek(char* &msg) noexcept { return *(reinterpret_cast(msg)); } -ct::util::Operation inline to_ctop(uint64_t opcode) noexcept { - using ctop = ct::util::Operation; +ctop inline to_ctop(uint64_t opcode) noexcept { switch (opcode) { case 0: return ctop::noop; case 1: return ctop::add; @@ -46,26 +66,25 @@ template std::vector faster_tortoise(char *cmd) { uint8_t dims = extract(cmd); + std::vector shape; shape.reserve(2); + for(uint8_t i = 0; i < dims; i++) + shape.push_back(extract(cmd)); if (dims == 0) { double value = extract(cmd); - tensorAstNodeType temp_node{0, ct::util::Operation::broadcast, value, shape}; + tensorAstNodeType temp_node(0, ctop::broadcast, value, shape); return {temp_node}; } - std::vector shape; shape.reserve(2); - for(uint8_t i = 0; i < dims; i++) - shape.push_back(extract(cmd)); + ctop opcode = to_ctop(extract(cmd)); + bool store = extract(cmd); + uint64_t tensorID = extract(cmd); - ct::util::Operation opcode = to_ctop(extract(cmd)); - if (opcode == ct::util::Operation::noop) { - const auto& tmp = std::get<1>(Server::lookup(extract(cmd))); + if (opcode == ctop::noop) { + const auto& tmp = std::get(lookup(tensorID)); return tmp(); } - bool store = extract(cmd); - uint32_t tensorID = extract(cmd); - // Args for custom unops/binops uint32_t numArgs = extract(cmd); std::vector args; @@ -79,17 +98,17 @@ std::vector faster_tortoise(char *cmd) if(numOperands <= 2) { uint32_t operand_size = extract(cmd); - std::vector left = faster_tortoise(cmd); + std::vector left = faster_tortoise(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector right = faster_tortoise(cmd); + std::vector right = faster_tortoise(cmd); cmd += operand_size; rootNode.left_ = 1; size_t 
right_size; - if (op == ct::util::Operation::unary_expr || - op == ct::util::Operation::logical_not || - op == ct::util::Operation::custom_expr) { + if (opcode == ctop::unary_expr || + opcode == ctop::logical_not || + opcode == ctop::custom_expr) { rootNode.right_ = -1; right_size = 0; } else { @@ -135,13 +154,13 @@ std::vector faster_tortoise(char *cmd) } } else { uint32_t operand_size = extract(cmd); - std::vector left = faster_tortoise(cmd); + std::vector left = faster_tortoise(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector right = faster_tortoise(cmd); + std::vector right = faster_tortoise(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector ter = faster_tortoise(cmd); + std::vector ter = faster_tortoise(cmd); cmd += operand_size; rootNode.left_ = 1; @@ -191,7 +210,9 @@ std::vector faster_tortoise(char *cmd) if (store) { tensorType tensor(ast); - Server::insert(tensorID, std::move(tensor)); + const auto& tensorNode = tensor(); + insert(tensorID, std::move(tensor)); + return tensorNode; } return ast; } diff --git a/src/server.cpp b/src/server.cpp index 176374f..ba97ce1 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -139,12 +139,12 @@ void Main::execute_operation(int epoch, int size, char *cmd) for (int i = 0; i < num_deletions; i++) { ct_name_t name = extract(cmd); - Server::remove(name); + remove(name); } CkPrintf("Memory usage after %u deletions is %f MB\n", num_deletions, CmiMemoryUsage() / (1024. * 1024.)); if(peek(cmd) == 1) faster_tortoise(cmd); - else if(peek(cmd) == 2) faster_tortoise(cmd); + else if(peek(cmd) == 2) faster_tortoise(cmd); } void Main::execute_command(int epoch, uint8_t kind, int size, char *cmd) @@ -222,7 +222,7 @@ void Main::execute_creation(int epoch, int size, char *cmd) { res = ct::vector(size); } - Server::insert(res_name, std::move(res)); + insert(res_name, std::move(res)); break; } case 2: @@ -245,7 +245,7 @@ void Main::execute_creation(int epoch, int size, char *cmd) { res = ct::matrix(size1, size2); } - Server::insert(res_name, std::move(res)); + insert(res_name, std::move(res)); break; } default: @@ -259,7 +259,7 @@ void Main::execute_creation(int epoch, int size, char *cmd) void Main::execute_fetch(int epoch, int size, char *cmd) { ct_name_t name = extract(cmd); - ct_array_t &arr = Server::lookup(name); + ct_array_t &arr = lookup(name); char *reply = nullptr; int reply_size = 0; std::visit( @@ -301,7 +301,7 @@ void Main::execute_delete(int epoch, int size, char *cmd) for (int i = 0; i < num_deletions; i++) { ct_name_t name = extract(cmd); - Server::remove(name); + remove(name); } } diff --git a/src/server.hpp b/src/server.hpp index d11b33d..1c20a72 100644 --- a/src/server.hpp +++ b/src/server.hpp @@ -2,10 +2,7 @@ #include "server.decl.h" -using ct_name_t = uint64_t; -using ct_array_t = std::variant; using buffer_t = std::tuple; -std::unordered_map symbol_table; std::stack client_ids; CProxy_Main main_proxy; @@ -188,20 +185,6 @@ class Server client_ids.push((uint8_t)i); } - inline static void insert(ct_name_t name, ct_array_t arr) - { - CkPrintf("Created array %" PRIu64 " on server\n", name); - symbol_table[name] = std::move(arr); - } - - inline static void remove(ct_name_t name) - { - symbol_table.erase(name); -#ifndef NDEBUG - CkPrintf("Deleted array %" PRIu64 " on server\n", name); -#endif - } - inline static uint8_t get_client_id() { if (client_ids.empty()) @@ -210,22 +193,6 @@ class Server client_ids.pop(); return client_id; } - - static ct_array_t &lookup(ct_name_t name) - { - auto find = 
symbol_table.find(name); - if (find == std::end(symbol_table)) - { -#ifndef NDEBUG - CkPrintf("Active symbols: "); - for (auto it : symbol_table) - CkPrintf("%" PRIu64 ", ", it.first); - CkPrintf("\n"); -#endif - CmiAbort("Symbol %i not found", name); - } - return find->second; - } }; #include "server.def.h" From 7b499658f9ae2edde0af7fd5beeda8c75fe976a6 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 01:31:04 +0530 Subject: [PATCH 03/34] make it work for broadcast --- .vscode/settings.json | 7 +++++++ build/lib/charmnumeric/ast.py | 8 ++++++-- charmnumeric/ast.py | 8 ++++++-- dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 22469 -> 22642 bytes examples/graph.py | 5 +++-- src/ast.hpp | 21 +++++++++++++++++++-- src/server.cpp | 4 ++-- 7 files changed, 43 insertions(+), 10 deletions(-) create mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..509f07f --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,7 @@ +{ + "files.associations": { + "*.sage": "python", + "vector": "cpp", + "iosfwd": "cpp" + } +} \ No newline at end of file diff --git a/build/lib/charmnumeric/ast.py b/build/lib/charmnumeric/ast.py index 35c13a6..257e1be 100644 --- a/build/lib/charmnumeric/ast.py +++ b/build/lib/charmnumeric/ast.py @@ -51,6 +51,7 @@ def get_command(self, validated_arrays, ndim, shape, save=True): cmd += to_bytes(_shape, 'L') if self.opcode == 0: + print(self.operands[0].name) cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'L') return cmd @@ -60,6 +61,7 @@ def get_command(self, validated_arrays, ndim, shape, save=True): cmd += to_bytes(arg, 'd') cmd += to_bytes(len(self.operands), 'B') + print(len(self.operands)) for op in self.operands: if isinstance(op, ndarray): if op.name in validated_arrays: @@ -72,13 +74,15 @@ def get_command(self, validated_arrays, ndim, shape, save=True): opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op) if not op.valid and save_op: validated_arrays[op.name] = op - elif isinstance(op, float): + elif isinstance(op, float) or isinstance(op, int): + print("SCALAR OP> ", op) opcmd = to_bytes(0, 'B') for _shape in shape: opcmd += to_bytes(_shape, 'L') - opcmd += to_bytes(op, 'd') + opcmd += to_bytes(float(op), 'd') cmd += to_bytes(len(opcmd), 'I') cmd += opcmd + print(cmd) return cmd def plot_graph(self, validated_arrays={}, G=None, node_map={}, diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 35c13a6..257e1be 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -51,6 +51,7 @@ def get_command(self, validated_arrays, ndim, shape, save=True): cmd += to_bytes(_shape, 'L') if self.opcode == 0: + print(self.operands[0].name) cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'L') return cmd @@ -60,6 +61,7 @@ def get_command(self, validated_arrays, ndim, shape, save=True): cmd += to_bytes(arg, 'd') cmd += to_bytes(len(self.operands), 'B') + print(len(self.operands)) for op in self.operands: if isinstance(op, ndarray): if op.name in validated_arrays: @@ -72,13 +74,15 @@ def get_command(self, validated_arrays, ndim, shape, save=True): opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op) if not op.valid and save_op: validated_arrays[op.name] = op - elif isinstance(op, float): + elif isinstance(op, float) or isinstance(op, int): + print("SCALAR OP> ", op) opcmd = to_bytes(0, 'B') for _shape in shape: opcmd += to_bytes(_shape, 'L') - opcmd += 
to_bytes(op, 'd') + opcmd += to_bytes(float(op), 'd') cmd += to_bytes(len(opcmd), 'I') cmd += opcmd + print(cmd) return cmd def plot_graph(self, validated_arrays={}, G=None, node_map={}, diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg index e7f2b565c3a10a783f732d7995c8a73befd5b377..c45c801e529877b18d7e804218ca8bd7492ed0ad 100644 GIT binary patch delta 5303 zcmZXYbx;)0*Y}t1?(QxDrBg}?sfDEkl!jdpBm{(|mqtRmk?w{CRzg5(DQRix4(WLP z@xIUd%rnoKIrEu&=bU@z{&VJjzrEEcr!^=9IvQx`k>oaCz?D zUo!CMRVS^3At3*nKW*!aCx=(`Jn2+suYOE>*Q&HAR~(nHka&;wTF^nY7L6QZZQ@iqX-b~APD89$ zr_2%WI)XiM{6p!PA&mue1%C=(3p}bla&BOEgP05Y*=Q+{zr{f#sNwGO*Bhx%dfc1C z9)E$8G z$&f8cqbRDqjfC(>8XuFzaUx+}(3l$Lc<>@N-&|hO=CSG0T2SXCIwkg1><{7r2B}AY z{F#S;*=eoPCQ1lYBV(39q}uR=!cRv^F5X%|-ABy_Y-tQKv4V{`-QVuc>@wYK6naaF zp>~$;7`hdESJQFW0$(OeG0+HrHjmznT~DY6B->-ox@i#CnxpX)NPcC<7}kAvy}KTO zo+sUzH7)XeN`hNa>+U;@k5`e(NS!r3v*bSe^e-zL?w?;!=j4rMuMHTr2bC_vphBDj zyAIp)UY!zGjCI`K_Hw0>_W75d=f)Kg^#!wid;2_UBW)Q z?-@H#C%gN)57GT$ppbhmY;pwCl5Dru!d7h{OBHp45{hgEVeCDzNNJy#@xov<~>abLuy z;jByUE+E8o+sDnlisHdWi9^v=G_2gwjA$1D1v=#&C;PkiJH0zxEqCpTqrYfjdo;i; z@HasyQ|RxizThZV?y8hB$4+Kt(|HV16D6hulkW6oC4v_PoPYBSUZ{HL#{4Dt_)SRF z;vqY?-X2L)5sk9etyQg~4ALRZc72DjV%rukti^`yfg;Yy8I$}tW?Cshj!csR^M)>- zXO=F^*!mQukT-Gdy99bV%=9@KRLfnU5UKM(2_0hW^k?U4^~k6x-Z4STxw*ejA!u)w zN?3Lx(Z7CTP_xTxaGOjGKRJC@`-jL^6mdzlRZx;C#UC({W z1j^nxvYTgOC)~ublU%5*{KFQCrclrgNCvrx4F+H4RW9^kY ziP;yw>CK==D-LS$ctASIzb(3t?TcXz{k9n^vf{6J@EeBM5a`Tfg8^0pm%`KA^~)31 zVeZnlAE^Ff8)&zDFh2T=-kpb2V$3PQ>9(2=ohGxi?bZ*1YOvA0$=;QnySxbp)5^F` zC*CrL*4Tknk5A&h1e&*Z3If&EWjH9>@~MMIEX7aA#9im8t&3zUgjmf2YCj~aRS+y6 zQfHiE%^p})zo z+dC%jQ8H;opgP0buG_7jz;YLTVn23KJ~+J$R^71H^R)9GvBujFs6Dc#lMKKOU4|P;OR6 z@2W+*e+um7^-*bD&YRJQLoAhB4!lpCo_LVVsoYEWnm3r0WmWf6EP=yd^GZdk(_{w;(;G;7TPrgTH1@!q|M!poWoq zr36SP-=n!;oVp*F5o<59Tu)S{0druQqc-U zV|JyLZ;@p-hA*AUWbIkR0h2McZ5HsnG*WxIx5w^W;+daU@5NiZ=q-=av+$PBH*cBv ztIMk&wT|a1U|93Gi|6%=WY`ADZcfM{NF&d`-f#W6=F#+y8SB1QE!>k9~7whaai5a%ubffR3mxB zNtL*vB%dqPbfdOBb6KQy$HO)f+WP3MZ93Fw%%%bFSJKI|P=J800L=>p=z|)|w9YX% zbHGAy`@zqRN7Tl7-B0(iKNSLE?R}q zn<&F=5SO)>^C-{8re$c23Lqy%d}?EtS(F{;J0g&W@o?G(5{1GZ%mR zf;D5IZJ#qKg({q`xQ)2h} z@!aW~cUOk6@Z0H{mtyJ{>gOlqAE}+*!)EwjM2tj4P(OS)`<<9WxSo zn6hq=%zBArK;p5oN;dJ#+MnMbTxyt6(hAHwcoHqMtx zJh(ci(XtO(mrC`lw3o4f{3>;hsy$Q@qSbsKzON0pN} zVi^cP^Z3|zq+O*G?*tkN-(4EK99?i&5v%o)pteOob3M{iIlmMeDFLTU>u)p>$BykG z6IgSwy0ABkN4OL7k*DKQ-QjxUpR3jmRd^a*$-lk0B7M-i{8~E(QW?*aN;ko;v0f7L zrlaBviD{KfY%FvO(b`*2TKN{H=TDJojhd;f0!w%8nU@mVOS z&?Vz6H8PgeZ`P(hCAG8t2e;)<$&Wy3<3f@2G=ZVNChU~Fjk9KRnXQ)cyE;2c$_`^? 
z(Tz3J3r(gsnN=_C5HrXL^VYZc2bgTR6olU;p=TV!HbULH_%N9vmJ}o{x@`sQthOGW zjcHpBoeoEENiePCLKNf|g`8$<0-hO$;Asyhgv($C^g|iZ-RuwA8tUXaqpwlhHA4Xq zzyx-PG$Nm5#zGsf^`M2Bi#O=bR+$48_^3-|^-H`d=Ml+RL(h zYxZF@f92hj4V6!*{^lzr%Ff*WBDn`piF9IL!iGvGDC7LUZ?o;K^c+jtw$Ww!lz*rh zJ5;rokYaJ{IkaD0z3nZ78Th~$BFo*xs3~nhMrR(y5RpT+BNZWL!I;hzXc!mDvP6I=E^vyea z(ZlV{%OME=)7^nWL|ojGj8A`jl`Jvax&f>48C9Z+BRGbjk4u?YC8d9~a0hNJ`iyXi zo?QHhWJl2c%PWockJ+&_r?X|4S@w`-xTr3 zrt6uJX4Ll@+IO$kGK~-hZU@jZ_C`|TJQqM1F_op~*W-Ba zS?ho6=oe?S%!X3xx^s6ryl7hrmvi`3#wONEi3KoTaEh{=OsT|j%f^&<@kZ`xAtlmE zNGqSw8AKpeZi>VD7dk287M{&B`g)8krGm}o;qRAGX6?7PbM|5vAyD|LdU{*>Gy8KRxh@xZ5_joR!oE+hY$9O|8>jKtY;S5OYmu!P|R3eCY8X{pjhe&YY%J_t<_rSFq`A-31}n6vU)X;0VzpH@QD%U=xyC zRpn-kwbk?!A521G6eYK+m+I@APN#r3L}`uQ@ErA_HufieS(guR>Gx|J9-q*(@VHU> z8Amri>9L?o7^l?B$?Tu_`}S#mohI(8=RV$q2ZMuRiISYGxmJ01TneqlFb_Ggoikh- zz9@mm4`h|4;4z0m^#@kN^Mx delta 5102 zcmZXYXEfYhxW#ow?=?|^(G8*{M2TKUuhESldawDb5nYU)AbK5jqGbqTi0EyInjmU) z5-r^Kb=Upyu6w@x)_K-G&-u33+WWWx8(oi0rl*CAM~j7pMT8ZN(rzTnC&c}Sa0FrV z9UX{d?pQ=*e@6n4!oO_R06{Wl%-vouE7sT(?2#w-*mAKI~TUqS+=6J?P;h0Lbp2`Uw z7I-4MSOhf>lZo=s<`~zT(;;Z#GSK#}B=M)G%N0j($GnySp`zZZRUTY(`8{5~a6}qh zqB>sNvR^QD6bH}2@DQLA)*Bk{ySZM}UXh++%M*INNQ~hmP&E18q3Q{dhXm_7g`LR^ zmg*PzRq(-#%C~F5=#Ugf;lX=H{J3^!TsY0B0!~3me;s2#$?pAC_T=BdyZM1 zyyXv?3~#U~>`Xln+MnLd+{pj9%SXiIM$vKlGfdnyat1}33#fXFN!H-I^)U#WBbY09 zCd9VpMbGbR??e^je#SRb%xG`1;wXv7P*TU66XaHH1dL3W1+4Rx24Sc+wrHOa)_C_1 zQP-B$?1ktke@?k|2@bpso?t(*t7p7(KdL7#e6<7GP zC9LMyTVVq`27pR75;|HVR<%Ov{jN2e5S80PObwARtL47a5I?q%S&Fc~JI<&_O;%y%Bcpos zhPi`$7=voQin`T&#s}Bg9khF=Ci+C25H*F?Bi>7wJrQ3pu2h$Ra@Nl8NDi z!F4~k3_$7gp)G|a0`vQdp7b!fh&`ui{>0nOhTE|hHL$MduQ*q$GF&;augtV1^ngD% zZVi|EJ?Ftd&}1>0?)uGApbl&99(tLcqif^SIfwQBi*@6yp9akVFi{ZB+`%6-L=;jrQ$L}x(x8~bPQ z-6)6csxSKfLg1a93As&I*^Zq4)vs2a-dFZroi!EYGAdBB6U+XFPQGFNz-}X(WiUGr zaB))q=rJ_wUF0ok2kWz9ew9<9Cu{f&CRoDBGP#qokr5_dOm>sb7Yq$BG)L#=&bb=kCMJhDl`0WPNfFN}>j$n9n^2Io|LWEa7tR}f=YCsh^kU+Fru87tM7 z16_fx>RlYos;J}=h8j|eMOpW6DEkp1U{nOxv~>l|xhT^wVZ#{O8aAU%KzRxFYsN#1 zc1u_q>AG@R=Y!JYQn{!kdjw^b&Y8$2n)iP%_|6fCY^xmd&dEf!z&v|-qXV}P_Yg$A z8(de$ipPz|CtQtH@c7FR-lC;05s%9XT^ ztT~x`pH<9$_ncr-ZlMl?$2`hb`qJX9OJbpw6FOtyH|v&FEZ~ekLqe!IQ|4C@2ljrs zC)vAEXyKl!q)LK{;)izB?nv>wvwM?W!E4YjLZIoUBoQYP&zmk}t=eiTi9rJjFK45z z8LFbbd;V+ia4Hk#W;4cV{5uneu&&>kI6a#)1KB@rZn^84ZPh}Gg@u4qDYL_CBxwON zOKlcY=7J-Cx74HLM3)E!90D3)4hAAFmpldbw3yP!jA<*3Oi+^O5#pf|&KGAxJ_@H; z+Gd6yZk*`M2XxA%O$KEu^Ky=5nr~g%cB-ae0h-7+l2?=n6MOjYqNVJ=V~i{`VB-&# zB%|*h-2yi!E29^m9s-igXN=b`FY5syCHz^E9WZeGa%W_dgdh(;soz;PqTOq|%8Re8 zg1<7r#Y@=`52@Izq8<)YV3_`RYG6Yrf~Ud2B;JwKgJX)FRK-VwXpe6nM$%g(;_UV~ zuYy%~Q5YD^Nh8~xIB=)1$m0_Yg9K|J`@Lhuv2};aFLM&NNErp4SRUyL17O;^Z7#TI z_cX$JYY!O859erW2z5+2X&Ot5WR)Ego>tpMv2%SaW{lOb(+aw!7T$@PE`9r3wN2EF zq{6(<;$rFM4G4(vU@iJ9I7@pN2L`k7q#`>kDI`L}yPU=t(RfNUijcKG;)dN+xzx$E z*}j{OqF_p;Uiq5X4T9f&fLn0=AAAFJ^x(EJ`!@O~5iVnNk}c`Dpj{Hv8C9zDsk>!A zfdf5~ltbubS}(_dPV?6SX2NMLMBQRQ06krHXjdYgywb;vlS4QOBPU zqTJxhw!ai8jXC(Y06K{uuL&20sDHf9E_y01P+gXj$e(c3SiC%TsU)ZtD{HL}0&&m> z_4cqIwyS|v)jE?51**+FZ8N^%KgboE&M0w`axI0?{&<~{pP6d|{zTNez;^8G@dp|F za9QWu8|rlw6Y0-y4#jp#(R7*pM!t`W+DKUmdaOYdC3;4r6hNn*bG9?V6r!YAjmFr9 zF#v?F7sA_&9utjiWJ?vmxc`iQR;JdAxQ`c~mtMq;9Uad4 z2wVBfEuX#Z0%|5o$+%UsGzOov34z{F8Bk+tY;>P`2q0*YVl3rMIz}K391W3@WY^;$ zB$9vVp}TUJFCaNnW7t408_W;L)xOheM(L9wMl_aG-#~4cA?S5OA?U|`!Y^Y0&;AoE2C)>?y*fq>b686Q%vp;UmiA9k{>?k;sT&!zk`~6~3^e;u~ zfw+N)x*-`6KT9>xNnxe#sGi~su1uz9;+O8oRDUn}c6{*K?R$0e-)AOBw?vhf8JV}1 z$|W7yz_B%?@I-$0lTWHk@ufB-Wz0$?)CtQsJ-4^-eNMtTAC)}MYUk3*4<1IYBc@_9 z9A1OQX^HwNu*HcT)VkAt{Nk;togVe;uMtuiRT7ywMw8Bfo>8*{E$u@;lf-pi8uAXl 
zJSUW(0RJ_zvP%^oj3P})a8?j`-uzwzPnnGjh~XR65DTS>GiNT9hU{e-+2uMR=U+~X z+kRuNIK^|(U(3SKW$H;Y(F*I6eV`+(Iz+O(^aI ze8qao-9rzOg>-9!CR50lF-L=$gr|j30BJb^-td0YGB470FpD7?JB3Sy{? zb6V5~ckKUO-Vgs|tDh?=aj1+->BbF^?49hX2NJ4hpYCYSPqr`@)DCX#V=eiwZ=9aA zj#ccPCFZ+Ky)2lcI6$G1u#vWc$bxc7J~@#gB5YPppYe=(L6Zv_)B_A=&UlhbRtX2{)k8I$6=sVU@sNgHJS|9moQy z+0S_E2Qv8@XIJ5r)RwqVNp;27Yz0sLB#tICd+of!eYjvB0=jjStm6SbITn59_syIm zkNO@$Z@kF6_*i)Td(ZKPuj&!M`h&=PEoAz`b#iXEcNuFL-9vve03%(j+nNb|X5%Ew z0H3FkOY8Vc5(nmyL*UPuaodp;!h-5z5gcbp`vdSHI@rKKA}r`os zo^c2e@o7E0Z0OHS%da#m9brP}2bx2-Ec96nc{b+xiy;@pYD^Ho8ffjQ#9dlUQRZPIzBodyFuaR3B02&LsIM z24GP3b!M?6w8M-iqb1MS*N<_=imtThU9!>Eymdcq%y!o_PxE zR0@1I0_^2q>o4`CSZ8TEjW}6D6aluYU<)1ELV9Ii= z>*LXFmq&ySQkF?WTTNimKv;RZ|aV;r~U+P2{7+DD^*Dn}h;?diO{Yt=RId9)EPz%D$gUbwpjj z`?sct%V=gGWKE7$`AzImk+^w5Eq?!Unf1#ty8^dZ>_`!`rPE-JWL!al-_J%v=+6nb zo*8mnYK4%50poNvn|_E9oZkC*VE*moYJO!#nDD$K4ZT+p^19>?Ish$rsDO+eus4Tanh>k>ua^2RMP#T)7T^VVBhN7 zMJUqv!HtG-sotjjG=6XBa?KnXUHGSK{mfPEoqEQh(`!|YVpzd-S%3pun|%p3C!%q7 z7GXSCjm}fU{KZPR9K9*KUX0#}x%D^<@M;8Jl-&#^tW_QRHokPm3`SW#X?}79_m&KH z{Gd=-X@NRTrj}}v<<@B(9WKb-rO`$=MtK(QHYA)#C;p!LjG2^+{Tfg4SYm69;qr7N zATgv>hOwLnrxMz+bfBvjNg7+#Bk4(C&?UC@yZtOUi|?x>kzm7?(WiK9`>@D2J1?&R z6{Y0)mWfn{D`r!pnk2oh(1|7==gb?6b{ng7s;KhEBq|b%O%Dvcl$C8EA?>?%8#`RS z%Pxjxw;RZFlB+8vogXDOzyse-~2OPWxX3Ku+jZHX8%w4V#LA{ z{HHswC&IDescPgTvp1UQEO GZu}pTx1yE+ diff --git a/examples/graph.py b/examples/graph.py index cf86be8..78fe1d8 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -11,8 +11,9 @@ def f(): v = ndarray(1, 10, np.float64, init_value=20) b = ndarray(1, 10, np.float64, init_value=10) c = ndarray(1, 10, np.float64, init_value=30) - g = v + b - k = g + c + g = v + b + 32 + k = g + c * 8 + # k = g + 1 # k = g + 2 * c - 3 * v l = k.get() print(l) diff --git a/src/ast.hpp b/src/ast.hpp index 7857161..4a9d33d 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -66,21 +66,35 @@ template std::vector faster_tortoise(char *cmd) { uint8_t dims = extract(cmd); + ckout << "DIMS> " << dims << endl; + std::vector shape; shape.reserve(2); - for(uint8_t i = 0; i < dims; i++) - shape.push_back(extract(cmd)); if (dims == 0) { + if constexpr (std::is_same_v) { + shape.push_back(extract(cmd)); + } else if constexpr (std::is_same_v) { + shape.push_back(extract(cmd)); + shape.push_back(extract(cmd)); + } double value = extract(cmd); + ckout << "VAL> " << value << endl; tensorAstNodeType temp_node(0, ctop::broadcast, value, shape); return {temp_node}; } + for(uint8_t i = 0; i < dims; i++) + shape.push_back(extract(cmd)); + ckout << "SHAPE> " << shape[0] << endl; + + ctop opcode = to_ctop(extract(cmd)); bool store = extract(cmd); uint64_t tensorID = extract(cmd); + ckout << "TENSORID> " << tensorID << endl; if (opcode == ctop::noop) { + ckout << "NO-OP" << endl; const auto& tmp = std::get(lookup(tensorID)); return tmp(); } @@ -95,6 +109,7 @@ std::vector faster_tortoise(char *cmd) std::vector ast; uint8_t numOperands = extract(cmd); + ckout << "NUM OPERANDS> " << numOperands << endl; if(numOperands <= 2) { uint32_t operand_size = extract(cmd); @@ -115,9 +130,11 @@ std::vector faster_tortoise(char *cmd) rootNode.right_ = left.size() + 1; right_size = right.size(); } + ckout << "HEREH" << endl; ast.reserve(left.size() + right_size + 1); ast.emplace_back(rootNode); std::copy(left.begin(), left.end(), std::back_inserter(ast)); + ckout << "THERE" << endl; if (right_size) std::copy(right.begin(), right.end(), std::back_inserter(ast)); diff --git a/src/server.cpp b/src/server.cpp index ba97ce1..c172fca 100644 --- a/src/server.cpp +++ 
b/src/server.cpp @@ -143,8 +143,8 @@ void Main::execute_operation(int epoch, int size, char *cmd) } CkPrintf("Memory usage after %u deletions is %f MB\n", num_deletions, CmiMemoryUsage() / (1024. * 1024.)); - if(peek(cmd) == 1) faster_tortoise(cmd); - else if(peek(cmd) == 2) faster_tortoise(cmd); + if (peek(cmd) == 1) faster_tortoise(cmd); + else if (peek(cmd) == 2) faster_tortoise(cmd); } void Main::execute_command(int epoch, uint8_t kind, int size, char *cmd) From 3b3d7565813a5605877b8c582f6a8668b06aeff5 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 12:11:06 +0530 Subject: [PATCH 04/34] test ternary op and other binary ops --- build/lib/charmnumeric/array.py | 6 +- charmnumeric/array.py | 6 +- dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 22642 -> 22641 bytes examples/graph.py | 10 +- src/ast.hpp | 30 +++-- src/server.ci | 34 +++++- src/server.cpp | 1 + src/server.decl.h | 42 +++++++ src/server.def.h | 163 ++++++++++++++++++++++++-- src/server.hpp | 128 -------------------- 10 files changed, 261 insertions(+), 159 deletions(-) diff --git a/build/lib/charmnumeric/array.py b/build/lib/charmnumeric/array.py index 77c9e58..5f33859 100644 --- a/build/lib/charmnumeric/array.py +++ b/build/lib/charmnumeric/array.py @@ -311,13 +311,13 @@ def log(self, base=np.e): cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[base]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - + def log10(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[10]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - + def log2(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[2]) @@ -330,7 +330,7 @@ def exp(self): return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def absolute(self): + def abs(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('abs'), [self]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 77c9e58..5f33859 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -311,13 +311,13 @@ def log(self, base=np.e): cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[base]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - + def log10(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[10]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - + def log2(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[2]) @@ -330,7 +330,7 @@ def exp(self): return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def absolute(self): + def abs(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('abs'), [self]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg index c45c801e529877b18d7e804218ca8bd7492ed0ad..e33d22c86086c2c22ce377930846d792976812cb 100644 GIT binary patch delta 8028 zcmZ9RV{9e>vxVEfwQbwBZQHi(_HDP_+HSYDy|r!I?bf#2z2APxO>XY|c`})qpOcfE zb6*d7R}TuWEC&Eb2LS;RKxG_65=;Y+&SQHb>zMD4bqlQ^pvk-deo$v$Y@-I|b z=o3{Vr)alld7NdH|_zCQG<7$6n zg)J&fuk>?RHsTu&jK%G-C7fbOCJbn;Np(YDB`L%#ekU2A8F6hdEGTo?2#8Ho1S((* 
zp4tTSI7!YG$^JZk$UZg|jOV6FGp@s33~$B(Hqz75e?I|vxYby@`iDmn9-&snb_Xf$ zabqQ)zn6K#wNIq0VLB|@YDtuvK_{aMta`h|C*ai?zM)JesR+q}YcZy|L|nziuT?W_PCXUM(-0ok@6BwviF|}P8xhu<6r4f7k&g?Rzm^0V# z26Q>s!u^32d*Vv-%gLCM@F)7TWQHjoBE^;Delyy`h4XAY+%e7KeKOSHG{MpO_VcpS zAe2vqMG2jN)l=AcSk}4HG?@@{Yq=rnmZ^n5yv4?%<-4Bgs)42U)?SA{QY?Rn>Qm21 zf5kg!h4)Ls*SCdps_^j!)o*(9kwmen6k0q3?+-Kj|TL2{x<+ z86C7Nro>em{fceRtqL1fOwNv)t;HWJ1_|K68)|Y7haY%B3#i#}3=4hmitv@w9wxh( z*)Bwf@urv!y2FG25zxoi11jgKJumG&%>RN*;>sy_R#4{=!dSul`3OBTI8TBB4R?{4 zMZtmEC`NJ0Dza)>kh@gfu~|9uN|}s=V9M=&eE1kp#+tc>O2c1p=dnsijwM&)?DA*gqtK-9_NX z6;2`|*1KR$IBtvk@Ar~yId+VU+P^Ng+GSlty2M?!4or#>S#pp}ug2!Mn7%*mc?NXC zipibS*U;ZEU6^rKj5`bb6q4x?8GuG5f|jEX7^P-wMaarh*967TB~Ull>cr`fY#?OU z83o2fcpxJzS$dkn=G_Fy{E@!?uOUY1@57B$A7lhWoZ z+`jKt+*%`>sIc%_6O#9`QJ|Edc1k=?Woxc|O5Lq+kl;As!vJ@pFD(OZO+aq!xOhc` zQ4{8rf%sD7s3>p&R`#f&S(5|fh(I6M2+uv^5bw6V&=MUft8kzPH+NnxX?BP=H+SzA zsFesJR!)=JkQ*{G6Tg@E$^M?dZ;~@2*uhULY{mPJ{=MuR^6ZcxZf+|+OFjX8Z$WMu zekf2EZ^5uKCldLK!RLRp|5umfWGghcG6+%@p^=<7IU`?mDNA}0DQEpmwpiRRq#6J z=#DphSupcu5@6gd7=1MSN&7{5TZVw)v)%>!*+Jt|$a=DO;VuyMf_eS7-3!s$9x3d7 zrm@p&o2@H^DVR_0qMf#{oVA%myzpGJV0NChtc|+m{I{)`6*HKt!M>vg|M$F}pGJEE zhJoJDZCRl$EV@4L&twf`JZL1XRiDD4&!c2-WCnt0oOiB{px*HEH`1X|>7F5^zUrE0 zjidx5H1%ANk$Y`iAn=N+M z+T{af{g{j&sSwXbK3O9O72r-V(W>16JG?C&2vEi?$0`{HJ7KL=RTiO*05=W@$5Rvev-{Y%s$4dZJ z1poW1aw*pE4zvPOR1xRbrBjn5Y7g8$@nj@CLQwjTczQHu@DO5wfQ%&*6L|v-CO3^4 zNNB4;HYFuPbVaZfHca(c)oHKAg||6{6AK}_JfkEY%DPVwt+-Rl!<%d{BgFKqEN24p z+^*YqxBSL>ndsS-Umqs(KCN~$x6z-RQjP~p>s|t*#wlP`A`n#x-(>x)U6*~ZT`dHv zDGjg=MSXI+lzXW1!mKFEkuZSXu%eLhB<7FUVwCYT>ZdgD7(P*=mhn91ciN)Exd#c$ zW(e#2Ey|H)OpPk)=D@Nev$Xw?n@@HN4?@CU%zg| zS`8d?)K5<&oW{I%qF+H9lqQ6{&)v1a@2*8qeiPbu1J*UcTQWH0kp@aVM)TzQ|0$?|2fW&&;Vvx2^;o>r-g+Yfktb-EM zwu__TTtSY!9<)VbCh?NbKU5p>UiiEeX8@J0yK9lZ0@zAOWhyd)-)6!Tc#*Q=x-tAzCB$q^B5QL(Nb_Tl7w)r#TE{pwrOv9_91h$IC? 
zZV}oaoH5a%VJO;g`w@%L#~|g&eYvy3>lp9f?4@3ei6gQ)RfuUF;UEV>PFi!Fwy(Cg z7p6YGrUe27Nze)5??m&>mRqY*m~HEk=fyJ#OO=kyiHXI1v0fEm@D`pi$|9tV3|2c0Hr_2B5Y7tO>8B9X6P!Tln#%@RAA^Z z|1L0F4!X=pD^Sg7vj&tZs)yO|xEcd>gR?U<3ti=_lp#u&%Td*^ zDhHY&HOzFQ^)prqaOE#l7)lY!5f@|22^SU0K^G;PQPj9LLT%7&huqoHhHedky2sN1Wg=yW_Nd-{Jsnre4g z@zsCF;-CS=JGd5KcFXRjio01nrZ1Y~l%(lr4L_Ab5}4g3pEH=gy~=Rd z%~#)h6i!^V($}fDRH$fa;#@Sk%W3+KFSwgDS?ZVY_|{%Yt=~V|NLIka7b)+Db((eD zIOtSa3VpSoJI*iZB^K>u&-{644(0w#Ni_V9mPrAo509hin)RtXk*(PH?(6O#PGI3J z1=>$7dksf@KK_c?Ap3nIR(+~@y{kTHz7g-TOBd(z^(%d~`miS-=>pkLgvoKbIkU4a z<6^3RS?L19%dbG#Ui0<9(?g%nHHB9BP!0&gs#}Yq@Ado7rD%aYqy%b8ide&@oZy7bld_P;=OXv z`~iTjy>poNLKQ71ac2Vm`;6%7sak0-p)l@whpZp`iuZpo1n4_BL1Weqqn;%oL&lb; zJ3B~Cc82qA`pYLhYU7b6J)i~xe|Iru*XjT-A0iozkO83Vq0z>V$Q}{{#bR^*t#v2# zBl@+}OkI6&ws%Ux>w`5=1+dw@t3O3xiQ-K6j^H4NNYe8_;6QDWZk&IT71Hr?Gb5vj z6SjwWyF^h&yRZH!gD8h4BZM@|+b09n$faHV9{O7x*6%WPc`4=vCw zpv3|E<)?5jx8@#+Ys6{6Ly;kEk~F(m6sAdS!rEKyWC%QAJ|w8A-1>4L<3aGp(J8>V zmmG{&!$>@2(v$E|(E)Ta@J(3BF)bhn_*S&JN}_EZJU>L(l0p~*dh!P-Xe;e{xB1i# z1@%02gNoo9ofsh)!gvN=Fv$5hu>|OC?9gK5Dj+S{Zfmn3*@4UkD{OpZ7Zl0$Q(TP@XZwDvRNbU@?J{*;-HO zPiW7p({L7>Xcn6+P0UBtqgZa0wv*6&bxcO=8&Yp{RCT_Le#fz~i&9pQyCM&I8(0i8 zcLq-EQ+l{q1V>sq={acOfsO&4mo<$(Qk&AFNb#kxHFpEF6j$SsgF?G^o)>yG4{_ZI z?<~jZ28_STd#`(rSBkq_*;9en`SU1TE5-P!eIKdoX_d~gjdd9QT`m{o9Vi*FQ<%LE zS9-Hs{>NWwhdpR_-MjLR{5%tZ))gOZh2Kv_32YX0(VORLGv`%$OilsaKGk0>O}5Pe z4b3)<7Nv&{qk3`cae7itgaN$)J&!}wP1Y3wMIHr?PKAy)_Oo3_4IIGDb_2~|KTq?9 z>V1!gvBZ@17UNol3_Gl;SQxiIhlxm2otO?sb-x+Xxa-#@TZ>z+f;IxD=ZlhMk_baen>4QC(L zD#J3;Qrwrw9OCVnjFG^=1GLk~UuecYiBl0s_vVaaEey_4U$PW(NnOm*4Ikba6C+P= zhW0BW_N~V;+o&TfV{NfuP2CRcy&KQ2n}rph6K^ullKtxaQ#0mIgD{JOWTRLLwUHq( zYGPnw0dcIUj9BT4DCu?>E+n|6WIl2-ew;qGf*oC*B7)i51_MBD2popuPVKWgL?e8{ zx^{1p+PVb3A?poQqxSC*-TMg%EBDgEz8V%@EL+hf9r_7t48`dZ6`ti7HMj2R1ofEa z*vx&agqHF``UdH4I?d~M4g9JKrQtJ{zBu0L4JYM$f%nQfwTM2_)$-r+!*uCafVK8<^)Jrt8Ux#CQ-v z>480tsWYlu8WCwAFd<&_M7`JM(RcC6G-rdAS0i)wK!4sz?NDIx61%RGmD|ZNqj}p| zp2ubfOm*i%NRl<4NQ?;y*%cgLs3E$l15$6{U=r4DGXp!+hU;D?#R;k4B4a{caA z8(?LrZm>FL(sp_r9{FF47Nn32oBD07N9w zQaLB*ucO6W0y4KspEYCxa6W=2IAt(0&cv5P@1n!cl{KBIaNP3Qj-7*1;FfsH0|)Pu z+WU!)F@X#x&sa<+6@7YGW~CkzoHI1c-Lt3DYEwx$85LlSOK4=I&t&|d;Fwbsc&MnG zXgJterWSSwq}UcP16;(ZZ8YV?O_*jDugDXckS4D#=iN@KLupa6z&W7-`VG1frS68a z_hY+XRDMan`79M@ud{g4nuRz`wb`}eimHMqgMs5ocr!F7_LDU&zZ_Z-xA+tM46-7a5fec|7aBF=NQ(`TWF#eg-6nto-*#U!y8ILlJ)f{sY^lTi^7Op_d?2!WhbZ> z*eiSvYwH_*8|yA=f^tfQmIY-OxULJk*Yj;cLYslW2C3;Va#DVIA*nL%2cwnj2ahir zaRXsWggt)5PZY4V3)IX&pFoVk6#;TE;&Taw*q_-U$z1}jLMqz2tLPY!>P|q4qS-I_ z0tpK>)%D^sA(=|GO=eZ-XyG@*g*WEcpiKPJC$daKaN+OrZEXK$W!*E)qw@pvI+zrA zR*PuvXz7P7`MNP}(^C{i_Wy ziS{92OE%(VV=;qo93&YCskr&FyV&d%-@X0BH4o>o4mh8$vEyabKw=J*7nnO=Z*B2? 
zJ!x77%mPKX(|vJQiwGCuGzI!z)y{*T(>Uy^sJhIb`yOV?KU8N2FI1Gg&n`hn*nv+V zRVQq|f3mCzIh~t!PlH@SI*967ma~G?M#|a1*FOv%h1d9xd3JQ{3YWHEk zES<>hI;TOxQJHSR_H3%2^F@!l;eZfvYE_O2Q~bHjLic)NgLU62iKnF;Il9))<1>6F zT~gbt#rrFNmJZXJ6=R(PWd!CFYkWKqaJmguun$%CeqK)7q{0wdKqyok$f+0=zb3?; zZb_^3vXQXjk~A6&Sooi$<9H!fGb*8RcE$+Hv5%Cy{)LeZh}NA3*l6<9cJze; zTh6f&YnUO!imxT7oY)|R0*Jo&+eL+ra+_;6l(-gZmX(IKw49p5UG!@Qxjl9ov+7wq zV?)}zFt!M(HkNsBV`gx`M}R|X*jzbvnGWxa1(>^u09DWf3vg&`+!jx27V=tUV%+;k zfq=aE_@s(c&T%LjWO{NJ*Lqr>kYH@8ad%f{`h&)Mm7ORl7H8N$Gn3hx(!Rm*zB1kn z6?7pStR}x{xB9jdR|yfel)a;skAc*UtG4XD9VZ95F7HQQe?FJLCxKtdP(XYc2Wq@C zeR`ViC7zYY6K>~drf|xn2r zt8cI?oRYdtGFYcLtATs*t|NfIRgM(RxJ3|F*Qa}djyz0g;;DYeu#d>ahw#(z#_=tp zawGU}9xqKZ=9)~GTrw4L z@WBdDXr?*KWL7GMZ`$d z#$v8ix&Yv^j5ubIpXpq?MyZT}r~%@I0CUAQ2l5m;0$Wbxm4%PiJUW z_a2brg|~_6uZ`t%vhP&4fPNBG6Ed&Z{tB>50tpjsokUYGaaBaE${A*@hz+2~I2pO= zrJ8`o<*V5s^b1`Pg+(KMMkQM5IfRM4X8?KS2^JEG2@DVaBaiFpN5fgTezDu zxJ=|U;%8l8(M@Mfja!ZRpggbTGPLW$m*+0%j+VM%<{s_Mki)E*dAmQx(Oc-aAaEP- zef{bP1wO4w^I00LKfIGA%P(|Tynpgd*?$b4@CcLFXUI>S}oZV zbU0^~TQ>IaNHbB&#`77IzL0kWR&CQEYgFehI!)ZXhNb9@O2sd;nb~$W zMT=sL{I|2k2|LNlsJ?fZJ+)>za&Fu9tY_PQjPUMos-F$^Ok&Xm6hAC3#=QT`VA6uB21-Kdrw_r4 z&y>8BI}&9SQmjzD7FE$mm=?{#nM%^#s)s{)Aq~X1+5S{oX_<;(>vp?x>pZ@a5?t+B z3Ms=eLEL@IBhN!Zf_DC{@6h$__-z|Z#YLET3un75u!dpr%F|l^^-6$0sa!U3>(v3^ z3%^N=n8}GtS$;TyA~g}D2iBs~%fVV{`tvJg^}kg+9Ot+>JUrJjIet9R)sTK3Ex*r{<6z#^$M&>nX+Oi8T;6_V}jF$<71r!<0sJ1lSQ1l%5Ek41|F#2V@NchhF~RW zsL*GlTH#~N1YqwiL=kJM2ru;9*zMbWWU0OOTx-l%-jbAH2+jn+3#%ll3MW2qI#o6w zS6~zmTNe*k>*?NJFr?(EP>clHpoZ+m=#a8p>k~2EW(}t;;v2b`8VL($QD8;ca*AM5 za|Wl?0^^20bOyMgmgOnRs=_Dl9wbHw6HV!|U2AwBuoq(rDEFOXz{ zE?&`dkR?c9Hc+l$W68p~pbH4aSkgoZO~QMdHUkgBg~E`*z-%b6QA1Jqu^WI73iMmn zn4Iu8$e09i9mty)F%bMvN|+Ipq~S;_sE^5KeAE8W|DAxgn~K7z>60U&w_u@;19f_m z_<$%VL7PCkB%+l(Ia?bCB^RFuqA>|Vw-nhs%5iAHKT>wtAq(9RUI}5gU@7cA@*U@n zaB<3^dsCqaxHZyAnN`)LH{oi)zQuce+@R0bUNg3}V&O>VwxfC2WdPj4z_!f)typ{M zSQY7))uSikhT!$!zO~coo7A5_4gVMyN$X~aIy&UHv{0OSDxexkFN%!cvRTXM)jlL1 zUYPWqtpp7<@LfJM-b*ae(z@@ZLLU8$)zb1-NczSDgnIKEUbhhtI8zg76j5xM-FH_I zoFm)I`BVj;66Hw?wr?9m?8KiFon#>7A?-_JUW%be>sWXU{mXzMI*~2m&slqvAFLWB*%Gv zjW}mu6gBKoyOPv=6aC*Tx+#b=6yZOfc@v~~E&M;WC_@1LdFb700{`%-Cj1YA>N5Wj zss7(^dP-gNUx%y_^8YgGe2ve4gIrC6|L4C$YbpI3=xE`Cxp5}XYvCnVaiS&PXu=cy z^XX?4&N40GK|uasgMiTgCnW#hY7Tx&p3~y{cZr~l4_2Z4-%CktO2}HZe>?vJ=B8YC delta 8033 zcmZ9RWlS7gw6$>^+#QNTad&rj*W&IDQ=C$Kptu(=u0sdcQrwHXGq^*!Z*TG?U+&3H zp0l&h%KmwhwXPaquNzx9jer7VxDIsC59F4AutpL6ha!|u|A2@p z^$&`upD|$5R84F~@Y6z3U|<|T3#jZssF*0eccb-r1KjEJ<)WASW058t{NvY1go~u;I?2mmX^C??D`OHch z_G?nA?G00Q<_{LF&fy;jLH*Z05n#y}OSwVFpukPSymfxjp+whPU;lt~^hZA5RN>;UdS+MIpc2=2yODfsSTzU((ct&U9L?A2juHch@^jSLDZ zNe{`{AM_e;0&p2+@vYvz!wwR|@sT);&LDkcjVXA4C1vK(Y<(o?|E3`-4`;@j z#id)+NFEGW)Xn`MgC4OL7q|AEb8h-BN0W{0pm~2}UXjrGBQ`rVw27@@&(uR#t1N{) zEPT$NGue{6@gb_!r2*>+sNMIap!jPATLSSUSXUmxavGtir*xmz7%x|P=;cA|_~kQ5 z6H^C{2)ta9kpruGBBwy(k6br{SVCY^q{aq9`ZfJ6PRk{`Y{XZwFG{oLoW1N-_0KeP z6p?uXA-Y2|FZG#fwGJ~fk=W7`%eBPM2gd4WUo79D&_3K;y6l7~u$GaboKCys)4ZP5*mAaUy zo8-7>Ny8!9)5-M`eAEE?sn1s;fQZmuk#1NOKf@WtfZw7Iq%>aq;^)QnzLD%zoUgYi z>H{-01SsgIsX6p~xJ@$jN1{@hcEttrKRW+ZPd>7MurRCzT@LncLo2wmH;Cmtms|NF z14$FjcXJ_Y>9Zc+>3{1b*rd$uM?A_E6cx)^wS43c!8j$KT}hlK0##wK>$%wZ_jJFH zNF?mih~KemUFU*nSL6X%CX_@szJ3TOMK$Lhswgz-!vCYx*=4%cg<$hwTsL`a2Ik5E{7 zm0eMjc23V$s&n|dTYKn+XRpD|??%JeCq#=`fC*7KpUyB|zfnD&-Owm+oH24IlK{gKfUkV2XH?ZHHiLuXn`;e|Y`cPGxlO{3Woq!(V zBJzUBR~{Z=UGS3;XB@)zBXNh+)aC)uxXHJ^r!%H)a)d$jDbBpT_O3o55oJMSSWk~h zzj43Lp34xA^Z-n_oy(90cnzVkZkV@g;wvCI(je@!E%8+RImVFjSVNnwo+i z^C@8q32EqfyutWd%;dcB;iP{7U`CByRb^^&7>d8`t#jDv;QP2wtdnrHiJa3naRa<{ 
zYEZmk4pQ?!;p<8?Z=d}X#40u^Q8IF?AI@5;IV``u!Q??>(oaq{(^0- z2keXK;DQzNJlXWqf1Mi=$q_E1a$3dGTglx*BU^H!U#v01UD3hRcGBu3W6ufaWw!06 zC;E5MBv=oeLSSYxcv(^6gobY_91YULCPVQzSPHBjd_4vQV6#y)lROg%NA|>4K7AM* zQS2VX>itpMqL-F}g~uDl2kOVBg>3{KU{kSZKR>d_iaIKm7VAC=4A@u#1pIw|PH=JK z`Eidaplh-t1#Jd78i_1z-heo8gn)SGO_ww+Wyq@7Bu`~T zpFc?#k?;Z3TL9J( z1nQ4LPE-9h8L=Ei-V^U;2h3dSYn<&n$30lZe~K2q?qX|@rcERE@9>0kg#usqw)$X( zCbGY-%@fe;o{?qPARe5Y$v|<)QZ`29F?|3Xs#6!iaERMk1yu0FM1_c7 zzedKwG2DmJ!hXPdPW1d82HDbUJif4pqQQ-dQDWCEMd7@&nMVu?+y4>UDUQ*MdxUog z#-gNBy$Hoa`+y}bo_;Lnd4W<+G$fT9c1_gmhTDz)@AG7*+{frZS2LO0b2+0Tf`OR@ zQBwr~YxmbJ*=SgPz^uy4M;c2JsjZZ-k+2Z>mz22WQmK7~GZz^ud79hsj^!a&o*($b zi#}Lf%foZnJtg$8<_;Cx+ax3;WqRfMdn@ts?58u|qm_e|+r9CE%8LNL+Ud$c#cF8j~{f36L#Xu322csk_IFmMwOW zGV}?RAUIw|KbgY5+eSG%~7sEcMZkYfjVP@MUFt7^^+XYeOFeHZbSAJ&%;UG7`7RL$oN zAnyLa=qes+bjTyiP=`k5L z!-b9o$B~{!e&{HReRy!$@p;1M3%Y3nP(%*wYv!iElOo95t76(R70dDQSS^*i2->${ zZtgLs9Y*|w&?m4rIbftoO_#pqgdmclO-;s`dbI68+1)vM*i5k=8BXm4l@G7%U?WDs zD%cF__HFm~SK+yM5O}zKiqZ_=97GQ1tdi>r zOsA0Gbu1LhuU`MlEJJh_aJA)HdOW1whXz;72lq<{;rf<4{PwicmTKJA+v~>5vyo{I zY|F(>5~yez0OP+gfo9w51*3(x-lB;(5I)%vk~lg8P_+*gw>dJu=_iKAlBQr~%4}8-n;}~VDof3>JVJ(vK{n2noAflJG@s}~K zT4{nT{ZeIBvr4o%Qr$#HHZLpVSSQX}h1M+oB4Jg5k|0%qVkA|v8F|%p2fT8%N}Ra~ zL)@NbQ`|0IuTQ8ha%zH8{4FGNb}fi==MJppSjDbcP|1lbsO&12YhThnP03yzWv$IZ z_9GHHX`Iz@!FlhqfSoEf@OVRY6IeLR1G=_L01azY2gW~*nW=+p(~Z3x371MqeG-N= zs@voCcNrIc$Tg2GePLiYh`;S>q_~E1cFTMDPfALU7sbx0oxaNr6ID@~{|{^?J;oyC z?i$l%w#Xy5A69|?@w>0v>30dW}`iWbcf71Q0>>}K~BJD*#i9PRQV(! z$P@047A=A!{#`Si?vAED=@!1`e(L1U;Jfpt_OSyBS6T`Q(HxeRVf32x7Dh(Q_mGj* zTKCpTk$dTrkt_>8uuI9`&X*h$(Dm4iZYMBOUSZ7;Tw`BAzwi)j9T61ist+c^Ay%5M` zwudRt+>5od(W8GWk19Ks`Tb1^52tF-|QnN;)vgK4$FG_N- z#ULIozbKU>V5FhgZjhPB-nZ+ddj@{dC}MO||GY29O)EYM0#mQH^)lbHW_(n-?*BLp z-L+yNTRIGGG(v7%&*AY?As(ULLV-38*?w4^|- z%X)-EJOdyHJJB88(opPDN0re1XDp{*tVH=U+3|4g!$+o-s9N@oOr?5)e3|n5x;*L= zclnU2?8D4 zf`F}N{RX)9v!tjxR<#LN>EE)#AiMBiolD+=4qGQl?jxVS$vMkGnt*(c>v#g^Mm~!W zt%qUo=_dF-KU?gV+RMSRP}sfnY@kcjTX7&kFd{j^ef*IXri-`o(%rB3ohe=QX28u~ zmu5Nu0*RXL%14Et9GF9QwqCg_2K0BfXYeembb6NMczSrl9F)~EMt%7<_z5P!HV8n@lpW|E)_rPa zL(WVWMMW148%u}yjM^_>Ez{{yHb3j`lov&e8z*FJ%P^UcL~V~$J#;JKn6R+CNxh>q zJJb_P+zpqX=GMCwoGC3rvxLNcIv*I${#}lgh4a3>v^~I$%270;)Jm?Bw2ki!f~-5j95wrjNd8P|)l2@*Hu>;9VN2|Cyu7cL5d>AcQCsZ^YG_v&WrjQ0l&_|4-2&($_iqaN((M$ z|G+0Wo&aI>r|fTs`DR32w(NRlV0V#xB`iN>nX5?bt+PVT*3kul%3{5L&v}H~4qm^Y z{JB9cu&;WV)%Y89faRzl0{nbMr2EaUG}c&R>roQ7=iBD0K4xZI>X zHz&cZd=EolVncD!FBcY*-h2zOcTWFYDtbJa2n|SHp}$U@9VqGu+!Kh2v3O&qol$cZ zZk^sr%lGdONbamx8mJwq`o-p$PrCg|aVII#da%G}a~mXQm7)+%)Fe=$#}JTCz*`p2 zAs3puBO_emO)K}d7&Yn;HR^|&d6jQrwaP6pucaK~}Tgv*>iYU^VXc?udf2d!l9bVIAm z7aGrKxS14ahOa0tze? 
zecV-alKG=}XsvWyHrc2yIudRYBEOGzVGr{2KPDsTHA^=TVOjGDhY(lr~S06NX?O>^q@PW9KzX zEe|5zXP^;I{XB9mQB#_V3jDy=prvb1m7c&g@ZBJXJE`nVs`_wxkHui51_=nUTN$ke zYqRN;t?dJNY`BZV<8DZB&UtL45%|ulM&8E=emldvF-w%cup{S>aQ{9vkU{1tOmKzl zSXO!L<`_3Pw>0P{k{M3FRtLs3SfuWXiNn5tj%aZysJ;7LX^TydHJigNWpAGmHzldQ z1gFxPChR$*d$|}eWU1^ZS^^`BD;aPjsAaul0(MqxN?qSKVf!`H3|hm^*oA%(aDg(Ou1RVjNprzZa*}|vQ!q@HkPX1!v_Y?7={Z(ewqMiP0H(&qvvohM4^ntD| z2X(<5P*N<^I=SU(Cp9%`rZc_D2V%D-pyl6nWxTs@*^hc|CK?pmC-is7w8_DDGE-n` zIc?GGA`5SE;9`mX2XI7wh3jRkVCld8X^U)ghnrS*tM8@~2AM8lqE?}=p(g#$2$R;; zonB$ZK^i4NQ)iQbbK$yr6QftUW}$0)#w{ZCX9)%ZjKA9%&pUWJ`%kol^*TMJRny%_ zY^f&)ccCGV;duUN0u+=HZprcX?qtVM|j*uK=i}{s={5o+%%o zhfcwGWiHRyDx;$TPPtEdhEA_AC@_af9kvpD<6ZlTIgQ1&Hg{ z?v2(+4|*-7om-M+lwC&}6Sqlk0Oem^Eh5s0NoK?6l{p)^gMzI5kOqpuL7II|6x|H_-6WlD9qaC{CMr){ zin*BCoKNdC*XxsMEs;yX=>fWr|@R&Bgu)uDfV=x3PY{>@{ z-s+^YqxNF&Qt*B0#b;|2@1rtPW5y*5HOJ;L4zCav?Fh!U%hO>EX=<~T&pXa{E?nhv zm$(Msmr;sNpm;w{zSBL01W10qiCtKjovYPD7X@F9MpO;L2ByH#=OY(ehB3l= zF+B?u%R(XTvOZ9s?>`D8B(PC$tUQ5bmPXow%{#k+vw%r^SDuSJ=iGMxP|wwR3rB)& zfvGqkB0=MNH&ahZ(BGKZYUHIR=g^bmtW`00iRV_Lu@L{#E+>HBTz zL}kjAzoXCT*UP5AAze4lWj5l@%yJtS=D|k2#>=Tjgj>g3&$?b7ZikS!OCn%JT0K*I z1~DY%N4)iRy5Mf52Xp`?Qy-x*9xm-Z)B@dNbt$j^!M8q1|4vYbPOvg`NLO$KhLWBc zk_9ueWY zQAly6oe07tl&7v>-Usa!Z4hX*$LkDi@_YCZnv{#TTdP5LKGNOh>T2P=>KEXkH*$?r z=KE3bLFW(Q+h-+KVpBcicUjy2E+PCX#!xeC41}Jbt-G|jKPaS8%Rwj;0b$cb={kG6 zJFgp-gN#r&oRy+mDe7}cb(lchQ69<8@LRZJcrQYf5Dm2eN=5upYGBHZwXJM`7l2??nb09ydW*ReN9Bf|MX%jayMs%VPc&AW zY>e^5elS>-r~S@N065`Ac9G*yckRaf+jqv}v?F)K>wK^6gSSa0xOP0yB=yGcEzrO+ zD|}HH-1M`S0qXedhB(2uHZkWJd~u?L6v@vsEy7Se9;if)ccG;M(l2>Xq;Eoz@>q8J z*}1lli$@W!{Om1GLyq!Nip%)^4gb$kjD=$A$Cb^Mm4mPGd%#$VJ$GXJio4tIQxBv1 zLVpxvz4@OUl6wQLb8$G)_bZW_?l6PTy&T|$jOa!p_CBW|2=MJkeEcsZ|9__?{24T5Ao5QMFeHbo*8E@PmL?&{#1I>) JUgzJt{{gUrey#uj diff --git a/examples/graph.py b/examples/graph.py index 78fe1d8..70976af 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -5,14 +5,16 @@ import numpy as np #enable_debug() -set_max_depth(1) +set_max_depth(10) def f(): v = ndarray(1, 10, np.float64, init_value=20) - b = ndarray(1, 10, np.float64, init_value=10) + b = ndarray(1, 10, np.float64, init_value=1) c = ndarray(1, 10, np.float64, init_value=30) - g = v + b + 32 - k = g + c * 8 + g = b.where(v, c) + k = ~g + # g = v.abs() + b + 32 + # k = g + c * 8 # k = g + 1 # k = g + 2 * c - 3 * v l = k.get() diff --git a/src/ast.hpp b/src/ast.hpp index 4a9d33d..65f39d3 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -24,8 +24,9 @@ inline static void remove(ct_name_t name) noexcept { static ct_array_t &lookup(ct_name_t name) { auto find = symbol_table.find(name); + CkPrintf("Looking up array %" PRIu64 " on server\n", name); if (find == std::end(symbol_table)) - CmiAbort("Symbol %i not found", name); + CmiAbort("Symbol%" PRIu64 "not found", name); return find->second; } @@ -58,10 +59,18 @@ ctop inline to_ctop(uint64_t opcode) noexcept { case 18: return ctop::logical_or; case 19: return ctop::logical_not; case 20: return ctop::where; + case 23: return ctop::unary_expr; default: return ctop::noop; } } +std::shared_ptr to_ct_unary(uint64_t opcode, const std::vector& args) noexcept { + switch(opcode) { + case 23: return ct::unary_ops::abs(args); + default: return nullptr; + } +} + template std::vector faster_tortoise(char *cmd) { @@ -86,14 +95,13 @@ std::vector faster_tortoise(char *cmd) for(uint8_t i = 0; i < dims; i++) shape.push_back(extract(cmd)); ckout << "SHAPE> " << shape[0] << endl; - - ctop opcode = to_ctop(extract(cmd)); + uint32_t opcode = extract(cmd); bool store = extract(cmd); uint64_t tensorID = 
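    // Sketch of the flattened-node encoding consumed here (field types as read
    // by the extract<> calls; it mirrors ASTNode.get_command on the Python side):
    //   dims:u8 | shape:u64 x dims | opcode:u32 | store:bool | id:u64 |
    //   numArgs:u32 | args:double x numArgs | numOperands:u8 |
    //   then, per operand: encodedSize:u32 followed by that operand's encoding.
    // dims == 0 is the short form for an immediate double (a broadcast node),
    // and opcode == 0 (noop) reuses the tensor already stored under id.
    // The result is the flattened AST: the root sits at index 0 and children
    // are addressed by offsets (left_ = 1, right_ = 1 + left-subtree size;
    // unary nodes mark right_ = -1), as assigned further down in this function.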
extract(cmd); ckout << "TENSORID> " << tensorID << endl; - if (opcode == ctop::noop) { + if (opcode == 0) { ckout << "NO-OP" << endl; const auto& tmp = std::get(lookup(tensorID)); return tmp(); @@ -105,7 +113,13 @@ std::vector faster_tortoise(char *cmd) for(uint32_t i = 0; i < numArgs; i++) args.push_back(extract(cmd)); - tensorAstNodeType rootNode(opcode, shape); + tensorAstNodeType rootNode; + ctop ctopcode = to_ctop(opcode); + if (ctopcode == ctop::unary_expr) { + rootNode = tensorAstNodeType(-1, ctopcode, to_ct_unary(opcode, args), shape); + } else { + rootNode = tensorAstNodeType(ctopcode, shape); + } std::vector ast; uint8_t numOperands = extract(cmd); @@ -121,9 +135,9 @@ std::vector faster_tortoise(char *cmd) rootNode.left_ = 1; size_t right_size; - if (opcode == ctop::unary_expr || - opcode == ctop::logical_not || - opcode == ctop::custom_expr) { + if (ctopcode == ctop::unary_expr || + ctopcode == ctop::logical_not || + ctopcode == ctop::custom_expr) { rootNode.right_ = -1; right_size = 0; } else { diff --git a/src/server.ci b/src/server.ci index 4ebb75b..a36044f 100644 --- a/src/server.ci +++ b/src/server.ci @@ -1,11 +1,35 @@ mainmodule server { extern module libcharmtyles; - PUPable pow_t; - PUPable log_t; - PUPable exp_t; - PUPable abs_t; - + // Register all basic unary operators + PUPable ct::negate_op; + PUPable ct::abs_op; + PUPable ct::square_op; + PUPable ct::sqrt_op; + PUPable ct::reciprocal_op; + PUPable ct::sin_op; + PUPable ct::cos_op; + PUPable ct::log_op; + PUPable ct::exp_op; + PUPable ct::scale_op; + PUPable ct::add_constant_op; + PUPable ct::relu_op; + + // Register all basic binary operators + PUPable ct::add_op; + PUPable ct::subtract_op; + PUPable ct::multiply_op; + PUPable ct::divide_op; + PUPable ct::power_op; + PUPable ct::modulo_op; + PUPable ct::max_op; + PUPable ct::min_op; + PUPable ct::greater_than_op; + PUPable ct::less_than_op; + PUPable ct::equal_op; + PUPable ct::atan2_op; + PUPable ct::weighted_average_op; + mainchare Main { entry Main(CkArgMsg*); diff --git a/src/server.cpp b/src/server.cpp index c172fca..bc1c919 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -259,6 +259,7 @@ void Main::execute_creation(int epoch, int size, char *cmd) void Main::execute_fetch(int epoch, int size, char *cmd) { ct_name_t name = extract(cmd); + ckout << "name> " << name << endl; ct_array_t &arr = lookup(name); char *reply = nullptr; int reply_size = 0; diff --git a/src/server.decl.h b/src/server.decl.h index 7bf7a1e..2cacb1a 100644 --- a/src/server.decl.h +++ b/src/server.decl.h @@ -10,6 +10,27 @@ + + + + + + + + + + + + + + + + + + + + + /* DECLS: mainchare Main: Chare{ Main(CkArgMsg* impl_msg); void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); @@ -134,6 +155,27 @@ typedef CBaseT1CBase_Main; + + + + + + + + + + + + + + + + + + + + + /* ---------------- method closures -------------- */ class Closure_Main { public: diff --git a/src/server.def.h b/src/server.def.h index a8b7cfe..5e00c13 100644 --- a/src/server.def.h +++ b/src/server.def.h @@ -3,6 +3,27 @@ + + + + + + + + + + + + + + + + + + + + + /* ---------------- method closures -------------- */ #ifndef CK_TEMPLATES_ONLY #endif /* CK_TEMPLATES_ONLY */ @@ -70,19 +91,103 @@ #ifndef CK_TEMPLATES_ONLY - PUPable_def(pow_t) + PUPable_def(ct::negate_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::abs_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::square_op) #endif /* CK_TEMPLATES_ONLY */ #ifndef 
CK_TEMPLATES_ONLY - PUPable_def(log_t) + PUPable_def(ct::sqrt_op) #endif /* CK_TEMPLATES_ONLY */ #ifndef CK_TEMPLATES_ONLY - PUPable_def(exp_t) + PUPable_def(ct::reciprocal_op) #endif /* CK_TEMPLATES_ONLY */ #ifndef CK_TEMPLATES_ONLY - PUPable_def(abs_t) + PUPable_def(ct::sin_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::cos_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::log_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::exp_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::scale_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::add_constant_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::relu_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::add_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::subtract_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::multiply_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::divide_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::power_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::modulo_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::max_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::min_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::greater_than_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::less_than_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::equal_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::atan2_op) +#endif /* CK_TEMPLATES_ONLY */ + +#ifndef CK_TEMPLATES_ONLY + PUPable_def(ct::weighted_average_op) #endif /* CK_TEMPLATES_ONLY */ /* DEFS: mainchare Main: Chare{ @@ -278,13 +383,55 @@ void _registerserver(void) static int _done = 0; if(_done) return; _done = 1; _registerlibcharmtyles(); - PUPable_reg(pow_t); + PUPable_reg(ct::negate_op); + + PUPable_reg(ct::abs_op); + + PUPable_reg(ct::square_op); + + PUPable_reg(ct::sqrt_op); + + PUPable_reg(ct::reciprocal_op); + + PUPable_reg(ct::sin_op); + + PUPable_reg(ct::cos_op); + + PUPable_reg(ct::log_op); + + PUPable_reg(ct::exp_op); + + PUPable_reg(ct::scale_op); + + PUPable_reg(ct::add_constant_op); + + PUPable_reg(ct::relu_op); + + PUPable_reg(ct::add_op); + + PUPable_reg(ct::subtract_op); + + PUPable_reg(ct::multiply_op); + + PUPable_reg(ct::divide_op); + + PUPable_reg(ct::power_op); + + PUPable_reg(ct::modulo_op); + + PUPable_reg(ct::max_op); + + PUPable_reg(ct::min_op); + + PUPable_reg(ct::greater_than_op); + + PUPable_reg(ct::less_than_op); - PUPable_reg(log_t); + PUPable_reg(ct::equal_op); - PUPable_reg(exp_t); + PUPable_reg(ct::atan2_op); - PUPable_reg(abs_t); + PUPable_reg(ct::weighted_average_op); /* REG: mainchare Main: Chare{ Main(CkArgMsg* impl_msg); diff --git a/src/server.hpp b/src/server.hpp index 1c20a72..f4f7bb2 100644 --- a/src/server.hpp +++ b/src/server.hpp @@ -46,134 +46,6 @@ class Main : public CBase_Main void execute_sync(int epoch, int size, char *cmd); }; -class pow_t : public ct::unary_operator -{ -public: - pow_t(double arg) : arg_(arg) {} - ~pow_t() {} - - using ct::unary_operator::unary_operator; - - inline double operator()(std::size_t index, double value) 
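    // The hand-rolled PUPable functors removed here (pow_t, log_t, exp_t,
    // abs_t) give way to the library-provided ct:: operator classes that
    // server.ci now registers (ct::abs_op, ct::log_op, ct::exp_op, ...);
    // on the AST side, unary_expr nodes are built through to_ct_unary in
    // ast.hpp, which at this point only wires up ct::unary_ops::abs (opcode 23).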
override final - { - return std::pow(value, arg_); - } - - inline double operator()(std::size_t rows, std::size_t cols, double value) override final - { - return std::pow(value, arg_); - } - - PUPable_decl(pow_t); - pow_t(CkMigrateMessage *m) - : ct::unary_operator(m) - { - } - - void pup(PUP::er &p) final - { - p | arg_; - ct::unary_operator::pup(p); - } - -private: - double arg_; -}; - -class log_t : public ct::unary_operator -{ -public: - log_t(double arg) : arg_(arg) {} - ~log_t() {} - - using ct::unary_operator::unary_operator; - - inline double operator()(std::size_t index, double value) override final - { - return std::log(value) / std::log(arg_); - } - - inline double operator()(std::size_t rows, std::size_t cols, double value) override final - { - return std::log(value) / std::log(arg_); - } - - PUPable_decl(log_t); - log_t(CkMigrateMessage *m) - : ct::unary_operator(m) - { - } - - void pup(PUP::er &p) final - { - p | arg_; - ct::unary_operator::pup(p); - } - -private: - double arg_; -}; - -class exp_t : public ct::unary_operator -{ -public: - exp_t() = default; - ~exp_t() {} - - using ct::unary_operator::unary_operator; - - inline double operator()(std::size_t index, double value) override final - { - return std::exp(value); - } - - inline double operator()(std::size_t rows, std::size_t cols, double value) override final - { - return std::exp(value); - } - - PUPable_decl(exp_t); - exp_t(CkMigrateMessage *m) - : ct::unary_operator(m) - { - } - - void pup(PUP::er &p) final - { - ct::unary_operator::pup(p); - } -}; - -class abs_t : public ct::unary_operator -{ -public: - abs_t() = default; - ~abs_t() {} - - using ct::unary_operator::unary_operator; - - inline double operator()(std::size_t index, double value) override final - { - return std::abs(value); - } - - inline double operator()(std::size_t rows, std::size_t cols, double value) override final - { - return std::abs(value); - } - - PUPable_decl(abs_t); - abs_t(CkMigrateMessage *m) - : ct::unary_operator(m) - { - } - - void pup(PUP::er &p) final - { - ct::unary_operator::pup(p); - } -}; - class Server { public: From 2ce770002b25087c2a709574f6ae9c4e18ba0953 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 15:35:32 +0530 Subject: [PATCH 05/34] ast flattening for matmuls and dot --- .vscode/settings.json | 8 ++- dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 22641 -> 22641 bytes examples/graph.py | 17 +++--- src/ast.hpp | 73 ++++++++++++++++++++++++-- 4 files changed, 87 insertions(+), 11 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 509f07f..81f51b7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -2,6 +2,12 @@ "files.associations": { "*.sage": "python", "vector": "cpp", - "iosfwd": "cpp" + "iosfwd": "cpp", + "compare": "cpp", + "cstdint": "cpp", + "format": "cpp", + "unordered_map": "cpp", + "map": "cpp", + "set": "cpp" } } \ No newline at end of file diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg index e33d22c86086c2c22ce377930846d792976812cb..4688357aac4ca56e5cc5c737cabc25ee55234f21 100644 GIT binary patch delta 176 zcmeykf$`%8MxFp~W)=|!1_llWo8*l=^Vpeek|(cW-vXkNIBY=F0*+)5#meajq&9nS zJ`n+N_2iy{xT5kI;vl--tc4duv%0L~0nu9nSeb$J@dQyFAt0kB kpAK;XQk(fhdl*67xnZ#&>Q7h{h>8f008yJD6i-Ad08mOdP5=M^ delta 176 zcmeykf$`%8MxFp~W)=|!1_llW|HO?v^VpgE6DO}>-vXkNIBY=F0*+)5#meajq&9nS zJ`n+N_2iy{xT5kI;vl--tc4duv%0L~0nu9nSeb$J@dQyFAt0kB kpAK;XQk(fhdl*67xnZ#&>Q7h{h>8f008yJD6i-Ad0LZ8~!~g&Q diff --git a/examples/graph.py b/examples/graph.py index 
70976af..872f0a4 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -8,17 +8,20 @@ set_max_depth(10) def f(): - v = ndarray(1, 10, np.float64, init_value=20) - b = ndarray(1, 10, np.float64, init_value=1) - c = ndarray(1, 10, np.float64, init_value=30) - g = b.where(v, c) - k = ~g + v = ndarray(2, [10, 10], np.float64, init_value=20) + b = ndarray(2, [10, 10], np.float64, init_value=1) + c = ndarray(2, [10, 10], np.float64, init_value=30) + # k = v * 2 + b + 3 + c - 32 + # l = k >= 42 + r = b.where(42, 69) + # g = b.where(v, c) + # z = ~r # g = v.abs() + b + 32 # k = g + c * 8 # k = g + 1 # k = g + 2 * c - 3 * v - l = k.get() - print(l) + q = r.get() + print(q) # w = c # for i in range(5): # y = v + b + w diff --git a/src/ast.hpp b/src/ast.hpp index 65f39d3..ecd3fde 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -42,6 +42,17 @@ inline T peek(char* &msg) noexcept { return *(reinterpret_cast(msg)); } +std::pair getMatmulOperand(char* cmd) { + uint8_t dim = extract(cmd); + if (dim < 1 || dim > 2) CmiAbort("Matmuls not supported with dimension%" PRIu8 "", dim); + cmd += dim * sizeof(uint64_t); + uint32_t opcode = extract(cmd); + if (opcode) CmiAbort("Matmuls not supported with rvalues"); + cmd += sizeof(bool); + uint64_t tensorID = extract(cmd); + return std::make_pair(dim, tensorID); +} + ctop inline to_ctop(uint64_t opcode) noexcept { switch (opcode) { case 0: return ctop::noop; @@ -49,6 +60,7 @@ ctop inline to_ctop(uint64_t opcode) noexcept { case 2: return ctop::sub; case 3: return ctop::multiply; case 4: return ctop::divide; + case 5: return ctop::matmul; case 11: return ctop::greater; case 12: return ctop::lesser; case 13: return ctop::geq; @@ -102,7 +114,6 @@ std::vector faster_tortoise(char *cmd) ckout << "TENSORID> " << tensorID << endl; if (opcode == 0) { - ckout << "NO-OP" << endl; const auto& tmp = std::get(lookup(tensorID)); return tmp(); } @@ -125,6 +136,63 @@ std::vector faster_tortoise(char *cmd) uint8_t numOperands = extract(cmd); ckout << "NUM OPERANDS> " << numOperands << endl; + // when we encounter a matmul, we treat it as a : + // 1. a dot product returning a scalar if both the operands are vectors + // 2. a dot product returning a vector if one operand is a matrix and the other a vector + // 3. 
a gemm returning a matrix if both the operands are matrices + if (ctopcode == ctop::matmul) { + uint32_t operand_size = extract(cmd); + std::pair xOperandInfo = getMatmulOperand(cmd); + cmd += operand_size; + operand_size = extract(cmd); + std::pair yOperandInfo = getMatmulOperand(cmd); + cmd += operand_size; + + const uint8_t& xDim = xOperandInfo.first; + const uint8_t& yDim = yOperandInfo.first; + const uint64_t& xID = xOperandInfo.second; + const uint64_t& yID = yOperandInfo.second; + + if (xDim == 1 and yDim == 1) { + const auto& x = std::get(lookup(xID)); + const auto& y = std::get(lookup(yID)); + + ct::scalar tensor0D = ct::dot(x, y); + double result = tensor0D.get(); + + insert(tensorID, std::move(tensor0D)); + tensorAstNodeType temp_node(0, ctop::broadcast, result, shape); + return {temp_node}; + } else if(xDim == 1 and yDim == 2) { + const auto& x = std::get(lookup(xID)); + const auto& y = std::get(lookup(yID)); + + ct::vector tensor = ct::dot(x, y); + const auto& tensorNode = tensor(); + insert(tensorID, std::move(tensor)); + + return tensorNode; + } else if(xDim == 2 and yDim == 1) { + const auto& x = std::get(lookup(xID)); + const auto& y = std::get(lookup(yID)); + + ct::vector tensor = ct::dot(x, y); + const auto& tensorNode = tensor(); + insert(tensorID, std::move(tensor)); + + return tensorNode; + } else if(xDim == 2 and yDim == 2) { + const auto& x = std::get(lookup(xID)); + const auto& y = std::get(lookup(yID)); + + ct::matrix tensor = ct::matmul(x, y); + const auto& tensorNode = tensor(); + insert(tensorID, std::move(tensor)); + + return tensorNode; + } + } + if(numOperands <= 2) { uint32_t operand_size = extract(cmd); std::vector left = faster_tortoise(cmd); @@ -144,11 +212,10 @@ std::vector faster_tortoise(char *cmd) rootNode.right_ = left.size() + 1; right_size = right.size(); } - ckout << "HEREH" << endl; + ast.reserve(left.size() + right_size + 1); ast.emplace_back(rootNode); std::copy(left.begin(), left.end(), std::back_inserter(ast)); - ckout << "THERE" << endl; if (right_size) std::copy(right.begin(), right.end(), std::back_inserter(ast)); From 4016abff637c6949badc1fae0bfa7301d8b7199c Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 15:45:44 +0530 Subject: [PATCH 06/34] fixes for matmul --- .vscode/settings.json | 5 +++- src/ast.hpp | 56 +++++++++++++++++++++++-------------------- 2 files changed, 34 insertions(+), 27 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 81f51b7..4a10a94 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -8,6 +8,9 @@ "format": "cpp", "unordered_map": "cpp", "map": "cpp", - "set": "cpp" + "set": "cpp", + "chrono": "cpp", + "memory": "cpp", + "utility": "cpp" } } \ No newline at end of file diff --git a/src/ast.hpp b/src/ast.hpp index ecd3fde..951a868 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -50,7 +50,7 @@ std::pair getMatmulOperand(char* cmd) { if (opcode) CmiAbort("Matmuls not supported with rvalues"); cmd += sizeof(bool); uint64_t tensorID = extract(cmd); - return std::make_pair(dim, tensorID); + return {dim, tensorID}; } ctop inline to_ctop(uint64_t opcode) noexcept { @@ -163,33 +163,37 @@ std::vector faster_tortoise(char *cmd) insert(tensorID, std::move(tensor0D)); tensorAstNodeType temp_node(0, ctop::broadcast, result, shape); return {temp_node}; - } else if(xDim == 1 and yDim == 2) { - const auto& x = std::get(lookup(xID)); - const auto& y = std::get(lookup(yID)); - - ct::vector tensor = ct::dot(x, y); - const auto& tensorNode = tensor(); - insert(tensorID, 
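            // Note: matmul/dot is not folded into the lazy AST like the
            // elementwise operators. Both operands must already be
            // materialized tensors (rvalue operands abort in getMatmulOperand
            // above); the product is evaluated eagerly and stored under the
            // result id, and the caller receives either a broadcast node
            // carrying the scalar result (vector-vector dot) or the stored
            // result's own AST nodes.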
std::move(tensor)); - - return tensorNode; - } else if(xDim == 2 and yDim == 1) { - const auto& x = std::get(lookup(xID)); - const auto& y = std::get(lookup(yID)); - - ct::vector tensor = ct::dot(x, y); - const auto& tensorNode = tensor(); - insert(tensorID, std::move(tensor)); - - return tensorNode; - } else if(xDim == 2 and yDim == 2) { - const auto& x = std::get(lookup(xID)); - const auto& y = std::get(lookup(yID)); + } else if constexpr (std::is_same_v) { + if (xDim == 1 and yDim == 2) { + const auto& x = std::get(lookup(xID)); + const auto& y = std::get(lookup(yID)); + + ct::vector tensor = ct::dot(x, y); + const auto& tensorNode = tensor(); + insert(tensorID, std::move(tensor)); + + return tensorNode; + } else if (xDim == 2 and yDim == 1) { + const auto& x = std::get(lookup(xID)); + const auto& y = std::get(lookup(yID)); + + ct::vector tensor = ct::dot(x, y); + const auto& tensorNode = tensor(); + insert(tensorID, std::move(tensor)); + + return tensorNode; + } + } else if constexpr (std::is_same_v) { + if (xDim == 2 and yDim == 2) { + const auto& x = std::get(lookup(xID)); + const auto& y = std::get(lookup(yID)); - ct::matrix tensor = ct::matmul(x, y); - const auto& tensorNode = tensor(); - insert(tensorID, std::move(tensor)); + ct::matrix tensor = ct::matmul(x, y); + const auto& tensorNode = tensor(); + insert(tensorID, std::move(tensor)); - return tensorNode; + return tensorNode; + } } } From 1180b11e31ba8ed7962b2c747b4ba1eede45e3cb Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 19:28:23 +0530 Subject: [PATCH 07/34] add support for scalars --- .vscode/settings.json | 6 +++- build/lib/charmnumeric/array.py | 14 +++++---- build/lib/charmnumeric/ast.py | 40 +++++++++++++++----------- charmnumeric/array.py | 14 +++++---- charmnumeric/ast.py | 40 +++++++++++++++----------- dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 22641 -> 22956 bytes examples/graph.py | 35 ++++++++++++++++++---- src/ast.hpp | 26 +++++++++++++---- src/server.cpp | 6 ++-- 9 files changed, 123 insertions(+), 58 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 4a10a94..251dd1b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -11,6 +11,10 @@ "set": "cpp", "chrono": "cpp", "memory": "cpp", - "utility": "cpp" + "utility": "cpp", + "array": "cpp", + "ranges": "cpp", + "tuple": "cpp", + "variant": "cpp" } } \ No newline at end of file diff --git a/build/lib/charmnumeric/array.py b/build/lib/charmnumeric/array.py index 5f33859..ab2f464 100644 --- a/build/lib/charmnumeric/array.py +++ b/build/lib/charmnumeric/array.py @@ -13,9 +13,9 @@ deletion_buffer_size = 0 -def create_ndarray(ndim, dtype, shape=None, name=None, command_buffer=None): +def create_ndarray(ndim, dtype, shape=None, name=None, command_buffer=None, is_scalar=False): z = ndarray(ndim, dtype=dtype, shape=shape, name=name, - command_buffer=command_buffer) + command_buffer=command_buffer, is_scalar=is_scalar) return z @@ -26,7 +26,7 @@ def from_numpy(nparr): class ndarray: def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, - nparr=None, name=None, command_buffer=None): + nparr=None, name=None, command_buffer=None, is_scalar=False): """ This is the wrapper class for AUM array objects. 
The argument 'name' should be None except when wrapping @@ -41,6 +41,7 @@ def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, self.itemsize = np.dtype(dtype).itemsize self.init_value = init_value self.command_buffer = command_buffer + self.is_scalar = is_scalar if isinstance(shape, np.ndarray) or isinstance(shape, list) or \ isinstance(shape, tuple): self.shape = np.asarray(shape, dtype=np.int32) @@ -218,6 +219,7 @@ def __rtruediv__(self, other): name=res, command_buffer=cmd_buffer) def __matmul__(self, other): + is_scalar = False if self.ndim == 2 and other.ndim == 2: res_ndim = 2 shape = np.array([self.shape[0], other.shape[1]], dtype=np.int32) @@ -225,14 +227,15 @@ def __matmul__(self, other): res_ndim = 1 shape = np.array([self.shape[0]], dtype=np.int32) elif self.ndim == 1 and other.ndim == 1: - res_ndim = 0 + res_ndim = 1 shape = np.array([1], dtype=np.int32) + is_scalar = True else: raise RuntimeError("Dimension mismatch") res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('@'), [self, other]) return create_ndarray(res_ndim, self.dtype, shape=shape, - name=res, command_buffer=cmd_buffer) + name=res, command_buffer=cmd_buffer, is_scalar=is_scalar) def _flush_command_buffer(self): # send the command to server @@ -270,6 +273,7 @@ def get(self): data_bytes = send_command_raw(Handlers.fetch_handler, cmd, reply_size=total_size) return from_bytes(data_bytes, np.dtype(self.dtype).char) else: + print("GET OSME") total_size = self.itemsize for i in self.shape: total_size*=i diff --git a/build/lib/charmnumeric/ast.py b/build/lib/charmnumeric/ast.py index 257e1be..fa9c5f2 100644 --- a/build/lib/charmnumeric/ast.py +++ b/build/lib/charmnumeric/ast.py @@ -34,24 +34,28 @@ def __init__(self, name, opcode, operands, args=[]): if isinstance(op, ndarray): self.depth = max(self.depth, 1 + op.command_buffer.depth) - ###################################################################################################################################### - # Encoding = | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # - # | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ | # - # NB: If opcode is 0, the encoding is limited to ID # - # Encoding = | dim | shape | val | # - # | 8 | 64 | 64 | # - # NB: Latter encoding for double constants # - ###################################################################################################################################### - def get_command(self, validated_arrays, ndim, shape, save=True): + ############################################################################################################################################### + # Marker determines whether we are dealing with a tensor, a scalar or an arithmetic type # + # Encoding = | Marker | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # + # | 8 | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ 
| # + # NB: If opcode is 0, the encoding is limited to ID # + # Encoding = | Marker | shape | val | # + # | 8 | 64 | 64 | # + # NB: Latter encoding for double constants # + ############################################################################################################################################### + def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False): from charmnumeric.array import ndarray # Ndims and Shape setup - cmd = to_bytes(ndim, 'B') + if is_scalar: + cmd = to_bytes(1, 'B') + else: + cmd = to_bytes(2, 'B') + cmd += to_bytes(ndim, 'B') for _shape in shape: cmd += to_bytes(_shape, 'L') if self.opcode == 0: - print(self.operands[0].name) cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'L') return cmd @@ -61,28 +65,32 @@ def get_command(self, validated_arrays, ndim, shape, save=True): cmd += to_bytes(arg, 'd') cmd += to_bytes(len(self.operands), 'B') - print(len(self.operands)) for op in self.operands: if isinstance(op, ndarray): if op.name in validated_arrays: - opcmd = to_bytes(op.ndim, 'B') + if op.is_scalar: + opcmd = to_bytes(1, 'B') + else: + opcmd = to_bytes(2, 'B') + opcmd += to_bytes(op.ndim, 'B') for _shape in op.shape: opcmd += to_bytes(_shape, 'L') opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op) + if op.is_scalar: + opcmd = op.command_buffer.get_command(validated_arrays, ndim, shape, save=save_op, is_scalar=op.is_scalar) + else: + opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) if not op.valid and save_op: validated_arrays[op.name] = op elif isinstance(op, float) or isinstance(op, int): - print("SCALAR OP> ", op) opcmd = to_bytes(0, 'B') for _shape in shape: opcmd += to_bytes(_shape, 'L') opcmd += to_bytes(float(op), 'd') cmd += to_bytes(len(opcmd), 'I') cmd += opcmd - print(cmd) return cmd def plot_graph(self, validated_arrays={}, G=None, node_map={}, diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 5f33859..ab2f464 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -13,9 +13,9 @@ deletion_buffer_size = 0 -def create_ndarray(ndim, dtype, shape=None, name=None, command_buffer=None): +def create_ndarray(ndim, dtype, shape=None, name=None, command_buffer=None, is_scalar=False): z = ndarray(ndim, dtype=dtype, shape=shape, name=name, - command_buffer=command_buffer) + command_buffer=command_buffer, is_scalar=is_scalar) return z @@ -26,7 +26,7 @@ def from_numpy(nparr): class ndarray: def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, - nparr=None, name=None, command_buffer=None): + nparr=None, name=None, command_buffer=None, is_scalar=False): """ This is the wrapper class for AUM array objects. 
The argument 'name' should be None except when wrapping @@ -41,6 +41,7 @@ def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, self.itemsize = np.dtype(dtype).itemsize self.init_value = init_value self.command_buffer = command_buffer + self.is_scalar = is_scalar if isinstance(shape, np.ndarray) or isinstance(shape, list) or \ isinstance(shape, tuple): self.shape = np.asarray(shape, dtype=np.int32) @@ -218,6 +219,7 @@ def __rtruediv__(self, other): name=res, command_buffer=cmd_buffer) def __matmul__(self, other): + is_scalar = False if self.ndim == 2 and other.ndim == 2: res_ndim = 2 shape = np.array([self.shape[0], other.shape[1]], dtype=np.int32) @@ -225,14 +227,15 @@ def __matmul__(self, other): res_ndim = 1 shape = np.array([self.shape[0]], dtype=np.int32) elif self.ndim == 1 and other.ndim == 1: - res_ndim = 0 + res_ndim = 1 shape = np.array([1], dtype=np.int32) + is_scalar = True else: raise RuntimeError("Dimension mismatch") res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('@'), [self, other]) return create_ndarray(res_ndim, self.dtype, shape=shape, - name=res, command_buffer=cmd_buffer) + name=res, command_buffer=cmd_buffer, is_scalar=is_scalar) def _flush_command_buffer(self): # send the command to server @@ -270,6 +273,7 @@ def get(self): data_bytes = send_command_raw(Handlers.fetch_handler, cmd, reply_size=total_size) return from_bytes(data_bytes, np.dtype(self.dtype).char) else: + print("GET OSME") total_size = self.itemsize for i in self.shape: total_size*=i diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 257e1be..fa9c5f2 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -34,24 +34,28 @@ def __init__(self, name, opcode, operands, args=[]): if isinstance(op, ndarray): self.depth = max(self.depth, 1 + op.command_buffer.depth) - ###################################################################################################################################### - # Encoding = | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # - # | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ | # - # NB: If opcode is 0, the encoding is limited to ID # - # Encoding = | dim | shape | val | # - # | 8 | 64 | 64 | # - # NB: Latter encoding for double constants # - ###################################################################################################################################### - def get_command(self, validated_arrays, ndim, shape, save=True): + ############################################################################################################################################### + # Marker determines whether we are dealing with a tensor, a scalar or an arithmetic type # + # Encoding = | Marker | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # + # | 8 | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ 
| # + # NB: If opcode is 0, the encoding is limited to ID # + # Encoding = | Marker | shape | val | # + # | 8 | 64 | 64 | # + # NB: Latter encoding for double constants # + ############################################################################################################################################### + def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False): from charmnumeric.array import ndarray # Ndims and Shape setup - cmd = to_bytes(ndim, 'B') + if is_scalar: + cmd = to_bytes(1, 'B') + else: + cmd = to_bytes(2, 'B') + cmd += to_bytes(ndim, 'B') for _shape in shape: cmd += to_bytes(_shape, 'L') if self.opcode == 0: - print(self.operands[0].name) cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'L') return cmd @@ -61,28 +65,32 @@ def get_command(self, validated_arrays, ndim, shape, save=True): cmd += to_bytes(arg, 'd') cmd += to_bytes(len(self.operands), 'B') - print(len(self.operands)) for op in self.operands: if isinstance(op, ndarray): if op.name in validated_arrays: - opcmd = to_bytes(op.ndim, 'B') + if op.is_scalar: + opcmd = to_bytes(1, 'B') + else: + opcmd = to_bytes(2, 'B') + opcmd += to_bytes(op.ndim, 'B') for _shape in op.shape: opcmd += to_bytes(_shape, 'L') opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op) + if op.is_scalar: + opcmd = op.command_buffer.get_command(validated_arrays, ndim, shape, save=save_op, is_scalar=op.is_scalar) + else: + opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) if not op.valid and save_op: validated_arrays[op.name] = op elif isinstance(op, float) or isinstance(op, int): - print("SCALAR OP> ", op) opcmd = to_bytes(0, 'B') for _shape in shape: opcmd += to_bytes(_shape, 'L') opcmd += to_bytes(float(op), 'd') cmd += to_bytes(len(opcmd), 'I') cmd += opcmd - print(cmd) return cmd def plot_graph(self, validated_arrays={}, G=None, node_map={}, diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg index 4688357aac4ca56e5cc5c737cabc25ee55234f21..288c351a9a1b1a586a47877a50fb7d0a3712795f 100644 GIT binary patch delta 13863 zcmZX5V{j%+w{2|OnK%^4WI~$GT;yxARr(xAaiMT2-(nJb7_f{(2f6s5e)CYkPBn{FF?Wy{R@(? 
zpty2<)e{+cwG9i02`3NkpS{zW0~|LNV|VNyOvxDC6o`cr&;=aj*#Lml*zgTeOr$Z8 zldoeDXCzj{>gO^8vkcy+7|~Y7zZ*M_EHuWOvk)Cjm|#MbKccp8A248xBj&8@Om+5~ z$nJ=Qw!^c$dvVUt2CWKeT}Es=8O{IOpp%Bh*;l>iuobf`J`pBLTBaDPou{cuq?N~- zkD>_z5uV={{F;q0S_tq$dqCnIp#nC{`b>xDX>Mo4a6T@P_;Xt_3ouT8dw(C?cf0|Q z?*dk9Oj!?)B^apZ?lu~iotJcPZS1&x4OXv5A1hNkv@Lv>_O?gYQ9w{9e_0OIpvh_AJ%HDt?Y1dKv zTVR4plVTjglo5%e3O;D z&@olLf0VpN+?s4T+FRoP+Ry!7;vW@zz2Hl8;3K&C7@iY!FJCl;msp|c_wS&BN<@}H zQh7fKQPM)B4PH7dwN@Zp5=sz4n!+x82O$8hn!cOLLI&&vi+=1Q*#Rpy2Hy#Wq8NmL zgfo@7b<0 z8aGK^UijwDtGP%@Etts3F>M^DrmJ7$(l(e-uP30|Emy-8?35_K(IQRR7d-!oFjB`= zBl!ixk7DEUI1XRpAKKxbIs4(&XkZR^Kvz5n zbv<2v)^eu7aYjwxNOQDTRzz`4GR0hbovOKui34Daf#}byIzi3;6R~~Imvdnlua*X( zXubjKDVs4ka60$tm`KD{cadnwK3o;AV(2YcXwbIsq)ek4eE{oBuTlSV4(s_BdCGi- z%m9I$0b7vnVARa7{zvzd2=c2zGQtB#C@;;D;oU->v^zWwAIE{q`+NyP^OqgtkQexP zrYK-sLb|MxVc<(WSd}3bZ`LLs; z&JkQQ^KMCOy=5Si-7K^mU^SrrK)Gk3Z%F|0veGRTQQ$^M*VWk356GW&Wv{QeF`>XX zOP?tfbW9Y|v&NAOiOS7au))|`tzI$RfF8#Ayi0jr9kCAco|guQbzr8sWj-9OBKx~i z+Nsn{D-MfBuQ_6_cE$Jh^EEzE@c2ty3IaOU zf6`WeG;AKj&p=~Njl{AdpJd`DR_8z$nwfIu3^?`K$t4#sR<1}V%Wf`ZTJh27AqDN* zTR$GIXFF6acjO2?qA^;fI)rWi9%NC>d-~s=??Rf(W;K_{lWhd_0xdGfSR2o3=faL7E%Jl>(X zV%eI6uh6kaPDe(fN2|K1>8*9PajNeS^YAR&{7xDq%=x(8OybnXMBHvmi*YMO|3c^75B}`e-GtYuFf6y!Eqg@&*{aAb@C_Fve6=HTKi)jgRKAiG>}?WtMWVGbDoc3zsUWUS;}vBO0TcRbawo7t$D9L(d=&_S%$rrqy9 zyp8%}UeiHr9l}-N5P4DjPH$9dui0g&0*3>M`S%KK(vzVF!D8Kmzg|PHs;9v{@-@}g znoNI@T8Fm)H7nh=TY3vpl*v7gBhf*Xg0{!jq|FJ?eg*XCOjl z-miB5TAcciGg>kGE6TUZOKQWMrucjTPnHf`z1cRSH!dbcXl%Pc@}ClyQ2C34l-=Sm zl>qc?GmvaCc?X80Bl|!jx?NtqtuRfN{*#vFlg6@ zyMiFiM@Q)8ejo=-nKKS3!-exr5~t7LfLTjRlILSYrBk|OvZ0!IM^U@lfyoz0B`eKBt!U2?yG?12`bLb=z<|9byaD{{nziw3K> z+wUguHnn0FSZi|z)5dnF0A@v+KQjNu2Xne8~B}d&0 zQ;^`I+on_!AO6PR13G&|4toH0&>OXRcLIA-Lc)2eEHopV0u2y5Pd^g(0tqnKMHX|Q z3Sh~<;>rRb`64v`Q|u*J|Hp`8@xi-BoO!DClC>1@OOFg7F^qt|HOFSxzNc^V-u7kj zfv;zGrESko)j}Z0Ylw<#5RD(1hkC{n$ze#4MHpqMQ@|;m7L`(>+_+w=6s=ZB+wM8s zA@5=*)z#1@D83@wa-Ly85}RJ(P1ED~Ym+L|j#?F9&^HVgVP#Nd-xF|uK3>ujAHU7# zGn!b-k3@Q)PNMyQnJjK&7Yjc^DTXATJ~~@+)?voVgs_c=#(9H$#$c5oCEMki8;5fDYc)i)uZlm z6^96*pcrx~Q|n>Ao7ry|4oVMK;O2VF2SSKk%2DwjwE6OA2>7t^kJs8E1+H*D`@Emp zvZ&|~u=c7X2cpC)} z1yIdUzHCt^pW~*7^)Bxo&xAK0-6y`0!ipx9uxLC_qmpU>@J{A^I#$$K#jNySt-F2}JgR!7F(5CSlQr%4y@Gd`X*w>P164mz=>3{jd-5vr| zW0rIJ@><&6;zli~$LHZ+Zp=7BA=2%|r8RYhAPO@A`+d~5H>J&~eST#|#Lwcb*tq!f&IP}OQ}Bo^=8nN# zT4nn8>9|Qe`gkK;O-yhkb5oGy7knB~3bmJoy9;ofJ-}eoM$`YK`H>&gYqmSnJph0S z>?s5NoIh!&zR@+D2{+sbtJ>qQ-e_A4hn@%52JO68Ud0r5pDEFTH2I3S@am#s0kH+G zB=0-_f5O!yBwh>mkISj)Rv8@hU$@hX8vnoGP#5|aUh2~SVo?1*EjSPwBLB{0G(r&m zyAvmQLxY|0zum4tK&byT_x#Uj4su9l)D!~dNp{i{0vX3kZqy|EcdJ!RJP;Cs= 42 - r = b.where(42, 69) + q = v @ b + q.get() + v1 = q @ c + v2 = b @ c + # q1 = q @ v + v3 = b @ c + + v1.get() + v2.get() + v3.get() + + res = v3*8 + v1 - 4 + v2.abs() + + res.get() + + final_res = res + 42 + + # q.get() + # w = c @ b + # w.get() + # res = q.abs() + c + baka = final_res.get() + print(baka) + # r = b.where(42, 69) # g = b.where(v, c) # z = ~r # g = v.abs() + b + 32 # k = g + c * 8 # k = g + 1 # k = g + 2 * c - 3 * v - q = r.get() - print(q) + # q = k.get() + # print(res.get()) # w = c # for i in range(5): # y = v + b + w diff --git a/src/ast.hpp b/src/ast.hpp index 951a868..3fdbdb6 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -10,7 +10,7 @@ using ctop = ct::util::Operation; using ct_name_t = uint64_t; -using ct_array_t = std::variant; +using ct_array_t = std::variant; std::unordered_map symbol_table; inline static void insert(ct_name_t name, ct_array_t arr) noexcept { @@ -43,6 +43,8 @@ inline T peek(char* &msg) noexcept { } std::pair 
getMatmulOperand(char* cmd) { + uint8_t marker = extract(cmd); + if (marker != 2) CmiAbort("Matmuls only supported with Tensor Types"); uint8_t dim = extract(cmd); if (dim < 1 || dim > 2) CmiAbort("Matmuls not supported with dimension%" PRIu8 "", dim); cmd += dim * sizeof(uint64_t); @@ -86,12 +88,12 @@ std::shared_ptr to_ct_unary(uint64_t opcode, const std::vect template std::vector faster_tortoise(char *cmd) { - uint8_t dims = extract(cmd); - ckout << "DIMS> " << dims << endl; + uint8_t marker = extract(cmd); + ckout << "Marker> " << marker << endl; std::vector shape; shape.reserve(2); - if (dims == 0) { + if (marker == 0) { if constexpr (std::is_same_v) { shape.push_back(extract(cmd)); } else if constexpr (std::is_same_v) { @@ -104,6 +106,9 @@ std::vector faster_tortoise(char *cmd) return {temp_node}; } + uint8_t dims = extract(cmd); + ckout << "DIMS> " << dims << endl; + for(uint8_t i = 0; i < dims; i++) shape.push_back(extract(cmd)); ckout << "SHAPE> " << shape[0] << endl; @@ -114,8 +119,16 @@ std::vector faster_tortoise(char *cmd) ckout << "TENSORID> " << tensorID << endl; if (opcode == 0) { - const auto& tmp = std::get(lookup(tensorID)); - return tmp(); + if (marker == 1) { + ckout << "GOT ME A SCALAR TYPE YES" << endl; + auto& tmp = std::get(lookup(tensorID)); + double result = tmp.get(); + tensorAstNodeType temp_node(0, ctop::broadcast, result, shape); + return {temp_node}; + } else { + const auto& tmp = std::get(lookup(tensorID)); + return tmp(); + } } // Args for custom unops/binops @@ -141,6 +154,7 @@ std::vector faster_tortoise(char *cmd) // 2. a dot product returning a vector if one operand is a matrix and the other a vector // 3. a gemm returning a matrix if both the operands are matrices if (ctopcode == ctop::matmul) { + ckout << "IN MATMUL" << endl; uint32_t operand_size = extract(cmd); std::pair xOperandInfo = getMatmulOperand(cmd); cmd += operand_size; diff --git a/src/server.cpp b/src/server.cpp index bc1c919..e87fdc5 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -142,9 +142,9 @@ void Main::execute_operation(int epoch, int size, char *cmd) remove(name); } CkPrintf("Memory usage after %u deletions is %f MB\n", num_deletions, CmiMemoryUsage() / (1024. 
* 1024.)); - - if (peek(cmd) == 1) faster_tortoise(cmd); - else if (peek(cmd) == 2) faster_tortoise(cmd); + char* tagPos = cmd + sizeof(uint8_t); + if (peek(tagPos) == 1) faster_tortoise(cmd); + else if (peek(tagPos) == 2) faster_tortoise(cmd); } void Main::execute_command(int epoch, uint8_t kind, int size, char *cmd) From 00ce7005a876984a5571cf28e12fa15d3d94287f Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 19:46:08 +0530 Subject: [PATCH 08/34] expand matmuls to operator on rvalues as well --- dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 22956 -> 22956 bytes examples/graph.py | 26 +++++++++++++------------- src/ast.hpp | 21 ++++++++++++++++----- 3 files changed, 29 insertions(+), 18 deletions(-) diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg index 288c351a9a1b1a586a47877a50fb7d0a3712795f..892d8544641b46ebd75227ede9e7ecc0daa5cb0e 100644 GIT binary patch delta 176 zcmZ3pnQ_f#MxFp~W)=|!1_lm>g>yIZ%wuO-ICt_I_AMYPiNgj&E#OE7QLLPfKx(rG zXR#=VyI1Zhh`U7oo;ZkZG2g`tq8(h<@qp+Lfvn6xdh+)WTM%Uy>HwlDLOns$;ZTrK lli9+YfYfHMupUMb_eOXuh%%3e0#TDAB0$tz2<0A`3IOGxMdJVf delta 176 zcmZ3pnQ_f#MxFp~W)=|!1_lm>OEWj}%wuP|G;{JA_AMYPiNgj&E#OE7QLLPfKx(rG zXR#=VyI1Zhh`U7oo;ZkZG2g`tq8(h<@qp+Lfvn6xdh+)WTM%Uy>HwlDLOns$;ZTrK lli9+YfYfHMupUMb_eOXuh%%3e0#TDAB0$tz2<0A`3IHTPNx=XB diff --git a/examples/graph.py b/examples/graph.py index 6adc400..dbf2450 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -13,29 +13,29 @@ def f(): c = ndarray(1, 10, np.float64, init_value=-30) # k = v * 2 + b + 3 + c - 32 # l = k >= 42 - q = v @ b - q.get() - v1 = q @ c - v2 = b @ c + v1 = (b + c) @ (b - c) + # q.get() + # v1 = q @ c + # v2 = b @ c # q1 = q @ v - v3 = b @ c + # v3 = b @ c - v1.get() - v2.get() - v3.get() + # v1.get() + # v2.get() + # v3.get() - res = v3*8 + v1 - 4 + v2.abs() + # res = v3 * 8 + v1 - 4 + v2.abs() - res.get() + # res.get() - final_res = res + 42 + # final_res = res + 42 # q.get() # w = c @ b # w.get() # res = q.abs() + c - baka = final_res.get() - print(baka) + # baka = final_res.get() + print(v1.get()) # r = b.where(42, 69) # g = b.where(v, c) # z = ~r diff --git a/src/ast.hpp b/src/ast.hpp index 3fdbdb6..e9a8a4f 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -42,15 +42,26 @@ inline T peek(char* &msg) noexcept { return *(reinterpret_cast(msg)); } +template +std::vector faster_tortoise(char *cmd, bool flush = false); + +template std::pair getMatmulOperand(char* cmd) { + char* recurse_cmd = cmd; + uint8_t marker = extract(cmd); if (marker != 2) CmiAbort("Matmuls only supported with Tensor Types"); + uint8_t dim = extract(cmd); if (dim < 1 || dim > 2) CmiAbort("Matmuls not supported with dimension%" PRIu8 "", dim); + cmd += dim * sizeof(uint64_t); + uint32_t opcode = extract(cmd); - if (opcode) CmiAbort("Matmuls not supported with rvalues"); + if (opcode) faster_tortoise(recurse_cmd, true); + cmd += sizeof(bool); + uint64_t tensorID = extract(cmd); return {dim, tensorID}; } @@ -86,7 +97,7 @@ std::shared_ptr to_ct_unary(uint64_t opcode, const std::vect } template -std::vector faster_tortoise(char *cmd) +std::vector faster_tortoise(char *cmd, bool flush) { uint8_t marker = extract(cmd); ckout << "Marker> " << marker << endl; @@ -156,10 +167,10 @@ std::vector faster_tortoise(char *cmd) if (ctopcode == ctop::matmul) { ckout << "IN MATMUL" << endl; uint32_t operand_size = extract(cmd); - std::pair xOperandInfo = getMatmulOperand(cmd); + std::pair xOperandInfo = getMatmulOperand(cmd); cmd += operand_size; operand_size = extract(cmd); - std::pair yOperandInfo = 
getMatmulOperand(cmd); + std::pair yOperandInfo = getMatmulOperand(cmd); cmd += operand_size; const uint8_t& xDim = xOperandInfo.first; @@ -324,7 +335,7 @@ std::vector faster_tortoise(char *cmd) } } - if (store) { + if (store or flush) { tensorType tensor(ast); const auto& tensorNode = tensor(); insert(tensorID, std::move(tensor)); From 1b032f1365e9c7086c75f663c43381443f01f673 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 20:22:01 +0530 Subject: [PATCH 09/34] test matmuls --- dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 22956 -> 22956 bytes examples/graph.py | 9 +++++---- src/ast.hpp | 8 -------- src/server.cpp | 1 - 4 files changed, 5 insertions(+), 13 deletions(-) diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg index 892d8544641b46ebd75227ede9e7ecc0daa5cb0e..9f29ae08ccc98b5821358cd240cd8b981f57115c 100644 GIT binary patch delta 176 zcmZ3pnQ_f#MxFp~W)=|!1_lm>yhR&%=CL#7EtmtcMZAy%8P@qRbg>yIZ%wuO-ICt_I_AMYPiNgj&E#OE7QLLPfKx(rG zXR#=VyI1Zhh`U7oo;ZkZG2g`tq8(h<@qp+Lfvn6xdh+)WTM%Uy>HwlDLOns$;ZTrK lli9+YfYfHMupUMb_eOXuh%%3e0#TDAB0$tz2<0A`3IOGxMdJVf diff --git a/examples/graph.py b/examples/graph.py index dbf2450..98a5bd6 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -8,12 +8,13 @@ set_max_depth(10) def f(): - v = ndarray(2, [10, 10], np.float64, init_value=-20) - b = ndarray(1, 10, np.float64, init_value=10) - c = ndarray(1, 10, np.float64, init_value=-30) + v = ndarray(2, [10, 10], np.float64, init_value=20) + b = ndarray(2, [10, 10], np.float64, init_value=10) + # c = ndarray(1, 10, np.float64, init_value=-30) # k = v * 2 + b + 3 + c - 32 # l = k >= 42 - v1 = (b + c) @ (b - c) + v1 = v @ b + # v1 = (b + c) @ (b - c) # q.get() # v1 = q @ c # v2 = b @ c diff --git a/src/ast.hpp b/src/ast.hpp index e9a8a4f..2d73cb5 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -100,7 +100,6 @@ template std::vector faster_tortoise(char *cmd, bool flush) { uint8_t marker = extract(cmd); - ckout << "Marker> " << marker << endl; std::vector shape; shape.reserve(2); @@ -112,26 +111,21 @@ std::vector faster_tortoise(char *cmd, bool flush) shape.push_back(extract(cmd)); } double value = extract(cmd); - ckout << "VAL> " << value << endl; tensorAstNodeType temp_node(0, ctop::broadcast, value, shape); return {temp_node}; } uint8_t dims = extract(cmd); - ckout << "DIMS> " << dims << endl; for(uint8_t i = 0; i < dims; i++) shape.push_back(extract(cmd)); - ckout << "SHAPE> " << shape[0] << endl; uint32_t opcode = extract(cmd); bool store = extract(cmd); uint64_t tensorID = extract(cmd); - ckout << "TENSORID> " << tensorID << endl; if (opcode == 0) { if (marker == 1) { - ckout << "GOT ME A SCALAR TYPE YES" << endl; auto& tmp = std::get(lookup(tensorID)); double result = tmp.get(); tensorAstNodeType temp_node(0, ctop::broadcast, result, shape); @@ -158,14 +152,12 @@ std::vector faster_tortoise(char *cmd, bool flush) std::vector ast; uint8_t numOperands = extract(cmd); - ckout << "NUM OPERANDS> " << numOperands << endl; // when we encounter a matmul, we treat it as a : // 1. a dot product returning a scalar if both the operands are vectors // 2. a dot product returning a vector if one operand is a matrix and the other a vector // 3. 
a gemm returning a matrix if both the operands are matrices if (ctopcode == ctop::matmul) { - ckout << "IN MATMUL" << endl; uint32_t operand_size = extract(cmd); std::pair xOperandInfo = getMatmulOperand(cmd); cmd += operand_size; diff --git a/src/server.cpp b/src/server.cpp index e87fdc5..ab03c3c 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -259,7 +259,6 @@ void Main::execute_creation(int epoch, int size, char *cmd) void Main::execute_fetch(int epoch, int size, char *cmd) { ct_name_t name = extract(cmd); - ckout << "name> " << name << endl; ct_array_t &arr = lookup(name); char *reply = nullptr; int reply_size = 0; From 26d62a5d6fbc1af80316a4d17a3798f20e94f531 Mon Sep 17 00:00:00 2001 From: anant Date: Mon, 13 Oct 2025 15:37:22 +0530 Subject: [PATCH 10/34] ok --- build/lib/charmnumeric/array.py | 144 ++++++++++++++++++++++++++++---- build/lib/charmnumeric/ccs.py | 60 ++++++++++++- charmnumeric/array.py | 144 ++++++++++++++++++++++++++++---- charmnumeric/ccs.py | 60 ++++++++++++- config.cmake | 6 +- examples/custom_ops.py | 26 ++++++ examples/run.sh | 2 +- src/ast.hpp | 41 ++++++++- 8 files changed, 434 insertions(+), 49 deletions(-) create mode 100644 examples/custom_ops.py diff --git a/build/lib/charmnumeric/array.py b/build/lib/charmnumeric/array.py index ab2f464..fd6430d 100644 --- a/build/lib/charmnumeric/array.py +++ b/build/lib/charmnumeric/array.py @@ -292,60 +292,170 @@ def copy(self): cmd_buffer = ASTNode(res, OPCODES.get('copy'), [self]) return create_ndarray(self.ndim, self.dtype,shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) + + def where(self, other, third): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('where'), [other, third, self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def exp(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('exp'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def log(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def log(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def abs(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('abs'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def negate(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('negate'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def square(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('sqare'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + def sqrt(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[0.5]) + cmd_buffer = ASTNode(res, OPCODES.get('sqrt'), [self]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def cbrt(self): + def reciprocal(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('reciprocal'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, 
command_buffer=cmd_buffer) + + def sin(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('sin'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def cos(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[1/3]) + cmd_buffer = ASTNode(res, OPCODES.get('cos'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def relu(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('relu'), [self]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def pow(self, exponent): + def scale(self, scalar): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[exponent]) + cmd_buffer = ASTNode(res, OPCODES.get('scale'), [self], args=[scalar]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log(self, base=np.e): + def add_constant(self, constant): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[base]) + cmd_buffer = ASTNode(res, OPCODES.get('add_constant'), [self], args=[constant]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log10(self): + def add(self, other): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[10]) + cmd_buffer = ASTNode(res, OPCODES.get('add'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def subtract(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('subtract'), [self, other]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log2(self): + def multiply(self, other): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[2]) + cmd_buffer = ASTNode(res, OPCODES.get('multiply'), [self, other]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def exp(self): + def divide(self, other): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('exp'), [self]) + cmd_buffer = ASTNode(res, OPCODES.get('divide'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def modulo(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('modulo'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def power(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('power'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def max(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('max'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def min(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('min'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def greater_than(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('greater_than'), [self, other]) return create_ndarray(self.ndim, self.dtype, 
shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def abs(self): + def less_than(self, other): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('abs'), [self]) + cmd_buffer = ASTNode(res, OPCODES.get('less_than'), [self, other]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def where(self, other, third): + def equal(self, other, epsilon=1e-5): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('where'), [other, third, self]) + cmd_buffer = ASTNode(res, OPCODES.get('equal'), [self, other], args=[epsilon]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) + def atan2(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('atan2'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def weighted_average(self, other, w1, w2): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('weighted_average'), [self, other], + args=[w1, w2]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + def any(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('any'), [self]) diff --git a/build/lib/charmnumeric/ccs.py b/build/lib/charmnumeric/ccs.py index 3e4639f..1fd8e69 100644 --- a/build/lib/charmnumeric/ccs.py +++ b/build/lib/charmnumeric/ccs.py @@ -9,10 +9,62 @@ next_name = 0 epoch = 0 -OPCODES = {'+': 1, '-': 2, '*': 3 ,'/': 4, '@': 5, 'copy': 6, 'axpy': 7, - 'axpy_multiplier': 8, 'setitem': 9, 'pow': 10, '>': 11, - '<': 12, '>=': 13, '<=': 14, '==': 15, '!=': 16, '&': 17, - '|': 18, '!':19, 'where':20, 'log': 21, 'exp': 22, 'abs': 23, 'any':24, 'all':25} +### custom opcodes +# EXP, LOG, ABS, NEGATE, SQUARE, SQRT, RECIPROCAL, SIN, COS, RELU, SCALE, ADD_CONSTANT, ADD, SUBTRACT, +# MULTIPLY, DIVIDE, POWER, MODULO, MAX, MIN, GREATER_THAN, LESS_THAN, EQUAL, ATAN2, WEIGHTED_AVERAGE + +OPCODES = { + # base_op + '+': 1, + '-': 2, + '*': 3, + '/': 4, + '@': 5, + 'copy': 6, + 'axpy': 7, + 'axpy_multiplier': 8, + 'setitem': 9, + 'pow': 10, + '>': 11, + '<': 12, + '>=': 13, + '<=': 14, + '==': 15, + '!=': 16, + '&': 17, + '|': 18, + '!': 19, + 'where': 20, + + # custom_unary_op + 'exp': 41, + 'log': 42, + 'abs': 43, + 'negate': 44, + 'square': 45, + 'sqrt': 46, + 'reciprocal': 47, + 'sin': 48, + 'cos': 49, + 'relu': 50, + 'scale': 51, + 'add_constant': 52, + + # custom_binary_op + 'add': 71, + 'subtract': 72, + 'multiply': 73, + 'divide': 74, + 'power': 75, + 'modulo': 76, + 'max': 77, + 'min': 78, + 'greater_than': 79, + 'less_than': 80, + 'equal': 81, + 'atan2': 82, + 'weighted_average': 83 +} INV_OPCODES = {v: k for k, v in OPCODES.items()} diff --git a/charmnumeric/array.py b/charmnumeric/array.py index ab2f464..fd6430d 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -292,60 +292,170 @@ def copy(self): cmd_buffer = ASTNode(res, OPCODES.get('copy'), [self]) return create_ndarray(self.ndim, self.dtype,shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) + + def where(self, other, third): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('where'), [other, third, self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def exp(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('exp'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, 
command_buffer=cmd_buffer) + + def log(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def log(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def abs(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('abs'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def negate(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('negate'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def square(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('sqare'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + def sqrt(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[0.5]) + cmd_buffer = ASTNode(res, OPCODES.get('sqrt'), [self]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def cbrt(self): + def reciprocal(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('reciprocal'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def sin(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('sin'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def cos(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[1/3]) + cmd_buffer = ASTNode(res, OPCODES.get('cos'), [self]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def relu(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('relu'), [self]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def pow(self, exponent): + def scale(self, scalar): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('pow'), [self], args=[exponent]) + cmd_buffer = ASTNode(res, OPCODES.get('scale'), [self], args=[scalar]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log(self, base=np.e): + def add_constant(self, constant): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[base]) + cmd_buffer = ASTNode(res, OPCODES.get('add_constant'), [self], args=[constant]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log10(self): + def add(self, other): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[10]) + cmd_buffer = ASTNode(res, OPCODES.get('add'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def subtract(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('subtract'), [self, other]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log2(self): + def multiply(self, other): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], 
args=[2]) + cmd_buffer = ASTNode(res, OPCODES.get('multiply'), [self, other]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def exp(self): + def divide(self, other): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('exp'), [self]) + cmd_buffer = ASTNode(res, OPCODES.get('divide'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def modulo(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('modulo'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def power(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('power'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def max(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('max'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def min(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('min'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def greater_than(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('greater_than'), [self, other]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def abs(self): + def less_than(self, other): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('abs'), [self]) + cmd_buffer = ASTNode(res, OPCODES.get('less_than'), [self, other]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def where(self, other, third): + def equal(self, other, epsilon=1e-5): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('where'), [other, third, self]) + cmd_buffer = ASTNode(res, OPCODES.get('equal'), [self, other], args=[epsilon]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) + def atan2(self, other): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('atan2'), [self, other]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def weighted_average(self, other, w1, w2): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('weighted_average'), [self, other], + args=[w1, w2]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + def any(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('any'), [self]) diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 3e4639f..1fd8e69 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -9,10 +9,62 @@ next_name = 0 epoch = 0 -OPCODES = {'+': 1, '-': 2, '*': 3 ,'/': 4, '@': 5, 'copy': 6, 'axpy': 7, - 'axpy_multiplier': 8, 'setitem': 9, 'pow': 10, '>': 11, - '<': 12, '>=': 13, '<=': 14, '==': 15, '!=': 16, '&': 17, - '|': 18, '!':19, 'where':20, 'log': 21, 'exp': 22, 'abs': 23, 'any':24, 'all':25} +### custom opcodes +# EXP, LOG, ABS, NEGATE, SQUARE, SQRT, RECIPROCAL, SIN, COS, RELU, SCALE, ADD_CONSTANT, ADD, SUBTRACT, +# MULTIPLY, DIVIDE, POWER, MODULO, MAX, MIN, GREATER_THAN, LESS_THAN, EQUAL, ATAN2, WEIGHTED_AVERAGE + +OPCODES = { + # base_op + '+': 1, + '-': 2, + 
'*': 3, + '/': 4, + '@': 5, + 'copy': 6, + 'axpy': 7, + 'axpy_multiplier': 8, + 'setitem': 9, + 'pow': 10, + '>': 11, + '<': 12, + '>=': 13, + '<=': 14, + '==': 15, + '!=': 16, + '&': 17, + '|': 18, + '!': 19, + 'where': 20, + + # custom_unary_op + 'exp': 41, + 'log': 42, + 'abs': 43, + 'negate': 44, + 'square': 45, + 'sqrt': 46, + 'reciprocal': 47, + 'sin': 48, + 'cos': 49, + 'relu': 50, + 'scale': 51, + 'add_constant': 52, + + # custom_binary_op + 'add': 71, + 'subtract': 72, + 'multiply': 73, + 'divide': 74, + 'power': 75, + 'modulo': 76, + 'max': 77, + 'min': 78, + 'greater_than': 79, + 'less_than': 80, + 'equal': 81, + 'atan2': 82, + 'weighted_average': 83 +} INV_OPCODES = {v: k for k, v in OPCODES.items()} diff --git a/config.cmake b/config.cmake index 973ccd8..24bd89b 100644 --- a/config.cmake +++ b/config.cmake @@ -1,8 +1,8 @@ -set(CHARM_DIR "/home/shogo/master/Kale/charm/netlrts-linux-x86_64") -set(BASE_DIR "/home/shogo/master/Kale/LibCharmtyles") +set(CHARM_DIR "/home/anant/winter2024/lbp/study/charm/netlrts-linux-x86_64") +set(BASE_DIR "/home/anant/sem7/LibCharmtyles") set(EIGEN_DIR "/usr/include/eigen3") set(CUDA_DIR "/path/to/CUDA/directory") -set(KOKKOS_DIR "/home/shogo/master/Kale/LibCharmtyles/kokkos/install") +set(KOKKOS_DIR "${BASE_DIR}/kokkos/install") set(CHARMC "${CHARM_DIR}/bin/charmc") set(CPU_OPTS "-c++-option -std=c++20 -O3 -march=native -DNDEBUG") diff --git a/examples/custom_ops.py b/examples/custom_ops.py new file mode 100644 index 0000000..8776c1b --- /dev/null +++ b/examples/custom_ops.py @@ -0,0 +1,26 @@ +from charmnumeric.array import connect, ndarray +from charmnumeric.ast import set_max_depth +from charmnumeric.ccs import enable_debug +import charmnumeric.linalg as lg +import numpy as np + +set_max_depth(10) + +def f(): + v = ndarray(1, 50, np.float64, init_value=-20) + b = ndarray(1, 50, np.float64, init_value=1) + c = ndarray(1, 50, np.float64, init_value=30) + d = ndarray(1, 50, np.float64, init_value=5) + + g1 = v.abs().add(b).weighted_average(c, 0.7, 0.3) + g2 = b.log().exp() + g3 = v.abs().scale(2).scale(2).add_constant(29) + b + 32 + + + print("k1:", g1.get()) + print("k2:", g2.get()) + print("k3:", g3.get()) + +if __name__ == '__main__': + connect("127.0.0.1", 10000) + f() diff --git a/examples/run.sh b/examples/run.sh index b887cdb..43bee95 100755 --- a/examples/run.sh +++ b/examples/run.sh @@ -1,4 +1,4 @@ cd .. 
python setup.py install cd examples -python graph.py +python custom_ops.py diff --git a/src/ast.hpp b/src/ast.hpp index 2d73cb5..340cb3c 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -67,6 +67,8 @@ std::pair getMatmulOperand(char* cmd) { } ctop inline to_ctop(uint64_t opcode) noexcept { + if(opcode>=41 and opcode<=52) return ctop::unary_expr; + if(opcode>=71 and opcode<=83) return ctop::binary_expr; switch (opcode) { case 0: return ctop::noop; case 1: return ctop::add; @@ -84,18 +86,49 @@ ctop inline to_ctop(uint64_t opcode) noexcept { case 18: return ctop::logical_or; case 19: return ctop::logical_not; case 20: return ctop::where; - case 23: return ctop::unary_expr; default: return ctop::noop; } } std::shared_ptr to_ct_unary(uint64_t opcode, const std::vector& args) noexcept { switch(opcode) { - case 23: return ct::unary_ops::abs(args); + case 41: return ct::unary_ops::exp(args); + case 42: return ct::unary_ops::log(args); + case 43: return ct::unary_ops::abs(args); + case 44: return ct::unary_ops::negate(args); + case 45: return ct::unary_ops::square(args); + case 46: return ct::unary_ops::sqrt(args); + case 47: return ct::unary_ops::reciprocal(args); + case 48: return ct::unary_ops::sin(args); + case 49: return ct::unary_ops::cos(args); + case 50: return ct::unary_ops::relu(args); + case 51: return ct::unary_ops::scale(args); + case 52: return ct::unary_ops::add_constant(args); default: return nullptr; } } +std::shared_ptr to_ct_binary(uint64_t opcode, const std::vector& args) noexcept { + switch(opcode) { + case 71: return ct::binary_ops::add(args); + case 72: return ct::binary_ops::subtract(args); + case 73: return ct::binary_ops::multiply(args); + case 74: return ct::binary_ops::divide(args); + case 75: return ct::binary_ops::power(args); + case 76: return ct::binary_ops::modulo(args); + case 77: return ct::binary_ops::max(args); + case 78: return ct::binary_ops::min(args); + case 79: return ct::binary_ops::greater_than(args); + case 80: return ct::binary_ops::less_than(args); + case 81: return ct::binary_ops::equal(args); + case 82: return ct::binary_ops::atan2(args); + case 83: return ct::binary_ops::weighted_average(args); + default: return nullptr; + } +} + + + template std::vector faster_tortoise(char *cmd, bool flush) { @@ -146,7 +179,9 @@ std::vector faster_tortoise(char *cmd, bool flush) ctop ctopcode = to_ctop(opcode); if (ctopcode == ctop::unary_expr) { rootNode = tensorAstNodeType(-1, ctopcode, to_ct_unary(opcode, args), shape); - } else { + } else if(ctopcode==ctop::binary_expr){ + rootNode = tensorAstNodeType(-1, ctopcode, to_ct_binary(opcode, args), shape); + }else { rootNode = tensorAstNodeType(ctopcode, shape); } std::vector ast; From 34323ef0d7345197888480743e863d682cbf2004 Mon Sep 17 00:00:00 2001 From: anant Date: Mon, 13 Oct 2025 20:47:20 +0530 Subject: [PATCH 11/34] just push it --- build/lib/charmnumeric/array.py | 14 ++++++++++---- charmnumeric/array.py | 14 ++++++++++---- examples/custom_ops.py | 4 ++-- 3 files changed, 22 insertions(+), 10 deletions(-) diff --git a/build/lib/charmnumeric/array.py b/build/lib/charmnumeric/array.py index fd6430d..09d0e8d 100644 --- a/build/lib/charmnumeric/array.py +++ b/build/lib/charmnumeric/array.py @@ -305,15 +305,21 @@ def exp(self): return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log(self): + def log(self, base = np.e): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self]) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], 
args=[base]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log(self): + def log10(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self]) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args = [10]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def log2(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args = [2]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) diff --git a/charmnumeric/array.py b/charmnumeric/array.py index fd6430d..09d0e8d 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -305,15 +305,21 @@ def exp(self): return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log(self): + def log(self, base = np.e): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self]) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[base]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) - def log(self): + def log10(self): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self]) + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args = [10]) + return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), + name=res, command_buffer=cmd_buffer) + + def log2(self): + res = get_name() + cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args = [2]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), name=res, command_buffer=cmd_buffer) diff --git a/examples/custom_ops.py b/examples/custom_ops.py index 8776c1b..0a640d5 100644 --- a/examples/custom_ops.py +++ b/examples/custom_ops.py @@ -8,12 +8,12 @@ def f(): v = ndarray(1, 50, np.float64, init_value=-20) - b = ndarray(1, 50, np.float64, init_value=1) + b = ndarray(1, 50, np.float64, init_value=10) c = ndarray(1, 50, np.float64, init_value=30) d = ndarray(1, 50, np.float64, init_value=5) g1 = v.abs().add(b).weighted_average(c, 0.7, 0.3) - g2 = b.log().exp() + g2 = b.log(2).exp() g3 = v.abs().scale(2).scale(2).add_constant(29) + b + 32 From bfb8cb485a8e8a5c3e55e6575bc38012b144b5f9 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 20:50:27 +0530 Subject: [PATCH 12/34] cleanup --- build/lib/charmnumeric/__init__.py | 1 - build/lib/charmnumeric/array.py | 475 --------------------- build/lib/charmnumeric/ast.py | 145 ------- build/lib/charmnumeric/ccs.py | 176 -------- build/lib/charmnumeric/linalg.py | 22 - charmnumeric.egg-info/PKG-INFO | 143 ------- charmnumeric.egg-info/SOURCES.txt | 12 - charmnumeric.egg-info/dependency_links.txt | 1 - charmnumeric.egg-info/requires.txt | 12 - charmnumeric.egg-info/top_level.txt | 1 - dist/charmnumeric-0.1.dev0-py3.12.egg | Bin 22956 -> 0 bytes playground/ha.py | 12 - src/Makefile | 21 - src/server.decl.h | 190 --------- src/server.def.h | 453 -------------------- 15 files changed, 1664 deletions(-) delete mode 100644 build/lib/charmnumeric/__init__.py delete mode 100644 build/lib/charmnumeric/array.py delete mode 100644 build/lib/charmnumeric/ast.py delete mode 100644 build/lib/charmnumeric/ccs.py delete mode 100644 build/lib/charmnumeric/linalg.py delete mode 100644 charmnumeric.egg-info/PKG-INFO delete mode 100644 charmnumeric.egg-info/SOURCES.txt delete mode 100644 charmnumeric.egg-info/dependency_links.txt delete mode 100644 
charmnumeric.egg-info/requires.txt delete mode 100644 charmnumeric.egg-info/top_level.txt delete mode 100644 dist/charmnumeric-0.1.dev0-py3.12.egg delete mode 100644 playground/ha.py delete mode 100644 src/Makefile delete mode 100644 src/server.decl.h delete mode 100644 src/server.def.h diff --git a/build/lib/charmnumeric/__init__.py b/build/lib/charmnumeric/__init__.py deleted file mode 100644 index a1c1976..0000000 --- a/build/lib/charmnumeric/__init__.py +++ /dev/null @@ -1 +0,0 @@ -__version__ = '0.1.dev' diff --git a/build/lib/charmnumeric/array.py b/build/lib/charmnumeric/array.py deleted file mode 100644 index 09d0e8d..0000000 --- a/build/lib/charmnumeric/array.py +++ /dev/null @@ -1,475 +0,0 @@ -import sys -import warnings -import numpy as np -import weakref -import sys -from charmnumeric.ast import get_max_depth, ASTNode -from charmnumeric.ccs import to_bytes, from_bytes, send_command_raw, send_command, \ - send_command_async, connect, get_creation_command, \ - get_epoch, get_name, get_fetch_command, Handlers, OPCODES, is_debug - - -deletion_buffer = b'' -deletion_buffer_size = 0 - - -def create_ndarray(ndim, dtype, shape=None, name=None, command_buffer=None, is_scalar=False): - z = ndarray(ndim, dtype=dtype, shape=shape, name=name, - command_buffer=command_buffer, is_scalar=is_scalar) - return z - - -def from_numpy(nparr): - return ndarray(nparr.ndim, dtype=nparr.dtype, shape=nparr.shape, - nparr=nparr) - - -class ndarray: - def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, - nparr=None, name=None, command_buffer=None, is_scalar=False): - """ - This is the wrapper class for AUM array objects. - The argument 'name' should be None except when wrapping - an array that already exists on the AUM backend server - """ - - if ndim > 2: - raise NotImplementedError("Arrays of dimensionality greater than" - "2 not supported yet") - self.dtype = dtype - self.ndim = ndim - self.itemsize = np.dtype(dtype).itemsize - self.init_value = init_value - self.command_buffer = command_buffer - self.is_scalar = is_scalar - if isinstance(shape, np.ndarray) or isinstance(shape, list) or \ - isinstance(shape, tuple): - self.shape = np.asarray(shape, dtype=np.int32) - elif shape is not None: - self.shape = np.asarray([shape], dtype=np.int32) - else: - self.shape = np.zeros(self.ndim, dtype=np.int32) - self.valid = False - if command_buffer is None: - self.valid = True - if name: - self.name = name - #self.command_buffer = None - self.command_buffer = ASTNode(self.name, 0, [weakref.proxy(self)]) - else: - self.name = get_name() - if nparr is not None: - buf = nparr.tobytes() - else: - buf = None - cmd = get_creation_command(self, self.name, self.shape, buf=buf) - send_command_async(Handlers.creation_handler, cmd) - #self.command_buffer = None - self.command_buffer = ASTNode(self.name, 0, [weakref.proxy(self)]) - else: - self.name = name - max_depth = get_max_depth() - if self.command_buffer.depth >= max_depth: - if is_debug(): - print("Maximum AST depth exceeded for %i, " - "flushing buffer" % self.name) - self._flush_command_buffer() - - def __del__(self): - global deletion_buffer, deletion_buffer_size - if self.valid: - deletion_buffer += to_bytes(self.name, 'L') - deletion_buffer_size += 1 - - def __len__(self): - return self.shape[0] - - #def __str__(self): - # print(self.get()) - - #def __repr__(self): - # #self._flush_command_buffer() - # # FIXME add repr - # pass - - def __setitem__(self, key, value): - if not isinstance(key, slice) or key.start != None or \ - key.stop != None or 
key.step != None: - raise ValueError("Can't set items or slices") - self.cmd_buffer = ASTNode(res, OPCODES.get('setitem'), [self, value]) - - def __neg__(self): - return self * -1 - - def __add__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('+'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __radd__(self, other): - return self + other - - def __sub__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('-'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rsub__(self, other): - return -1 * (self - other) - - def __lt__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('<'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rlt__(self, other): - return self >= other - - def __gt__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('>'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rgt__(self, other): - return self <= other - - def __le__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('<='), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rle__(self, other): - return self > other - - def __ge__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('>='), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rge__(self, other): - return self < other - - def __eq__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('=='), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __req__(self, other): - return self == other - - def __ne__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('!='), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rne__(self, other): - return self != other - - def __and__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('&'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rand__(self, other): - return self & other - - def __or__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('|'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __ror__(self, other): - return self | other - - def __invert__(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('!'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __mul__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('*'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rmul__(self, other): - return self * other - - def __truediv__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('/'), [self, other]) - return create_ndarray(self.ndim, 
self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __rtruediv__(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('/'), [1., self/other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def __matmul__(self, other): - is_scalar = False - if self.ndim == 2 and other.ndim == 2: - res_ndim = 2 - shape = np.array([self.shape[0], other.shape[1]], dtype=np.int32) - elif self.ndim == 2 and other.ndim == 1: - res_ndim = 1 - shape = np.array([self.shape[0]], dtype=np.int32) - elif self.ndim == 1 and other.ndim == 1: - res_ndim = 1 - shape = np.array([1], dtype=np.int32) - is_scalar = True - else: - raise RuntimeError("Dimension mismatch") - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('@'), [self, other]) - return create_ndarray(res_ndim, self.dtype, shape=shape, - name=res, command_buffer=cmd_buffer, is_scalar=is_scalar) - - def _flush_command_buffer(self): - # send the command to server - # finally set command buffer to array name - global deletion_buffer, deletion_buffer_size - debug = is_debug() - if debug: - self.command_buffer.plot_graph() - if self.valid: - return - validated_arrays = {self.name : self} - cmd = self.command_buffer.get_command(validated_arrays, self.ndim, self.shape) - reply_size = 0 - for name, arr in validated_arrays.items(): - reply_size += 8 + 8 * arr.ndim - if not debug: - cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + cmd - cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd - send_command_async(Handlers.operation_handler, cmd) - deletion_buffer = b'' - deletion_buffer_size = 0 - for i in range(len(validated_arrays)): - arr = validated_arrays[name] - arr.validate() - else: - for name, arr in validated_arrays.items(): - arr.validate() - self.validate() - - def get(self): - self._flush_command_buffer() - cmd = get_fetch_command(self) - if self.ndim == 0: - total_size = self.itemsize - data_bytes = send_command_raw(Handlers.fetch_handler, cmd, reply_size=total_size) - return from_bytes(data_bytes, np.dtype(self.dtype).char) - else: - print("GET OSME") - total_size = self.itemsize - for i in self.shape: - total_size*=i - data_ptr = send_command_raw(Handlers.fetch_handler, cmd, reply_size=int(total_size)) - return np.frombuffer(data_ptr, np.dtype(self.dtype)).copy().reshape(self.shape) - - def evaluate(self): - self._flush_command_buffer() - - def validate(self): - self.valid = True - self.command_buffer = ASTNode(self.name, 0, [self]) - - def copy(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('copy'), [self]) - return create_ndarray(self.ndim, self.dtype,shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def where(self, other, third): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('where'), [other, third, self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def exp(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('exp'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def log(self, base = np.e): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args=[base]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def log10(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args = [10]) - return 
create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def log2(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('log'), [self], args = [2]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def abs(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('abs'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def negate(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('negate'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def square(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('sqare'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def sqrt(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('sqrt'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def reciprocal(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('reciprocal'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def sin(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('sin'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def cos(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('cos'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def relu(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('relu'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def scale(self, scalar): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('scale'), [self], args=[scalar]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def add_constant(self, constant): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('add_constant'), [self], args=[constant]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def add(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('add'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def subtract(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('subtract'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def multiply(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('multiply'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def divide(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('divide'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def modulo(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('modulo'), [self, other]) - return create_ndarray(self.ndim, self.dtype, 
shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def power(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('power'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def max(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('max'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def min(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('min'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def greater_than(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('greater_than'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def less_than(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('less_than'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def equal(self, other, epsilon=1e-5): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('equal'), [self, other], args=[epsilon]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def atan2(self, other): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('atan2'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def weighted_average(self, other, w1, w2): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('weighted_average'), [self, other], - args=[w1, w2]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - - def any(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('any'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - def all(self): - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get('all'), [self]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - diff --git a/build/lib/charmnumeric/ast.py b/build/lib/charmnumeric/ast.py deleted file mode 100644 index fa9c5f2..0000000 --- a/build/lib/charmnumeric/ast.py +++ /dev/null @@ -1,145 +0,0 @@ -import numpy as np -import networkx as nx -import matplotlib.pyplot as plt -from ctypes import c_long -from networkx.drawing.nx_pydot import graphviz_layout -from charmnumeric.ccs import OPCODES, INV_OPCODES, to_bytes - - -max_depth = 10 - - -def set_max_depth(d): - global max_depth - max_depth = d - - -def get_max_depth(): - global max_depth - return max_depth - - -class ASTNode(object): - def __init__(self, name, opcode, operands, args=[]): - from charmtiles.array import ndarray - # contains opcode, operands - # operands are ndarrays - self.name = name - self.opcode = opcode - self.operands = operands - self.depth = 0 - self.args = args - if self.opcode != 0: - for op in self.operands: - if isinstance(op, ndarray): - self.depth = max(self.depth, 1 + op.command_buffer.depth) - - ############################################################################################################################################### - # Marker determines whether we are dealing with a tensor, a scalar or an 
arithmetic type # - # Encoding = | Marker | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # - # | 8 | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ | # - # NB: If opcode is 0, the encoding is limited to ID # - # Encoding = | Marker | shape | val | # - # | 8 | 64 | 64 | # - # NB: Latter encoding for double constants # - ############################################################################################################################################### - def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False): - from charmnumeric.array import ndarray - - # Ndims and Shape setup - if is_scalar: - cmd = to_bytes(1, 'B') - else: - cmd = to_bytes(2, 'B') - cmd += to_bytes(ndim, 'B') - for _shape in shape: - cmd += to_bytes(_shape, 'L') - - if self.opcode == 0: - cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'L') - return cmd - - cmd += to_bytes(self.opcode, 'I') + to_bytes(save, '?') + to_bytes(self.name, 'L') - cmd += to_bytes(len(self.args), 'I') - for arg in self.args: - cmd += to_bytes(arg, 'd') - - cmd += to_bytes(len(self.operands), 'B') - for op in self.operands: - if isinstance(op, ndarray): - if op.name in validated_arrays: - if op.is_scalar: - opcmd = to_bytes(1, 'B') - else: - opcmd = to_bytes(2, 'B') - opcmd += to_bytes(op.ndim, 'B') - for _shape in op.shape: - opcmd += to_bytes(_shape, 'L') - opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') - else: - save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - if op.is_scalar: - opcmd = op.command_buffer.get_command(validated_arrays, ndim, shape, save=save_op, is_scalar=op.is_scalar) - else: - opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) - if not op.valid and save_op: - validated_arrays[op.name] = op - elif isinstance(op, float) or isinstance(op, int): - opcmd = to_bytes(0, 'B') - for _shape in shape: - opcmd += to_bytes(_shape, 'L') - opcmd += to_bytes(float(op), 'd') - cmd += to_bytes(len(opcmd), 'I') - cmd += opcmd - return cmd - - def plot_graph(self, validated_arrays={}, G=None, node_map={}, - color_map={}, next_id=0, parent=None, save=True): - from charmnumeric.array import ndarray - if G is None: - G = nx.Graph() - if self.opcode == 0: - node_map[next_id] = 'a' + str(self.operands[0].name) - G.add_node(next_id) - if parent is not None: - G.add_edge(parent, next_id) - return next_id + 1 - opnode = next_id - G.add_node(next_id) - if parent is not None: - G.add_edge(parent, next_id) - node_map[next_id] = INV_OPCODES.get(self.opcode, '?') - if save: - color_map[next_id] = 'tab:red' - node_map[next_id] += (': a%i' % self.name) - next_id += 1 - for op in self.operands: - # an operand can also be a double - if isinstance(op, ndarray): - if op.name in validated_arrays: - G.add_node(next_id) - G.add_edge(opnode, next_id) - node_map[next_id] = 'a' + str(op.name) - color_map[next_id] = 'tab:green' - next_id += 1 - else: - save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - if not op.valid and save_op: - #color_map[next_id] = 'tab:red' - validated_arrays[op.name] = op - next_id = op.command_buffer.plot_graph( - validated_arrays, G, node_map, color_map, next_id, - opnode, save_op) - elif isinstance(op, float) or isinstance(op, int): - G.add_node(next_id) - G.add_edge(opnode, next_id) - node_map[next_id] = op - next_id += 1 - if parent is None: - pos = 
graphviz_layout(G, prog='dot') - color_map_list = [color_map.get(node, 'tab:blue') for node in G] - nx.draw(G, pos, labels=node_map, node_color=color_map_list, - node_size=600, font_size=10) - plt.show() - return next_id - diff --git a/build/lib/charmnumeric/ccs.py b/build/lib/charmnumeric/ccs.py deleted file mode 100644 index 1fd8e69..0000000 --- a/build/lib/charmnumeric/ccs.py +++ /dev/null @@ -1,176 +0,0 @@ -import struct -import atexit -from pyccs import Server -from charmnumeric import array - -debug = False -server = None -client_id = 0 -next_name = 0 -epoch = 0 - -### custom opcodes -# EXP, LOG, ABS, NEGATE, SQUARE, SQRT, RECIPROCAL, SIN, COS, RELU, SCALE, ADD_CONSTANT, ADD, SUBTRACT, -# MULTIPLY, DIVIDE, POWER, MODULO, MAX, MIN, GREATER_THAN, LESS_THAN, EQUAL, ATAN2, WEIGHTED_AVERAGE - -OPCODES = { - # base_op - '+': 1, - '-': 2, - '*': 3, - '/': 4, - '@': 5, - 'copy': 6, - 'axpy': 7, - 'axpy_multiplier': 8, - 'setitem': 9, - 'pow': 10, - '>': 11, - '<': 12, - '>=': 13, - '<=': 14, - '==': 15, - '!=': 16, - '&': 17, - '|': 18, - '!': 19, - 'where': 20, - - # custom_unary_op - 'exp': 41, - 'log': 42, - 'abs': 43, - 'negate': 44, - 'square': 45, - 'sqrt': 46, - 'reciprocal': 47, - 'sin': 48, - 'cos': 49, - 'relu': 50, - 'scale': 51, - 'add_constant': 52, - - # custom_binary_op - 'add': 71, - 'subtract': 72, - 'multiply': 73, - 'divide': 74, - 'power': 75, - 'modulo': 76, - 'max': 77, - 'min': 78, - 'greater_than': 79, - 'less_than': 80, - 'equal': 81, - 'atan2': 82, - 'weighted_average': 83 -} - -INV_OPCODES = {v: k for k, v in OPCODES.items()} - -def enable_debug(): - global debug - debug = True - -def disable_debug(): - global debug - debug = False - -def is_debug(): - global debug - return debug - -def get_name(): - global next_name - curr_name = next_name - next_name += 1 - return (client_id << 56) + curr_name - -def to_bytes(value, dtype='I'): - return struct.pack(dtype, value) - -def from_bytes(bvalue, dtype='I'): - return struct.unpack(dtype, bvalue)[0] - -def send_command_raw(handler, msg, reply_size): - if server is not None: - server.send_request(handler, 0, msg) - return server.receive_response(reply_size) - -def send_command(handler, msg, reply_size=1, reply_type='B'): - global server - if server is not None: - return from_bytes(send_command_raw(handler, msg, reply_size), reply_type) - -def send_command_async(handler, msg): - global server - if server is not None: - server.send_request(handler, 0, msg) - -def get_epoch(): - global epoch - curr_epoch, epoch = epoch, epoch + 1 - return curr_epoch - -def connect(server_ip, server_port): - global server, client_id, debug - if not debug: - server = Server(server_ip, server_port) - server.connect() - client_id = send_command(Handlers.connection_handler, "") - atexit.register(disconnect) - -def disconnect(): - from charmnumeric.array import deletion_buffer, deletion_buffer_size - global client_id, deletion_buffer, deletion_buffer_size - if deletion_buffer_size > 0: - cmd = to_bytes(len(deletion_buffer), 'I') + deletion_buffer - cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd - send_command_async(Handlers.delete_handler, cmd) - deletion_buffer = b'' - deletion_buffer_size = b'' - cmd = to_bytes(client_id, 'B') - cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd - send_command_async(Handlers.disconnection_handler, cmd) - -def get_creation_command(arr, name, shape, buf=None): - """ - Generate array creation CCS command - """ - cmd = to_bytes(name, 'L') - cmd += to_bytes(arr.ndim, 'I') - cmd += 
to_bytes(buf is not None, '?') - cmd += to_bytes(arr.init_value is not None, '?') - for s in shape: - cmd += to_bytes(int(s), 'L') - if buf is not None: - cmd += buf - elif arr.init_value is not None: - cmd += to_bytes(arr.init_value, 'd') - print(cmd) - cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd - return cmd - -def get_fetch_command(arr): - """ - Generate CCS command to fetch entire array data - """ - cmd = to_bytes(arr.name, 'L') - cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd - return cmd - -def sync(): - # FIXME remove the size from the cmd - cmd = to_bytes(get_epoch(), 'i') + to_bytes(0, 'I') - send_command_raw(Handlers.sync_handler, cmd, 1) - -class Handlers(object): - connection_handler = b'aum_connect' - disconnection_handler = b'aum_disconnect' - creation_handler = b'aum_creation' - operation_handler = b'aum_operation' - fetch_handler = b'aum_fetch' - delete_handler = b'aum_delete' - sync_handler = b'aum_sync' - exit_handler = b'aum_exit' - diff --git a/build/lib/charmnumeric/linalg.py b/build/lib/charmnumeric/linalg.py deleted file mode 100644 index 823b7d5..0000000 --- a/build/lib/charmnumeric/linalg.py +++ /dev/null @@ -1,22 +0,0 @@ -import sys -import struct -import numpy as np -from pyccs import Server -from charmnumeric.ccs import OPCODES, get_name, send_command, Handlers -from charmnumeric.array import create_ndarray -from charmnumeric.ast import ASTNode - - -def axpy(a, x, y, multiplier=None): - operands = [a, x, y] - if multiplier is not None: - operands.append(multiplier) - operation = 'axpy_multiplier' - else: - operation = 'axpy' - res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get(operation), operands) - return create_ndarray(x.ndim, x.dtype, - name=res, command_buffer=cmd_buffer) - - diff --git a/charmnumeric.egg-info/PKG-INFO b/charmnumeric.egg-info/PKG-INFO deleted file mode 100644 index ec960a2..0000000 --- a/charmnumeric.egg-info/PKG-INFO +++ /dev/null @@ -1,143 +0,0 @@ -Metadata-Version: 2.4 -Name: charmnumeric -Version: 0.1.dev0 -Summary: A python library for distributed array computations -Home-page: https://github.com/UIUC-PPL/PyProject -Author: Aditya Bhosale -Author-email: adityapb1546@gmail.com -License: BSD -Classifier: Development Status :: 4 - Beta -Classifier: Intended Audience :: Developers -Classifier: Intended Audience :: Science/Research -Classifier: License :: OSI Approved :: BSD License -Classifier: Natural Language :: English -Classifier: Operating System :: MacOS :: MacOS X -Classifier: Operating System :: POSIX -Classifier: Operating System :: Unix -Classifier: Programming Language :: Python -Classifier: Programming Language :: Python :: 3 -Classifier: Topic :: Software Development :: Libraries -Classifier: Topic :: Utilities -Requires-Dist: numpy -Requires-Dist: charm4py -Provides-Extra: docs -Requires-Dist: sphinx; extra == "docs" -Provides-Extra: tests -Requires-Dist: pytest; extra == "tests" -Provides-Extra: dev -Requires-Dist: sphinx; extra == "dev" -Requires-Dist: pytest; extra == "dev" -Dynamic: author -Dynamic: author-email -Dynamic: classifier -Dynamic: description -Dynamic: home-page -Dynamic: license -Dynamic: provides-extra -Dynamic: requires-dist -Dynamic: summary - -charmnumeric -========== - -:code:`charmnumeric` is a python interface to a C++ distributed array library -implemented using Charm++ [#charm]_. -charmnumeric uses a client-server model with a client-side python -interface and a Charm++ server on the backend. The client and server -are connected using CCS [#ccs]_. 
-The server maintains a symbol table of distributed arrays which -are then looked up for computation when a CCS message is -received. - -:code:`charmnumeric.array` ----------------------- - -.. highlight:: python - -:code:`charmnumeric.array.ndarray`, analogous to :code:`numpy.ndarray`, is a proxy -object that wraps the name of the corresponding array on the server. -We use a lazy evaluation scheme for array computations. -The array operations incrementally build an AST which is stored in a buffer in the -:code:`ndarray` object. This AST is encoded into a CCS message when -either the data from the array is requested on the frontend or -when the size of the AST grows beyond a user configurable -threshold. -The server side Charm++ program decodes the CCS message and -rebuilds the AST which is then executed. - -The lazy evaluation scheme reduces the number of CCS messages required to -be sent from the client to the server. -It also helps in reducing the number of temporary arrays created on the -server side by accessing the reference counts of the frontend arrays in -the python runtime. For example:: - - v = ndarray(1, 10, np.float64) - b = ndarray(1, 10, np.float64) - c = ndarray(1, 10, np.float64) - w = c - for i in range(2): - y = v + b + w - z = v - y - w = 2 * (c - z) + b - w.evaluate() - -The above code snippet generates the following AST. Nodes with labels -starting with the letter :code:`a` are arrays. Nodes with an operation -label that are colored blue are operations that generate a temporary -array. Nodes with an operation label that are colored red are operations -that generate arrays that are to be stored on the server side. -The red node labels also show the name of the resulting array. -Note that the arrays :code:`y`, :code:`z` and :code:`w` for the first iteration -of the loop are considered to be temporary because they are overwritten -in the next iteration. These operations will be executed inplace -on the server side. - -.. figure:: docs/images/simple_ast.png - :alt: simple_ast - - *AST generated by the above code snippet* - -Here's another example of a conjugate gradient solver:: - - def solve(A, b): - x = ndarray(1, 1000, np.float64) - r = b - A @ x - p = r.copy() - rsold = r @ r - - for i in range(1000): - Ap = A @ p - alpha = rsold / (p @ Ap) - - x = lg.axpy(alpha, p, x) - r = lg.axpy(alpha, Ap, r, multiplier=-1.) - - rsnew = r @ r - - if np.sqrt(rsnew.get()) < 1e-8: - print("Converged in %i iterations" % (i + 1)) - break - - p = lg.axpy(rsnew / rsold, p, r) - rsold = rsnew - - return x - -This generates the following AST, - -.. figure:: docs/images/conj_ast.png - :alt: conj_ast - - *AST generated by the conjugate gradient example* - -Here the green nodes are arrays that do not exist on the server when the AST is -sent, but will be created and stored as a result of an operation in the current -AST before being referenced. - - -References ----------- - -.. [#charm] Charm++ Documentation - https://charm.readthedocs.io/en/latest/ -.. 
[#ccs] CCS Documentation - https://charm.readthedocs.io/en/latest/converse/manual.html?converse-client-server-interface#converse-client-server-interface - diff --git a/charmnumeric.egg-info/SOURCES.txt b/charmnumeric.egg-info/SOURCES.txt deleted file mode 100644 index 88cd83e..0000000 --- a/charmnumeric.egg-info/SOURCES.txt +++ /dev/null @@ -1,12 +0,0 @@ -README.rst -setup.py -charmnumeric/__init__.py -charmnumeric/array.py -charmnumeric/ast.py -charmnumeric/ccs.py -charmnumeric/linalg.py -charmnumeric.egg-info/PKG-INFO -charmnumeric.egg-info/SOURCES.txt -charmnumeric.egg-info/dependency_links.txt -charmnumeric.egg-info/requires.txt -charmnumeric.egg-info/top_level.txt \ No newline at end of file diff --git a/charmnumeric.egg-info/dependency_links.txt b/charmnumeric.egg-info/dependency_links.txt deleted file mode 100644 index 8b13789..0000000 --- a/charmnumeric.egg-info/dependency_links.txt +++ /dev/null @@ -1 +0,0 @@ - diff --git a/charmnumeric.egg-info/requires.txt b/charmnumeric.egg-info/requires.txt deleted file mode 100644 index 33d7698..0000000 --- a/charmnumeric.egg-info/requires.txt +++ /dev/null @@ -1,12 +0,0 @@ -numpy -charm4py - -[dev] -sphinx -pytest - -[docs] -sphinx - -[tests] -pytest diff --git a/charmnumeric.egg-info/top_level.txt b/charmnumeric.egg-info/top_level.txt deleted file mode 100644 index 590bcaf..0000000 --- a/charmnumeric.egg-info/top_level.txt +++ /dev/null @@ -1 +0,0 @@ -charmnumeric diff --git a/dist/charmnumeric-0.1.dev0-py3.12.egg b/dist/charmnumeric-0.1.dev0-py3.12.egg deleted file mode 100644 index 9f29ae08ccc98b5821358cd240cd8b981f57115c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 22956 zcmaI7bF64Vvpslh+vgtJwr$(Ct$S?Swr$(CZQFC-yvh9D_cED3D(StGs$Q$Jv#WY_ zwY(HC2nqlI00e+-fsB@F1V*$2H~@e<5&!_=zqcY{Vl)!6qH=WdQvW^3RyVNQWJmg* z(PKC>ba2I-gIba=K>Ze z-nTbpe>`zm%2kO=hH-pxJML-Mr>_9pExV&PnT-vsBSX^#) z-pryc+YU*#89N&g5QM%gTi|8aaTZV^&Q8>#wApqQ08_s|Ak(VUph~rGgvx(@_wHFT zSVqNMI?3Jj>B-X3>&?}(tIhtFNfyv{w%^~YS}>qGzn<{4P^QZ%*R)JbjEb~z_?TXA z-KEnQIT`ZOsTdht-pr(Ouqb-<%twvVDd#e=)~Y0(gB85wKIX`tkJ=%m1FyKv$jg8V zZ9k)|&QPp3?Zq$m(0wh5iha~eQd>x_48-*bs(QUY!PRN1ZgiuM8%+y64y3_Hs7Vaj znKWG|h_Ct-YGmHP&-e+ui+eJaVAH}^p7!LMd^nuG+0UJ|Z4*_VweqojI^l;L*5cGy zt!lwPbs3uc&6|+(znK?+vU1W%GCjE z-~hb4e#xhA8NfV(c$8I(`>lhNJyBkN7z?md>sbE^MyZ^8=jhh2Mi=u6eAqW<(u6$t zo>sVaXY;)4wXeF-<-R4b)Um7ArZX1w1#mrB&#_;-iR&l<+)oTn?&nsTju$X%^I7#kQim@CrHiymF)EcP& zOdFylU023YzAf5~DI1hjAef}nPmSW;C(#K1kdU$`OKP#`OYbzNvtwUaaLVekfe;@c z`C(V?t%kND9~&TVh&;4**Q{kWkHq~0SJ&OFwlH( z>n=EuX@cL1ObdSz?rga0&dCCoe|@9m;EJ^BX;~tDdWa9j2!J)ZYd4`Iu|%r<{uoL= zf`Y3DjVa>zoQ51#TDoQXXzPovvdT=y{o;gg|W2 zfC4uTFv!Ll_;=5;L1ioF!=Tn#TPFXnruq@lLmNM|+2vb`VKC^8${m#TQ~6)XE^tU= zBt*o=!rM{ihRfncE4ko}4G52;sQe}BC!|#DJn{jka$OB2+&^qB({uPX9i04p-XY30 z-@tiX06l3ee4KcXX!TXB6~<+S0Q1wxDnbFB4X*$os9VXAK6<`+RT1^1P;eOE9Hy1G zYE{?%!9!e4b@wT%6HVIJM`8UgXOOSfR)=BBt;G5SuZpxUi%R^J!cD$)hba7rqwx!_ zGIv6_BZAMOE=1~EOvQl}H7cTfKn<2e>Clhb5QH6tP8SL+96b4pt7J<}>jUbTJv^BW zqW4vz?^PpMSR{P*vuA0zQ{a{lrWuV%ep+Q)^HgnKB9{X0_`ysYa6qh2A-W4iK~KID zAM?H|=$bMm$X;Fa*%1Le5n+--NoPffl!Wv9O1x)~ z__Pl{J<#`kB0!UEh-s#6WN4iFgAk)jJzXpIx=J}n7xO!pL@^?(ZlqCP8R77(Wk)te zxO_#4m|cEdz0s`(zb!q{W#KG~GvEsb?5$_G`&d?=iEKFLyPfQlHJeZ>k|j=Oc1o)D zRn3d!6ndK!?o+f}kN2K0;Ha8bMr&7nEc}jE_M4HT3B#{C!~##6IE<$qroQQ)>``Bo zK4TlE<8RZ&`|EZ|qbuH+;ZA8_&H9S3=_-<>=U9f85YX6^OJd=+kA-Yn>}hcdus<8~5!|<#e?- zz}9t3BEPM=fz$j1^cIB!hh$3^1G7PIg6OzPuXW!jYCI-Hxl1>>PUCD|7<9qU@$;S$@1FOr6tJ69WcPvW2s^LYSr|l2>iatBr+D{@90(AIgLAwqJRkMojUycDWc8$L? 
z(cw$B_7e?p>}ssnhu7&k(xZR%kd_%iH2UetxEEb(GTQf{f<5e@h5T^jf8?l4hH}*& zHCeNn*D04y>;mmw?|hiBd-ZxeWd*{10p$N9Kl=b-v<3ak%glfP0RNS>|KI$qB&VV% zB%(y??C$I{EdV$02O)Uh=T^}Ztk67ap4CPhSx}64fdVctezQ_bqVszvw@cU7)r^AN z5!GJ_fk5q~#$tdOkYkbjc&=V?I4|6pmc`LvWN4sE==z!ux~|35Ju~yhxhyu-r;{H3 zx_Ng(;4TwA0ug8r!o1p3_XPMqQ73|}G6MfY`wuh#?*Bw> zY+`R>Yiwd`ObNcI64}5{0p|M=@q-#hWP8p2YNW1 zU0iig^}102F(VMz+0DxDM3K=%Ue`)*9^SGno)lBmc=`0P&E*|dM6BsimAwJvN)yjC zn7%u8m#?6^f{Gkw_pVW6cS~Q$p|ft*L2*elfHS|5fbbO)Z}Lc{|l!2|xrYJgW&@_G=1a z(qVY5oh`5#`*Zg~t7?dWh%Gt~ZdBt~0aYEgl9xKaD6gSnvO1)u-rBUZBBS{pH&UUe zG)(WQsXvp6_*ABt|IS_7o;FJ)8$t3nZ{*3}ZX}mF5-O7K3(h(j=GLK}f%v=sc@Ewo zt4p*nzT2;JhCqZJ$tvImqsA(X&St*B@NAe*m%s$Q0_``%{1iDho_`ezDH@77Wg@D- z%TmeMTp&&vF#Zz-31zz)a=F0(tL`feYuqpzbO1ESss%J93p$3f0wrOP&i(^#0p2mL zo2<$t&}S?aO796rhsVl)#D1oj@xVj_UARKB!D@sOEn0W1-=K&DNLG-;7>U-gUW6){ zUHU>qjNli%W*)TwRa!e!bB2XjNQ+Rk-Xw(}&j@Bg^t+`q>4C{asFMi2h}i?yzkr?S znXg3$7Ytjl?*MN@_}U(Dz*R`h5Lr}F-|3bp_c6=S-$Js0bpJA3A{0Xa;TaOjzTx7d zIMgmz-MY`kVY60}X}LDOU;DJxpuRj=>3%!;5txEsZyJn%j)(N{X;FB$tYwp6|BI-A z7eV!Y%+tG-Tm&d)l1X2A{StV`0bjjgFhX=d>qO))A|djN2E0!-wzEX=eyNx}9^9XD zes%TOCF+%UTuOmd$PNarzY^0GpVb#pc84irygfS6nIxJ@M|VDmzht2| z;wd`$W^7(ye4kqm(bBg)3CPnQ-X}*-KBcCN-Nz9Jbyikj9$3-3`=4D~NelKxB$L50 z#`M>dERUJo0z#J~<{H*-SSf-<&Di;NS@#6^LaW;MQ=6v#*E_brErF(u`xd>F6`-{8 zW|e8q*eBJ&MSu!2_%_5HT=S9X+iIUb_`fbV@b~B4l=N~w&`^S(XlbxnQlXiXTGP%o z?QQxtHBP6L!=U#_!w!+5@ti}>ykX5mLVVp{tCdzkjW7}tJ7slt4>Sk*%+!mn$Zs*$ z$VF(zvw|Ef{ZU6W-Vssg9+FFhkg>>T{!CLu*=hU9N{K4Xa>birHqh5GKNQalnFaXt zxPz=Ae>ok!2t>$v+})Ufoh1Cg^oMZuQLc=D``AMu1%&dddTm5#Nh$?;3rEi+sN@0M zp)Ei^g;x#^_mlUKS$giART{b_CeM~QG(0ZbH&54z;-ajb#9 z?FSaB3L6J|IdQUPM$bddTbZT^i?O0kB-6m^wYN+oOTruxtWCPDwTtD zOKFVJza?{JCOM_8fybG0^Hv>7wdbd2&l;Z6uN9-t`d2l(n!$xXR9@ddcemJ-4-FoH zZ$?VeA|^8g!VAyjJ6b7U=Z&Q?U}^dxS5C>C-WI@z}?@!S^ZOiyCYfPS$Gk0KGV@zf9?`YWm%5mIBJkXXGqqklTo zD&n_<7Oq+)>y+~xYJ%2g0_Mb5;H%$i>hcwhRghomHI3h}O93N#)Ualc7KrTGH4uwk zSJ@o$I>@%!FFo!>Ki*h{xh=#C-SvIH2Iw&{29INGTb-h^@VJ;J#3D6l!94YfEHt>( zt|nUs3Xs~&zw#lx@Jq>g>iameP&7UVxzK4e5&0C6=$uS#>)7di-0@#UY`cbhcn(wi z-3j?naUOEZUw_^|Xgt49mY>OgkScA2JKl?LZOJb0sc!>Sp>k|9u3cPCrBfewBW7)B$$I%ln4*cD{p;Q&^`&!t-JbOS9-KIf z-CRZ>QNl$_GR@2jDJfF{PXjr*q!4-yffc>h#{=UM(ejcmPWq6^Aw--X*8@8|tJt@3 zieeDwZ-Q>Eeu@GK<2`eor+mBjzuNGHapQj8t^^_eq8bDYahk2;-OCG+3x5GNb&JKq zmw4^E1&kHO!kc(1Tx~xUuv;$9{(%3VG7>3SMr-5S>*o^`0H6XB008-4$@HJ)jg#}g z;$|9C$8MALPNz3ONPbL|r~f>J5F!utdIJ=&!&yC|8^oVqs%ct<5HvydVCM6dOR-2= zvcag(Mir?3yz_ATzNdJu;zW?3rQ(8R0I8m{08My_2R#=L=BZzRs_G)4GGQjcv)T6Ws7y2E;Tt>kl;=V|RNq`13}F#C*aS2(@#4Dq&`8z-8+W@k#dUNEVQpgGGcYJ6QSeWx_LVgg0QE z4Xj~|5a_r7a%huK;S}U#eWYLwxSVpbo0&si8#yKWdvoZtJf$a`B2tc8%D9wi)kh9Q zSVtU8SF+tzOJ~7eqx*0O2oVus5zM2z?}Rdvzc=CGfdP9+3uxO)ZET_u^y8o)#0ea< z3~8Ewrv4($p(-$obs)i{-`*cPdye)sYnn!|%J#!jnuF##hWmeygrtM*aV7ZovYkE9 zcxzWzta4hij`;%a1w!}2HfWLUXZWsg{}8`RihGK2n)7pHcQyO>GIeq=<peQ&Y&n4P$UhI2n?0O=Ow5r#{)?gwam6^ zefpesh1}#Bd#e4kTq5pr)Hd&UNoF_Q?ihh;w7R6$XoKoaOBPHv*GNh7=F9%$%~f>M z(^9oT3Mn8A4Wp3FLL-T8t0|K~o|zUA=ZiwMrkKuRLir7wngG)x$7TYknfd zUB$mjnqg3;zU-@I8612~R0|~z##Rs|OGfUC9IJ%{F+8R;F>6mmiQDYIlT-g!3>8-23y) z-D%2P$OYU2j7xuGx_V!sjH8ig?;F&19slR&qI5u4b$jRx`7>nM?aC~occ2RJ!+&U< zj!3+4Fa{@waYcfw?Y1QwJ5I64s=!s}(LLo1DQ&+1L3jd@hQH4}>+i4snb;H1cR)!9 z72}6@HvO60n6C?8QO)M&Tk*Eqz7)`<`te4XWBvmPWdH>xH!^TOqVY$RNt4!NjVv3h zsO_{(fKk!aYLLfxJ=P^;Phg%jEJ}nfkgOZ=V&OH}k-I(E^Y}EZ^WH|og%Z4q>R`Us zD34*F9pd&$VJYJG2@z8*wJQwKIp44BoQUTzrgor!+ytez)m|K^qpssV1OgE}WuXX^ zYxVwoB*kPgu6azZ9u5cluOJ_sc}PAT?bG3xkfT8j^9%gd?e~o(o2nJA6GKeCp#(|D zrC=wQInolQz3y;+>K}GarBLXKJ{G9i8BmG?u`CD)`iNOXUs0nc>(7}{ zr8P|YXKMF3dB*}h6m^t|XQ@C%M`j|3&C(n)TmwAlUO+NdhX$ga7Y^r4bk*to!cN>) 
zgrGWi^_@7rt&iT4fJ3eIaYTj7S|UyTRYM`%U8-B~$RNkw>ZinN^i6(D>TlEbu!a-J z!2_zME1`g|Gyr_48$c@f5o3A!&*6rX4;8!9TtK2aYnnI0Xy}>_n$2|%s&Zxfmy+PL zigoQ_;W?NB^biZ3V(H>gLI($Iw1JafWKlfu*c5j#g&f;%OIJ~TEGRtJ8iLd(CJ8;+ z{Onl{rRmv43-`zlWqR%huiEF3Y|4nA{~!2N&Z3L_(@f4QPs z)y5861mVZG*ASa{hE&X6aUlQ|d}(e$e2x&5fH+9@@^MYqvDNMBhJ&H_H#>dZs}*;l z;>WS^k+*SnO4CHdHmhzXDI7Y9PXhdP0#qu|tYD2a95s0lYziz2o!H8PQO8tYS#h$M z!4Vq-IxUro0lC)LlAz8yXq2rz6^@hVdN1L@i9;0Y$1OW-^qwZI;V<0 z8W?JifDW`Q$JpHyO-{sD%z#W>7cmg=K?{X(*)nA=tlm;BO{h7?9Kk7cRxCM=QI(8Y6UwyFs1Hs*783Wy z;0ApCQ-i*ciJ#bLKs78KhBaeQJObcEFpeo{Nh3>~XA+u$7_);WY@;Q?`vbu&KJ3WD zmDt$p2lpNqeKG9x6ol~LO*DGw+Cj?HiNxHTL_yM4BGvw!#Qrs}e24O{Dg5KIB^|XL z)34DZx!ail-64q_6Qo|zo#++uB7MU6b(Nr+2xn}qXL#iv)bu+DN-F0YceqkmobIcj zZmhRXkW0;2WLHkxq)YDZXn&9;u(>I;_7?h!p`Gbx{hY?dkE6~9!YjSz*q1cTiYHE3 zKkvp4j;nFr6xYhcocB@F_p%tawbNpD0KP}fy+;P5=Z`Dg*v~w>XP+SARV*a72ae&2 zu!<)`&9hI+C}Acx1vC44(~2a_K}80#Uq>7$CTp2&6{<5{Y##aUm*~dk=n0oC=Et+L z=RJIup({y^(DFUFO6ielMXQ4kDA%2KWZ%7uRa-y``!0`mZN)agK!;@;{aT0E=HZc> z#BeDFi_a|Mr8HBW;yf{6q9L|w?4dz^`!O+aFY}7ECUk&jty1?h?Sq*$=GLM9{B8or z1q?XGLF*F(ckzqPxeAzt+oue)3Et$`%DesxEQn|QF%eeaY>YVST7sy&2oN4lb`+~| zs$T6Gb2DXjZcfgF+t|GRKQZ6G)CyckYHDl+jw_4!OP4}Hd@dahTZn~=$ldbkweGc0 zC(Ak(W$)X-GfAqnC!?2t5x@EBpH71&ZwlKlr7g7O4`nadkpq=oqZDSS#TDM|SwjO~ zZ0O$#pmZ@NN&#=k09CrP@7MN>A!#+-1RNn?voA;G1Fr9Kvz1CjCj>d)5g6Yt+~)7c zCl4&jC*GvHI)1)SQ_B*Mu@P)x1IRK`X|pZK@Ds+1fOoP>doh(H3)!;c*@ppT6fLUD znnmBg=sa<}TRM%VE5%YYw8Ov9HaY>ZmQ^*ct2|M!-l{-!Y3R-hUe8&OYR?Fmlnrf@ zb?v@eJQy}Boii875f&rQ5w{VMnP39`r*8XSN*~HP-UrD)`O^mIzvadMC|&?ru4HE<7za=-~nx32-1^@xf#5WNKI3I-( z|BEQ;cLIR4h^dHCfI*4Ll3uX}`vmi>s5tq>jA`dEI+|LHhH7?lIy_oy{05`=Gs#@e z%tR)_M?uoqCQL`hRNExjIxs-JGbPN&!lT2-K~zu9R?E!ROwOdi$IHt^*GSB(TbuWQ z9x7w&D`_EHp@WFM^k5jb$xF|~$hfnIY131*v6c28BtDDR=!eIzQBw(YA)bRkI3L8>PVDb8kYvfa1O6pFVFZ8+dhXE zaU5m(S=@0#SEmjm5$C93GQvcHcb)agb-HQa-SI-}rKDX^dUJYQ{Kc#CiYoXCRP;_; zdJW7qbc`27Dfmv3`Y6$1eqU*8I?8llh;WY1g+*zD0t&7X#JXwr)yW%EL7D4(0t}o) zpznzCH&k=T8&W}{Z?FdLrIE60Zhe)dxnqTY=iLx%2J0?1J0V`BR^ey*1tG?t5bLX5+a6k}PiL5^%vJAYd=5rrxYEvqOiQ=5pYK^z6= zM2L;|v_}{6R7%WK)D!Oy7=s>ZKe_K4yF18bQBNPn&3%jMf_AVqhQ2pxAKe8j&JpLr zPf3q>F5ZUV$5k|I#`PiU?Yp!Q>TO_9xr>g&vFcdk)9U4>MRoez@&fPq#&1vDci;1) zy3n_>uds!T@aI&vd3<&6W9p4gJ9M0CDPe}WU)oh}(^QTGyJZ^pU04bow?vP_eO2GP zIJEU|nt5;<)PkWWub8wD7UgMRdlY3We{|$<<^)Z+Y9?aT7+9#oZ#Ik_xGD(($GpTN zeHy*o{0FeTbL`xanW*!OLPqY-;3AT379}G|!WZbE?wQwapN%kfLt1*sZfR zvH2|9|PUhvQ8G)h4A-MuFKkP6g$=$&uf6%(q1~ zy>f>XQteDH5{F#tg27FK%hjrxH6k+YZFuV?^-6hLRo-f0x5kq(7kAlPG4hm;ECE?& zV+mS*8;>El8YUBt3)33s$CHci(-8`)Wl=)wu@hTQRvXST?l>eCwsmwB?>i~WmM;sr zl{c&+-OZHG6`qN+i%5p|*%~#icgY}>V+SaA*9v=MNNB9&p7U{=aFi&lizS|V46T~MTFx@5 zFTF;d23{F-S8=1uC5n={DRkc?W9I3ycv5=fjP!dc(d{ho&6Q22?uc}TU$Xo6$IW-B z1HA3CA{R#Cd%DG5QiJaBM<)`iVuv}gjIusQJU*5rZdq7Q!MJa|^kW<|4>|h=F>X(v zJo|M>!Bp?^Hzu;K)4gDe+NoG6Aft9fQDyhChM30CkM>U8 zJKo0%tjE6u%7f1E?e*OSLx(W%KEC-3lnqMtfx$O}yRiol^dsV7;=$bmh2j)u(F-N2 zbiU3};86~!i$?+~8jmu}a_H$3@AcU)2*iVpr<*rNyY211AUB+M#%1@#H|G>P>KQre^Hrn`a;0hO!E zC8CNMd#nE=tcuaU91)yDAD=w-5il@=OYsM3|4m-q*g=Jtc%LPW*>GQD=8v=VpR$(3C9(do_{cMDv^s*74YQ34TaGiD5c%W_ z7zu%JEFNYH5~Oa9@z40+6iV|qAWW{w6JAL8fxI{IS&>s`c-0o76BLuJjME>UFDESd zB_1XdX1S%_l^)~uzXFRT-SFfa9q@Xnx1Cm%IA*%5WRsM4YV(RB$0pk(+ez0li-yB3 z?!&B8)Uv8Iqk1LA*C`IB0&2-kY(iB2OG{COiH1CEr+I`(J!E%y5ipu zk5sfO($&2eT?fiVUB1le7p47pOiN_fEPbER;~7=1()4_kALd4r%pEtG_tVKepVyma zw%P5ADQj+oEw7p3(Qz?_KwkFrYr-~kr z&YRZjGM})Y+N;-vtuZ_2+T?r;`8tejUE4itZEQPjZZ&@WUq=87JjYj8cHWIemfX&~sQfke9g7YmrZqp`_ zCns&~@4$~KULr{zZnPEPAVP7=2qJN)EJ>GB zk1Qrrsq~%%hpJFjuQmNoqN7&0QX>_uGcru8k){!*Zlo7@k=Xdyd_Bg_(zsFBY(w{p 
z08*HfPQ%3#k(2Ra+F@X!f_TP?J+c|4;7*;FrvdBRBvDFn^U@uW$v>h;-o_0-U+U6o zG%n(m=2E_tpLCY%N|@R6=~2*H1x;8nf=iL^Jg}<=$}G{6<)m2^FMUFa3`lIoA-Mtv zf({u}V-VDVI?MF-V54GyGL|Uj{JAqiS7e5_PatgjDj~=?L#teOj)*Ux9~@t6vFUwI zSZ}6K+?4x=bp3JO#8bZHpa6+Q{s zB+>j4X<@Pxw@dr4Wt$^iET_+uO*SbJC+^f%~jx@~joXA54laU~j` z3vL2z8!mksjVc~3XOnX@PQIUQq*5-_WxNI&$V|5yl{J1{4>!$feNQLcOkasvuQSa~ z+S}`ekLw*yz)QGapy2SYQ0EcRter#`Kkt7JW}_-Jz&Pw$G?2i+F{70ysL4KNVn>`& z6*DKqDBcaOeJD4HP0C6Tjgd!U2^)ko`m>Fv{Abbz(^TW|5$>s%PE(Xa1Jkq;CUGgJ zkH_V;k(YHByAm&sCFr|n&!(9+OgdSaM2xq|M1`+H`M^M4v{SA%!leDo-JL_?tHNOo zwuT?&1SlPp;px^g}FWmJNozLb5DdutZ#466G zFXbqj1v#NT44m=H~Xo`c%)iD-TbC`lFO*nh$!tGX8?_OjNvKT zoK@Clhne2?ER!L%S`+Snu9kaS{gGt8$`WzfZZtz`nfZ-nL1@WarDtkCWtJ*lF-}c?Ljno6X^~_lxu-|3;}X`H6E)dIZO4EvLRF ze|0jx9x6j^5~6Ans@362H)5;WrITWFR!%I;{D`tOErMkm-VtAM&BU7_)je_VTSPnd z2p;+bC%4a<=oDb|A~*2*dJPHD`=%gTn6Bh(?i*{!b{l9D;#n#(SUp%ON=cH9w*N`@ zEXvlj@`c6Rvh~N`kGnm-nzl&CELB}fqV6Gn7Kwnj2%CXLc?L%=P$dv9N&SWFd610q z0)LoX44bAUnqv%RnAj2_f2N_#1^4u1CLPB2&iV z-~AdV&n9bzwy0w@?yljSM9sNnVBZX?PEvCo2ZA_toY0K4?>LxrQx}xSQ7|~RK}2^&Z$a+M*HV%0aM7ok+|E_I|Kzon6vyn*xK+kw2j;fm`pf-|vB>i~ z5x$0{kqZ}cN=sV3jg6UX;Nq0aca*O!u!F8!4)E(UwS(U5f(K!gU!?}*mZ(LKBF~O{ z*z~bULERD=hg_^+gk@2woMVn94W{yg6`rZa1hwm$2$O)=ycKjGG7b%Y(lExOCre7}+Ur#+Lhz?EC^2olvIkRsG~_;9FjaolF-~Iq^zxcKTgp_?of3szFfxmU5F=n zTWM!DCC6SSx}Fwvv)J0nthMfr;#oYt6CLUxM@tyJ8-Ev>Roq)HCYMP4xRW%w{haa8 z%-r9WG>}Lg3#wdJu^6mB*HGAUc$YHRa%XygYI+ZzS|q8AVcw{yUmefbd19iJg|-h8KbZ41-xMq0hoaHJ+Ssu+3getE2!*ybg& z2AcK`pm|LvjfxCjm%Jx0bY1UrzC&i!Ia22HQ0DWr8JbLXM7BGQeP37oRSaWK!v_6T zXN^t8!DjUNCH-Z+CR5A~N5+~ru8z0f{5nRf6M1DYa^=wB@_sE`DUwY49=Rsy*?G#H zQ_dN4?|B(_ysW7)u=+2#kH=PeOkf&k*MM=0>p~V~oTEfb65m|)Eb`0;N-~_6!(_Om zSjj~f^ZeB+{LvTj9zAN?jQAzJhwm77AT>73FPlF31N(xN8BNq6LG++$D8f-1GSTrBFMQsT#bf)B` z_XaO5Uu0y^Yw$gZka0iNouCl4Z8He`72se}xp{yDYr`~n-T!^&T?e{_DA#KCa()r! 
z7q91mSBMuMXK&w{s8{pjGs)7w6OEhk2fw`Qvy*D#&dD~`>mHaZC0{gUAK%#x-MCHK zc}5wfCWea*2g*^vpqO7~VRY`~0xxWsbFGI)u}XgY#Rhz(B#G>F z6U~}OkBa0`Lxy4j?Bik*Xk{Q~ssX@l+e&G?|2-SJWl%3mvZsJtb?G3Om@Yy_UQ#UB z$q#sMA>t2f_1_?y-PHlL7ZsJaoSrW8WN@)KMBbTi82}MzWh@km-KxZ5(D8}|MVvK} zW~&smuOgV^ysxb<6r^&SQIH*6cy_berEoW{=|$P z*c}jEJ!KYCy8?sPESsEcbi->o66?{PmVe6hG7Tqt#%cjyQ3Ew6yC>;p0X~Dp`?p6i zC|MoUzk!d&g5yoV8{wM?Xlo|UNyr?8@TE}Qs1y-iZxUfP13KKB5T1b>HheEZKObZf zG(=ZSpU@omFA%~T4FMirxd%Q1AAtvDd$GYVtoIW9)IO zf4>06u=#x2dsi)P-jSovKemspy8DkSJD@s;op&p6gn>N={ zKEq7cqFRV#ICcy6Jh6kjwxenuo)4t%{B zKDm8*Q%u>m=;rFYZZE~bx*3_~yhJMs9t!LQ-1%wbG{brSB#R4z%K z!A5p8;_;qAdkzv(eTSf~--P5EBTi4`zHeMWLLZe96|JWHJI?!5D*bCbm}Ol$Xp!X% zAb!X7t^$=7rpOGk{x=lYjetu^+WbgNnh+2{Ljk$xsyGtk!J z0`oUP@oyZXkGKYJk>uTTuL2j}viTX_hZ3dGZ|Izt=Y-}D)X<-yvIcNa&5pMo1tBip zWxv4U`XllO5zTMD|J3&_;pYz>|Mi}C{A&lN{{PkY|Ft>&FLl3K!^Rr<6t|b_^QzG& zX7>S#m>n>SotS_E;SUNqv5YvBzaUsO<^wJ7DEs5)4Q-rp%!=gqo^-r@hm1puWD%C) zG`}UO4A#PjzeMs#6a3?{bNjX;ti_d3v5aKC(z5TyFGugtNNR-_$MEdq)}!Cg@9FOg z_v~%^SHCr@;a%4E1^%k<{9gmVK}kGDJ-)*{^#S#$VWBi|HOqlQ1xuT>r!#wDBEb*( zP`FH<6Z_;+s1Naxfs361ZdFzI;^@8*`V>!SBUs;09T{86 zSR*huY(9}`1vc7C?;Uw&q`xU7eGK(htAUL>2u#S?NLyGMemCB~?f$8B6D9leQac0C zB*NW?OaZySD41u1QdfdD%(G0A^;9toYcQ_h_}zh%A@oewCf^MESX04`+*lW16<*$y z7>2LWjRLx7P@qIgi`k2zUftc`FYmzqK0Q`2=rPj4-`aDnvHpnlnCnxYpmqvqW`GRL zU+z38cKWf^Eo`B=1S;NX8M-PFx`Ofch=*~AS&$K27-k|i%%ZH|6D1D(xv(-q6pP#p zDrQZ~6L$?BPr+??@+bFH49^IF)<9(%YDi$AMSTNrIN?SP+%X>E1AXj(b{PkmU^`90Y-rKclF?TC zGHTv~q>?&t*Pa2|DWz#OlW zDUXK0Ra%Ifqr)g(!<9)Zb|X_ZDl0)JD^%2|nUhXV7e0z9P2;DxNHuS!3`FjiVi@2J zCk}NKGO+Y4eAE~EbbGu`ta;_Q@l>`fLtBA^4+jQXfzO8BtvZozdb4ueCC1N+4>jfY zpW#Y6#=c7?0u}#F2x8FDKpLP$CF5w6hk>j#XbzU2J|}X945ls~dd^H@ua`(?p;Cq= z#vq=V>1KiCr9Dm$rU_r*Q*1z}ExHJ`L*wMm~YX)7Os9vQ!oET7dd z4p3;eL_2QWd1BG9Q|z+*z<+TPb6hE_ zngP6H?$01(mzti}5;mlJ;-dN_b5nWgR}k-uZw)T;dfM)KMi@T)jaObauMoS&RG+8SOqdL;ZJ%vqpD*A8~QpKDH;-^?j>@-e|w%IMDcrM(SWnSPI;T7A>b zsjT_IPixt-d2&#yNghIHox5>BbjoxZNkdj+MAo#cC;-t(q0Mrir<2~5f&jU$pdA@NRjx#0%ixq z4P@l$7a7zIG2!5wofM8s9>0_}pO|g_QvpdZ#;qz35yp!c!5B~lDGJh>F4Rt{g$hOV zrcqmfGV4-E;Uzz>kj4#8%-lmyb=egG)&MpK8QLID3fKoA5NrJcw8AfIpTY}uum?x@ z$_b@kDBMn)#0Hyzvvc+#9Xii)`C?O^lSF4_mwxM~XifG(GSNhzy)ckjEK;P2gjMeP zsRYB;h8&1w^M1d-v?uAeGEcf-^Z1R8bq4+uQfP1$X6+8)sO-ge*q9s zXnQ_(fLPQCT}0*iT+7}N!$a6jR@w>e%r@uo(l}|>_^t%apXQ{FdjC!P(?azPLof*-_LHQv9}g?_#F40ai70WAY$TpG{bN5#pw+ zh3Q9kXZvT`4Er^5`7`m~Rj1BfE3Z~qluC}I1&QbA*2~43Wu5gC-TEH4bdX_7I387u z^d_RswRfru-)Uf(7B7BloHu(;r=Nk1_sN6z$@JCUY}^h?R$bJ}+Me$RWVO+Z%dj)f z2Z70N=w1gC9kNwzp#rNjgT^z@V}i%{%Pq#t_Y^XYq*ZmtmMRV}VYAtc$Fwu<6b{}D z4!)FjXA&m7De3Jupuw8M(y&=al@Sk^8?EW@A;x$0;Oes3+Y= zU+eCjcEdpDN;|0p#YNh?G#fuh`qQQGCc4eH0QK*IgULOH&7aYg47UrrTM1-7w^BM& zY)@e;c>4UZ-o;+QA8Qb#!FhrOTuDk|_?g^ygWnNfi0cu+sS9xoRO+lvU+-cB5SxW| zD-sQ`u>A@=vYoP>t*52Eoo#O(F>6o;)H_fAWr&gkv`Co+OCnVI4;Fhk+!K5Hgfob6 z#V88Ou3Hgi4hAa7O85(f)inG*$XlOtKFMl&fv0@y9(-UsgKeHMfIV?&3+(du5 z6M=j*DMue#+QJ);@?#@l=}xMY0A&)ZlYKleAkYG8rQII!P5K1OnPrpr!djE(H>u9v z22y`>%lwNoPQT&hOJVb3Zlb9K&Eg~V&%I6v#S0II09+TZh=9^!+o0l{XK+?!SIVbM zO)H_aBZma5(K0JDsM;5#UCZwf0CZUHDOUzGZ`W@wpgN#ub&#ffSAFa@$|g|I_Aek!A@YYQnop0U>_Fv zYe2=10;+=Q0sG@x3&+1 z0@5L%w9+6Q5|UCPT|*6xLkJ8h9gcvcq`(1TUfI|h;NlI~6k>HhG2zjH*E-&x;& z{+Km?+}C>cdgfVs-Fx=$s=UY&pE;>C&|vzjmjp{1wcrPJjF6u@UfLb6x6x&+t&pg7E!6ZQXHQF#OHK~@I8=;z^|_;S66xo(Fre;)}s`h=G&@= z5K?SvV%!W$v0h+@t?KXc+urqiN-JJ^*4scUG3xTNgVV_j2t0OX;PnT!@9!h zY=u)@Xmb}m_5463%@WN<*eOTh`6tnhw z^!VZQ9=!a#`XCPOB549N{Nc1-tX=oQ;gKwE<{gtV`J8F!>GXVjGt3sZ9&>fY9^;f? 
z!y}zxMR!oN$Z|$kJ$dvSBXYLj#rS%cuVyyFQu->C&bj(wfar+vAti`uZQ;-qRoOS$ zMP%h*DxWSUhY*hcp<1Jt=Azu&l%vul?QYulpoNXY<(TDt=!-)zk~D3>dG>CX?^O0I ze8)jRq;UMwey~VkA-;);Xv1l!8+HO)Y^SStIQEYD_AL>y!+`_da+3gpp-{0pP8yxp zgaAX)r$&nA_Ehl9fI1vnzhT8toa#5L@=T|=8wVXS1^g7COo=Q-kGh#pCK7^}756#~ zjXwG<^&PnH0Y$XbnecUI`>B3BKs(+gm5EL}F3ATE6gA9`PWJ%5dnvu1j4y{%RrY%4 zuQ6;mNZ{wz=rYVXkbHuO(4d9%b4&(0%p_g6x*f&-q{#3ODt5Ije7?Gck7E*qGV`R9 zISKdns({9lkP0mLu@PM#ar|M6nbCFZ1!cH6B&)a8(E_M>7(b~`M2)j_Mb_a%X#@*b zKJ}95>2T<*Pc`Tg+!Wx4EM0asq+OtJa)e!>mRzCco}f&eU<~hIIPVmUsuBocx{#GD z;WVmKLMD>QFlv__7g)na=ov2kVY&ntw!imoJ&J4bX4ptb3)eB;f9>6V9y`9(yVYsy zTdTJdL#vL)5#^>r^^4)jbSQz;=M3W7_qEY$&I55kB~#^FVEtA{YTRH@uFi&YIwE+!iIm52@-I~oJkyM>CC545f_GZ{O+n5KuumW=Cyhx^>E~3j?wOcw6$-POxC-S4|zPC=!;ngQ<%b7wemxu-#FDeWhrStkz|{vLkP-WJwKs`dIkZXwlX$i0(iMZ-2sF#yIhK) z*xBMyq>p!9zaa{{kh@j^pG6)&e4&Dd>JP!AdYc@`{Sk4B1TTKifrp)8YG3RV*!1li z!j(0iYDQ?}SsH&a%J*mo6~YSfNO z@H3fu;#(6(x8R4<9h=Pu>gdZvA_WRupal#l5{i_HTp`_8l>G!LLcRDD0ddk*IqQg& zl&a_s*C&agbVS{hs7>_hb0o+DHEEV|ReGS$Wh74w0sAfc*hCdvF%@f1wD`<=#iBh+ z0Q~pfW@mK*di6R$Kc>UzvnBbP)zGQhc|plwOgZFQua8 zfHs{wPDdDdq^mBnYX-Mza7RMLUvil;jgs*nQwOkACL2EH&&i4HX-)}>I(qUYsbfwb z(q~^h4R-dCm&O1uu8ZQMWqo=2d7B%QxRwqpbPTjo=;iU>Jay0S>-U50(&IZ7qT$50 zc=32uRSTLo)L2oi)YZhLPgRenDiQ`{A9Ctq#e7Nvc{fcr@*)!@H<`^gmP2X7aCEWo zimKR2r8u~Sq_s(@2+J@5FQYgC`I*E|GXQGzIZa@u$11Hd(K&b6^cy4Tf+W?sU zu8*%etdxjP4SwnIG+LRIqPDYmDO$QI?hw7j-fkAq=`<3gH3dj6PW)I*!P%~<%bCQ> z9NQ4jHOoBExig>J`UXdC|89#d;lJ3Q8J37Pw@1-CkOHfqxjVO>3}E7*&*v@+7GGaI6L z%|M#*=y|iTMi?16e1foQ&@52@Q9tji6_)pT7(SEd%;=D4)=3QaO4EC#+u{eIWx-P7ib1Ig zR)}v2EYA`Yp7aylOJ_(PCmkpCF9;I`g0b5D3`FQeAs%9_!xQH2>GWbJ^Lm@8xD zytArGLqR@RizdV;r|yj2pY>9So3Xe%UixZZQ#3cHw`wKqVa}nl5VEk#InQOQLu1K! zEC(61{BRlQ@h||hEc5eJ9y=#t<+dA*t)X*TaLz%5$>^;7p@&FoON4qFO4zI)J|wiD zRv5+5mp0r$UZ?XF@IDsGXOxhH=XobCX3EM_s)~k@)Z*N7UL&VA5#!dw_G4I(O2)NN z&c*w8+;CctPpyuf`mid?mSWFbx0DuCDwid@qiyWHT*6oS&yKzhC5D_2m3@!*85B9VY?ruzo;19>n$?Q?ad~MO z`I5)QhkL|u$K@Qhl6xt=+vyn6{}npucD44x*X>Hsf`)%;+il#FX~xq6c2zW~ZavOT z$pp3nM)8a7Y0L=4hCC3q8T0O$5fNp4ApG4h)Q6sZ--nq4%(3|_@+i?&Bl;*2!Z7gJ zV?lWXZ)F3IBTG4bp+J!3;m&xd#r-(n-jBS@W3TzTVgrrl8Y=PwWSe-|X{~%8SV&21 zKyf@U%==muzX)z#bZRF#X`?q=c}!2ULKygyq6gBmK|V;#^mI+aY+=~)P?Q30%jDvxN(Z4r@?6=vl^TIxI2 z?S~KPN4`VHE_-{t+;P8b*>uX=#B#!6k~4(qsVNkQqxl#+3Y@rsuG&gMFsDOIwd~$ zr4EwBaxg_>OaP^al0hy*cd==$20EUg;fmJ!hbH@;V_J<^Ny?0vizFW|qG3UY`7u4q zi3@AKbtEww>OdSM%-x6!j-C( zN8~k@tF0QSo#k-JC*7W=olf-7kY1$D7S$vBf;#PmG}PKYz~~#^ZNZKnzj2QlAKeNe z!wL_C4{SZ?TiNak-eI1zgBWw5S28vRN`MrNA05qm3Mov%AlgNmo(Kg!YW_$`XsFQY z99b^wsPsTkp?z46cbcB4?hs~45CN%1GBpXMDaot*71Y;fHbs_6&bu^0X>EBGn5-`- z-+GGU?roFJ6)s~05T?;lntMtMc#2R4Z9LU5z69yiXFPG5-AwI`yy3baRJDVc_? zyA4nr)UUuz_s~ted@v}Ook9t`g{E4>q*cm6k9EycRbv=Wb^S#cQ#a8EZIRFOTe&IS zgCidmmFLUPtkpX$Cq3_O5LL)YI6D6lU@X~E-?6)GcHa5mN;AFjTQ&Zkl^;~ge)We! 
za^?%xu06_btF#Q^g!>QCaEFrb_*;C~or%En zHcWe8&_BFV%(1~#?WJR0^fm6yhLp28WoOjt1&Ojt_dKYnax6cwVX1l<8=P-7J-b}< z02@5lHvc>cSo?!1)lqtTS?`t3qV>ksi8@+My@zp|(abt9{n^>%Mlx|MWg^_Bt&DVY zxV&l*ZK7XL>{Vev7;A!!hq7qS@EC|OC#N05F>__jWFqb8N+HgBr;+$!ZhLjCNLJZ` z3&QZso6vP#Y^UmDW0mjVh&j+)4fP6==OTUIZYY>__O;C0Xgj;II}4P#N})%20@9OZ z>jb-JuBfX9GJJ5?#7eJ>fe;U^PfzTPGNv%n)taVeTwm4NoVL^x)LxdHh6UcacM^G- zDT(l2JinGV-gG>^9?A``yv`xry^cq6{M&y`|C>v?)w0;AP016f64kpPT=ZOqJPWr= z)foEd?*|3au|`r|C-Rm-E*EN*IX%Mt%E|`{j+A900-Y z>xzAh$<;2C#PoPwf&q?OCrOUpjtFPx^|$R^aWh!jNV9kX)DoRrVy5djc5mhr_Bdyb4<-O8qz2-do2L^TnJu&splYMH?F{l*mRx`jWrFXE>^b&B<Uqh=gY(bbcvp4JAQWvQzy{1?QIh@TL zLcL@;oO7aztTk1*SVJ$B)pYm0c!pn#LP*0`y*J)ad$?-z4=)+EIr0c1EsF5qP`9ru z=Ke|;>SGwRkBW}^QVV5_r49%cOM}DFeQ_+F?0!{iPNR8_JY+KoeqdK=#U@zGqs3OG z1nyx*oeKp1$SIo}pDOw~YJp1}BIwaTGn#86CFdLlVyh6Cf8nPd&E#E&Y?6bVpg+Y@ zp_P}B#Wkc+Q=c98Nq1yp)4>^Vm9KAmIIX92v0hjBalGHdVCuEPrqdA|>H*X<)B~Pd zSnHEc>5Z;a*$R)X3Dl3*i&pX-=U+AUoxzreAl)AKXcWS?k(t7`9lde>;;+skkwON% zi_rKU(P1qimilf*eJtEs=<#{@Kz{UK_zq|0exR}Udp|?;?}7(Us-|eaD#royv~ubW z8O7P{uV~Z*#=Z3HbJ*R*(V_`Kq1c>dj1uS-5CqP%is!xTt7jMt6<-$FNv2Wxafv9y z7)gEx@{sQk-Tl8+r`K=7e?~gw=f^){wZFxux65yv*#29Lgj5Pix%Q|2Q+~tCcAIe1 z7xvSN_V*}-XkI(K{vrIyk#-w(Taoq)_3Dr6? z+l?6iM>}#8@Tc_oFF^bCCed$z|7Ou|0{)a2{RPmz{;B*1_$NJjlkul$;-+3;)-nf5HDTpm>{o`#8iew(GUp?zidRy!D?C -#include "sdag.h" -#include "libcharmtyles.decl.h" - - - - - - - - - - - - - - - - - - - - - - - - - - -/* DECLS: mainchare Main: Chare{ -Main(CkArgMsg* impl_msg); -void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); -}; - */ - class Main; - class CkIndex_Main; - class CProxy_Main; -/* --------------- index object ------------------ */ -class CkIndex_Main:public CkIndex_Chare{ - public: - typedef Main local_t; - typedef CkIndex_Main index_t; - typedef CProxy_Main proxy_t; - typedef CProxy_Main element_t; - - static int __idx; - static void __register(const char *s, size_t size); - /* DECLS: Main(CkArgMsg* impl_msg); - */ - // Entry point registration at startup - - static int reg_Main_CkArgMsg(); - // Entry point index lookup - - inline static int idx_Main_CkArgMsg() { - static int epidx = reg_Main_CkArgMsg(); - return epidx; - } - - - static int ckNew(CkArgMsg* impl_msg) { return idx_Main_CkArgMsg(); } - - static void _call_Main_CkArgMsg(void* impl_msg, void* impl_obj); - - static void _call_sdag_Main_CkArgMsg(void* impl_msg, void* impl_obj); - /* DECLS: void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); - */ - // Entry point registration at startup - - static int reg_handle_command_marshall2(); - // Entry point index lookup - - inline static int idx_handle_command_marshall2() { - static int epidx = reg_handle_command_marshall2(); - return epidx; - } - - - inline static int idx_handle_command(void (Main::*)(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd) ) { - return idx_handle_command_marshall2(); - } - - - - static int handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd) { return idx_handle_command_marshall2(); } - - static void _call_handle_command_marshall2(void* impl_msg, void* impl_obj); - - static void _call_sdag_handle_command_marshall2(void* impl_msg, void* impl_obj); - - static int _callmarshall_handle_command_marshall2(char* impl_buf, void* impl_obj_void); - - static void _marshallmessagepup_handle_command_marshall2(PUP::er &p,void *msg); -}; -/* --------------- element proxy ------------------ */ -class CProxy_Main:public CProxy_Chare{ - public: - typedef Main local_t; - 
typedef CkIndex_Main index_t; - typedef CProxy_Main proxy_t; - typedef CProxy_Main element_t; - - CProxy_Main(void) {}; - CProxy_Main(CkChareID __cid) : CProxy_Chare(__cid){ } - CProxy_Main(const Chare *c) : CProxy_Chare(c){ } - - int ckIsDelegated(void) const - { return CProxy_Chare::ckIsDelegated(); } - inline CkDelegateMgr *ckDelegatedTo(void) const - { return CProxy_Chare::ckDelegatedTo(); } - inline CkDelegateData *ckDelegatedPtr(void) const - { return CProxy_Chare::ckDelegatedPtr(); } - CkGroupID ckDelegatedIdx(void) const - { return CProxy_Chare::ckDelegatedIdx(); } - - inline void ckCheck(void) const - { CProxy_Chare::ckCheck(); } - const CkChareID &ckGetChareID(void) const - { return CProxy_Chare::ckGetChareID(); } - operator const CkChareID &(void) const - { return ckGetChareID(); } - - void ckDelegate(CkDelegateMgr *dTo,CkDelegateData *dPtr=NULL) - { CProxy_Chare::ckDelegate(dTo,dPtr); } - void ckUndelegate(void) - { CProxy_Chare::ckUndelegate(); } - void pup(PUP::er &p) - { CProxy_Chare::pup(p); - } - - void ckSetChareID(const CkChareID &c) - { CProxy_Chare::ckSetChareID(c); } - Main *ckLocal(void) const - { return (Main *)CkLocalChare(&ckGetChareID()); } -/* DECLS: Main(CkArgMsg* impl_msg); - */ - static CkChareID ckNew(CkArgMsg* impl_msg, int onPE=CK_PE_ANY); - static void ckNew(CkArgMsg* impl_msg, CkChareID* pcid, int onPE=CK_PE_ANY); - -/* DECLS: void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); - */ - - void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd, const CkEntryOptions *impl_e_opts=NULL); - -}; -#define Main_SDAG_CODE -typedef CBaseT1CBase_Main; - - - - - - - - - - - - - - - - - - - - - - - - - - - -/* ---------------- method closures -------------- */ -class Closure_Main { - public: - - - struct handle_command_2_closure; - -}; - -extern void _registerserver(void); -extern "C" void CkRegisterMainModule(void); -#endif diff --git a/src/server.def.h b/src/server.def.h deleted file mode 100644 index 5e00c13..0000000 --- a/src/server.def.h +++ /dev/null @@ -1,453 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - -/* ---------------- method closures -------------- */ -#ifndef CK_TEMPLATES_ONLY -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - - struct Closure_Main::handle_command_2_closure : public SDAG::Closure { - int epoch; - uint8_t kind; - uint32_t size; - char *cmd; - - CkMarshallMsg* _impl_marshall; - char* _impl_buf_in; - int _impl_buf_size; - - handle_command_2_closure() { - init(); - _impl_marshall = 0; - _impl_buf_in = 0; - _impl_buf_size = 0; - } - handle_command_2_closure(CkMigrateMessage*) { - init(); - _impl_marshall = 0; - _impl_buf_in = 0; - _impl_buf_size = 0; - } - int & getP0() { return epoch;} - uint8_t & getP1() { return kind;} - uint32_t & getP2() { return size;} - char *& getP3() { return cmd;} - void pup(PUP::er& __p) { - __p | epoch; - __p | kind; - __p | size; - packClosure(__p); - __p | _impl_buf_size; - bool hasMsg = (_impl_marshall != 0); __p | hasMsg; - if (hasMsg) CkPupMessage(__p, (void**)&_impl_marshall); - else PUParray(__p, _impl_buf_in, _impl_buf_size); - if (__p.isUnpacking()) { - char *impl_buf = _impl_marshall ? 
_impl_marshall->msgBuf : _impl_buf_in; - PUP::fromMem implP(impl_buf); - PUP::detail::TemporaryObjectHolder epoch; - implP|epoch; - PUP::detail::TemporaryObjectHolder kind; - implP|kind; - PUP::detail::TemporaryObjectHolder size; - implP|size; - int impl_off_cmd, impl_cnt_cmd; - implP|impl_off_cmd; - implP|impl_cnt_cmd; - impl_buf+=CK_ALIGN(implP.size(),16); - cmd = (char *)(impl_buf+impl_off_cmd); - } - } - virtual ~handle_command_2_closure() { - if (_impl_marshall) CmiFree(UsrToEnv(_impl_marshall)); - } - PUPable_decl(SINGLE_ARG(handle_command_2_closure)); - }; -#endif /* CK_TEMPLATES_ONLY */ - - - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::negate_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::abs_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::square_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::sqrt_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::reciprocal_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::sin_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::cos_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::log_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::exp_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::scale_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::add_constant_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::relu_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::add_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::subtract_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::multiply_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::divide_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::power_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::modulo_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::max_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::min_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::greater_than_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::less_than_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::equal_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::atan2_op) -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY - PUPable_def(ct::weighted_average_op) -#endif /* CK_TEMPLATES_ONLY */ - -/* DEFS: mainchare Main: Chare{ -Main(CkArgMsg* impl_msg); -void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); -}; - */ -#ifndef CK_TEMPLATES_ONLY - int CkIndex_Main::__idx=0; -#endif /* CK_TEMPLATES_ONLY */ -#ifndef CK_TEMPLATES_ONLY -#endif /* CK_TEMPLATES_ONLY */ -#ifndef CK_TEMPLATES_ONLY -/* DEFS: Main(CkArgMsg* impl_msg); - */ -CkChareID CProxy_Main::ckNew(CkArgMsg* impl_msg, int impl_onPE) -{ - CkChareID impl_ret; - CkCreateChare(CkIndex_Main::__idx, CkIndex_Main::idx_Main_CkArgMsg(), impl_msg, &impl_ret, impl_onPE); - return impl_ret; -} -void CProxy_Main::ckNew(CkArgMsg* impl_msg, CkChareID* pcid, int impl_onPE) -{ - 
CkCreateChare(CkIndex_Main::__idx, CkIndex_Main::idx_Main_CkArgMsg(), impl_msg, pcid, impl_onPE); -} - -// Entry point registration function -int CkIndex_Main::reg_Main_CkArgMsg() { - int epidx = CkRegisterEp("Main(CkArgMsg* impl_msg)", - reinterpret_cast(_call_Main_CkArgMsg), CMessage_CkArgMsg::__idx, __idx, 0); - CkRegisterMessagePupFn(epidx, (CkMessagePupFn)CkArgMsg::ckDebugPup); - return epidx; -} - -void CkIndex_Main::_call_Main_CkArgMsg(void* impl_msg, void* impl_obj_void) -{ - Main* impl_obj = static_cast(impl_obj_void); - new (impl_obj_void) Main((CkArgMsg*)impl_msg); -} -#endif /* CK_TEMPLATES_ONLY */ - -#ifndef CK_TEMPLATES_ONLY -/* DEFS: void handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd); - */ -void CProxy_Main::handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd, const CkEntryOptions *impl_e_opts) -{ - ckCheck(); - //Marshall: int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd - int impl_off=0; - int impl_arrstart=0; - int impl_off_cmd, impl_cnt_cmd; - impl_off_cmd=impl_off=CK_ALIGN(impl_off,sizeof(char)); - impl_off+=(impl_cnt_cmd=sizeof(char)*(size)); - { //Find the size of the PUP'd data - PUP::sizer implP; - implP|epoch; - //Have to cast away const-ness to get pup routine - implP|(typename std::remove_cv::type>::type &)kind; - //Have to cast away const-ness to get pup routine - implP|(typename std::remove_cv::type>::type &)size; - implP|impl_off_cmd; - implP|impl_cnt_cmd; - impl_arrstart=CK_ALIGN(implP.size(),16); - impl_off+=impl_arrstart; - } - CkMarshallMsg *impl_msg=CkAllocateMarshallMsg(impl_off,impl_e_opts); - { //Copy over the PUP'd data - PUP::toMem implP((void *)impl_msg->msgBuf); - implP|epoch; - //Have to cast away const-ness to get pup routine - implP|(typename std::remove_cv::type>::type &)kind; - //Have to cast away const-ness to get pup routine - implP|(typename std::remove_cv::type>::type &)size; - implP|impl_off_cmd; - implP|impl_cnt_cmd; - } - char *impl_buf=impl_msg->msgBuf+impl_arrstart; - memcpy(impl_buf+impl_off_cmd,cmd,impl_cnt_cmd); - if (ckIsDelegated()) { - int destPE=CkChareMsgPrep(CkIndex_Main::idx_handle_command_marshall2(), impl_msg, &ckGetChareID()); - if (destPE!=-1) ckDelegatedTo()->ChareSend(ckDelegatedPtr(),CkIndex_Main::idx_handle_command_marshall2(), impl_msg, &ckGetChareID(),destPE); - } else { - CkSendMsg(CkIndex_Main::idx_handle_command_marshall2(), impl_msg, &ckGetChareID(),0); - } -} - -// Entry point registration function -int CkIndex_Main::reg_handle_command_marshall2() { - int epidx = CkRegisterEp("handle_command(int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd)", - reinterpret_cast(_call_handle_command_marshall2), CkMarshallMsg::__idx, __idx, 0+CK_EP_NOKEEP); - CkRegisterMarshallUnpackFn(epidx, _callmarshall_handle_command_marshall2); - CkRegisterMessagePupFn(epidx, _marshallmessagepup_handle_command_marshall2); - - return epidx; -} - -void CkIndex_Main::_call_handle_command_marshall2(void* impl_msg, void* impl_obj_void) -{ - Main* impl_obj = static_cast(impl_obj_void); - CkMarshallMsg *impl_msg_typed=(CkMarshallMsg *)impl_msg; - char *impl_buf=impl_msg_typed->msgBuf; - envelope *env = UsrToEnv(impl_msg_typed); - /*Unmarshall pup'd fields: int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd*/ - PUP::fromMem implP(impl_buf); - PUP::detail::TemporaryObjectHolder epoch; - implP|epoch; - PUP::detail::TemporaryObjectHolder kind; - implP|kind; - PUP::detail::TemporaryObjectHolder size; - implP|size; - int 
impl_off_cmd, impl_cnt_cmd; - implP|impl_off_cmd; - implP|impl_cnt_cmd; - impl_buf+=CK_ALIGN(implP.size(),16); - /*Unmarshall arrays:*/ - char *cmd=(char *)(impl_buf+impl_off_cmd); - impl_obj->handle_command(std::move(epoch.t), std::move(kind.t), std::move(size.t), cmd); -} -int CkIndex_Main::_callmarshall_handle_command_marshall2(char* impl_buf, void* impl_obj_void) { - Main* impl_obj = static_cast(impl_obj_void); - envelope *env = UsrToEnv(impl_buf); - /*Unmarshall pup'd fields: int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd*/ - PUP::fromMem implP(impl_buf); - PUP::detail::TemporaryObjectHolder epoch; - implP|epoch; - PUP::detail::TemporaryObjectHolder kind; - implP|kind; - PUP::detail::TemporaryObjectHolder size; - implP|size; - int impl_off_cmd, impl_cnt_cmd; - implP|impl_off_cmd; - implP|impl_cnt_cmd; - impl_buf+=CK_ALIGN(implP.size(),16); - /*Unmarshall arrays:*/ - char *cmd=(char *)(impl_buf+impl_off_cmd); - impl_obj->handle_command(std::move(epoch.t), std::move(kind.t), std::move(size.t), cmd); - return implP.size(); -} -void CkIndex_Main::_marshallmessagepup_handle_command_marshall2(PUP::er &implDestP,void *impl_msg) { - CkMarshallMsg *impl_msg_typed=(CkMarshallMsg *)impl_msg; - char *impl_buf=impl_msg_typed->msgBuf; - envelope *env = UsrToEnv(impl_msg_typed); - /*Unmarshall pup'd fields: int epoch, const uint8_t &kind, const uint32_t &size, const char *cmd*/ - PUP::fromMem implP(impl_buf); - PUP::detail::TemporaryObjectHolder epoch; - implP|epoch; - PUP::detail::TemporaryObjectHolder kind; - implP|kind; - PUP::detail::TemporaryObjectHolder size; - implP|size; - int impl_off_cmd, impl_cnt_cmd; - implP|impl_off_cmd; - implP|impl_cnt_cmd; - impl_buf+=CK_ALIGN(implP.size(),16); - /*Unmarshall arrays:*/ - char *cmd=(char *)(impl_buf+impl_off_cmd); - if (implDestP.hasComments()) implDestP.comment("epoch"); - implDestP|epoch; - if (implDestP.hasComments()) implDestP.comment("kind"); - implDestP|kind; - if (implDestP.hasComments()) implDestP.comment("size"); - implDestP|size; - if (implDestP.hasComments()) implDestP.comment("cmd"); - implDestP.synchronize(PUP::sync_begin_array); - for (int impl_i=0;impl_i*(sizeof(*cmd)) -void CBase_Main::virtual_pup(PUP::er &p) { - recursive_pup

(dynamic_cast(this), p); -} -#endif /* CK_TEMPLATES_ONLY */ From 13bf3effd99511964abf5a78337c44259093112b Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 20:53:13 +0530 Subject: [PATCH 13/34] add gitignore --- .gitignore | 6 ++++++ .vscode/settings.json | 20 -------------------- 2 files changed, 6 insertions(+), 20 deletions(-) create mode 100644 .gitignore delete mode 100644 .vscode/settings.json diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..a8240ad --- /dev/null +++ b/.gitignore @@ -0,0 +1,6 @@ +build +charmnumeric.egg-info +*.decl.h +*.def.h +dist +.vscode diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 251dd1b..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,20 +0,0 @@ -{ - "files.associations": { - "*.sage": "python", - "vector": "cpp", - "iosfwd": "cpp", - "compare": "cpp", - "cstdint": "cpp", - "format": "cpp", - "unordered_map": "cpp", - "map": "cpp", - "set": "cpp", - "chrono": "cpp", - "memory": "cpp", - "utility": "cpp", - "array": "cpp", - "ranges": "cpp", - "tuple": "cpp", - "variant": "cpp" - } -} \ No newline at end of file From 0af35eeb8359e4c57dc4e08f5db84601226230cf Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 13 Oct 2025 20:54:43 +0530 Subject: [PATCH 14/34] nit --- examples/run.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/run.sh b/examples/run.sh index 43bee95..92d37a7 100755 --- a/examples/run.sh +++ b/examples/run.sh @@ -1,4 +1,4 @@ cd .. python setup.py install cd examples -python custom_ops.py +python $1 From 90eb20ea47ae8b4a7678bee3ae8c0278f3c76c3b Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Tue, 14 Oct 2025 15:58:09 +0530 Subject: [PATCH 15/34] add support for copy operation --- charmnumeric/array.py | 1 - charmnumeric/ast.py | 3 ++ config.cmake | 4 +-- examples/bench.py | 51 ++++++++++++++++++++++++++++++++++ examples/conjugate_gradient.py | 15 +++++----- examples/graph.py | 5 ++-- src/CMakeLists.txt | 2 +- src/ast.hpp | 23 +++++++++++---- 8 files changed, 85 insertions(+), 19 deletions(-) create mode 100644 examples/bench.py diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 09d0e8d..72108ec 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -273,7 +273,6 @@ def get(self): data_bytes = send_command_raw(Handlers.fetch_handler, cmd, reply_size=total_size) return from_bytes(data_bytes, np.dtype(self.dtype).char) else: - print("GET OSME") total_size = self.itemsize for i in self.shape: total_size*=i diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index fa9c5f2..fd764ab 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -36,6 +36,9 @@ def __init__(self, name, opcode, operands, args=[]): ############################################################################################################################################### # Marker determines whether we are dealing with a tensor, a scalar or an arithmetic type # + # Marker = 0 : arithmetic type # + # Marker = 1 : scalar type # + # Marker = 2 : tensor type # # Encoding = | Marker | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # # | 8 | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ 
| # # NB: If opcode is 0, the encoding is limited to ID # diff --git a/config.cmake b/config.cmake index 24bd89b..4d72c5c 100644 --- a/config.cmake +++ b/config.cmake @@ -1,5 +1,5 @@ -set(CHARM_DIR "/home/anant/winter2024/lbp/study/charm/netlrts-linux-x86_64") -set(BASE_DIR "/home/anant/sem7/LibCharmtyles") +set(CHARM_DIR "/home/shogo/master/Kale/charm/netlrts-linux-x86_64") +set(BASE_DIR "/home/shogo/master/Kale/LibCharmtyles") set(EIGEN_DIR "/usr/include/eigen3") set(CUDA_DIR "/path/to/CUDA/directory") set(KOKKOS_DIR "${BASE_DIR}/kokkos/install") diff --git a/examples/bench.py b/examples/bench.py new file mode 100644 index 0000000..5e63348 --- /dev/null +++ b/examples/bench.py @@ -0,0 +1,51 @@ +from charmnumeric.array import connect, ndarray +from charmnumeric.ast import set_max_depth +from charmnumeric.ccs import enable_debug +import charmnumeric.linalg as lg +import numpy as np +import time + +set_max_depth(10) + +def f(): + b = ndarray(1, 10, np.float64, init_value=10) + v = ndarray(1, 10, np.float64, init_value=20) + c = ndarray(1, 10, np.float64, init_value=20) + v1 = (b + v) - c * 3 + v2 = b.scale(3) + c.add_constant(10) + v3 = (b + c) @ v + v1.get() + v2.get() + v3.get() + start = time.time() + for i in range(100): + v1 = (b + v) - c * 3 + v2 = b.scale(3) + c.add_constant(10) + v3 = (b + c) @ v + v1.get() + v2.get() + v3.get() + + end = time.time() + + print("VECTOR BENCH ", end-start) + + + start = time.time() + b = ndarray(2, [10,10], np.float64, init_value=10) + v = ndarray(2, [10,10], np.float64, init_value=20) + c = ndarray(2, [10,10], np.float64, init_value=20) + for i in range(100): + v1 = (b + v) - c * 3 + v2 = b.exp() + c.add_constant(10) + v3 = (b + c) @ v + v1.get() + v2.get() + v3.get() + + end = time.time() + print("MARIX BENCH ", end-start) + +if __name__ == '__main__': + connect("172.17.0.1", 10000) + s = f() \ No newline at end of file diff --git a/examples/conjugate_gradient.py b/examples/conjugate_gradient.py index 3f9c216..fa316d8 100644 --- a/examples/conjugate_gradient.py +++ b/examples/conjugate_gradient.py @@ -16,14 +16,16 @@ def solve(A, b, x): p = r.copy() rsold = r @ r - for i in range(100): + for i in range(20): #if i % 10 == 0: - gc.collect() + # gc.collect() Ap = A @ p alpha = rsold / (p @ Ap) - x = lg.axpy(alpha, p, x) - r = lg.axpy(alpha, Ap, r, multiplier=-1.) + x = alpha * p + x + r = alpha * Ap - r + # x = lg.axpy(alpha, p, x) + # r = lg.axpy(alpha, Ap, r, multiplier=-1.) 
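Aside: the command layout documented in the charmnumeric/ast.py comment above (Marker | dim | shape | opcode | save_op | ID | ...) is easiest to see with a concrete packing. Below is a minimal sketch, assuming little-endian packing, one 64-bit extent per dimension, and the field widths given in that comment; the encode_leaf helper is illustrative only and is not the library's to_bytes/get_command API.

    import struct

    def encode_leaf(tensor_id, shape, marker=2):
        # Pack a leaf node per the documented layout. With opcode 0 the
        # encoding stops right after the array ID (the "NB" note above).
        cmd = struct.pack('<B', marker)           # Marker: 2 = tensor
        cmd += struct.pack('<B', len(shape))      # dim
        for extent in shape:
            cmd += struct.pack('<Q', extent)      # one 64-bit extent per dim
        cmd += struct.pack('<I', 0)               # opcode 0: reference an existing array
        cmd += struct.pack('<?', False)           # save_op flag
        cmd += struct.pack('<Q', tensor_id)       # backend array ID
        return cmd

    print(encode_leaf(42, [10, 10]).hex())        # 31 bytes for a 2-D leaf

For a non-leaf node, the remaining fields from the comment (NumArgs, Args, NumOperands, per-operand encoding size, then the operands' own encodings) follow the ID; that is the stream the recursive walk on the server consumes.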
rsnew = r @ r @@ -31,7 +33,8 @@ def solve(A, b, x): # print("Converged in %i iterations" % (i + 1)) # break - p = lg.axpy(rsnew / rsold, p, r) + p = (rsnew / rsold) * p + r + # p = lg.axpy(rsnew / rsold, p, r) rsold = rsnew return x @@ -43,8 +46,6 @@ def solve(A, b, x): b = ndarray(1, 184, np.float64) x = ndarray(1, 184, np.float64) - #d = (b @ x).get() - start = time.time() x = solve(A, b, x) x.evaluate() diff --git a/examples/graph.py b/examples/graph.py index 98a5bd6..d2944b3 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -10,10 +10,10 @@ def f(): v = ndarray(2, [10, 10], np.float64, init_value=20) b = ndarray(2, [10, 10], np.float64, init_value=10) - # c = ndarray(1, 10, np.float64, init_value=-30) + c = ndarray(1, 10, np.float64, init_value=-30) # k = v * 2 + b + 3 + c - 32 # l = k >= 42 - v1 = v @ b + # v1 = v @ b # v1 = (b + c) @ (b - c) # q.get() # v1 = q @ c @@ -36,6 +36,7 @@ def f(): # w.get() # res = q.abs() + c # baka = final_res.get() + v1 = b.copy() print(v1.get()) # r = b.where(42, 69) # g = b.where(v, c) diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 67d384a..2eeb08e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -48,4 +48,4 @@ else() add_executable(server.out server.cpp ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h ${CMAKE_SOURCE_DIR}/server.decl.h) target_include_directories(server.out PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}) target_link_libraries(server.out Kokkos::kokkos) -endif() \ No newline at end of file +endif() diff --git a/src/ast.hpp b/src/ast.hpp index 340cb3c..e36e290 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -46,7 +46,7 @@ template std::vector faster_tortoise(char *cmd, bool flush = false); template -std::pair getMatmulOperand(char* cmd) { +std::pair getFlushedOperand(char* cmd) { char* recurse_cmd = cmd; uint8_t marker = extract(cmd); @@ -76,6 +76,7 @@ ctop inline to_ctop(uint64_t opcode) noexcept { case 3: return ctop::multiply; case 4: return ctop::divide; case 5: return ctop::matmul; + case 6: return ctop::copy; case 11: return ctop::greater; case 12: return ctop::lesser; case 13: return ctop::geq; @@ -127,8 +128,6 @@ std::shared_ptr to_ct_binary(uint64_t opcode, const std::ve } } - - template std::vector faster_tortoise(char *cmd, bool flush) { @@ -181,7 +180,7 @@ std::vector faster_tortoise(char *cmd, bool flush) rootNode = tensorAstNodeType(-1, ctopcode, to_ct_unary(opcode, args), shape); } else if(ctopcode==ctop::binary_expr){ rootNode = tensorAstNodeType(-1, ctopcode, to_ct_binary(opcode, args), shape); - }else { + } else { rootNode = tensorAstNodeType(ctopcode, shape); } std::vector ast; @@ -194,10 +193,10 @@ std::vector faster_tortoise(char *cmd, bool flush) // 3. 
a gemm returning a matrix if both the operands are matrices if (ctopcode == ctop::matmul) { uint32_t operand_size = extract(cmd); - std::pair xOperandInfo = getMatmulOperand(cmd); + std::pair xOperandInfo = getFlushedOperand(cmd); cmd += operand_size; operand_size = extract(cmd); - std::pair yOperandInfo = getMatmulOperand(cmd); + std::pair yOperandInfo = getFlushedOperand(cmd); cmd += operand_size; const uint8_t& xDim = xOperandInfo.first; @@ -247,6 +246,18 @@ std::vector faster_tortoise(char *cmd, bool flush) return tensorNode; } } + } else if (ctopcode == ctop::copy) { + uint32_t operand_size = extract(cmd); + std::pair copyOperandInfo = getFlushedOperand(cmd); + cmd += operand_size; + + const uint64_t& copyID = copyOperandInfo.second; + const auto& copy = std::get(lookup(copyID)); + tensorType tensor(copy); + + const auto& tensorNode = tensor(); + insert(tensorID, std::move(tensor)); + return tensorNode; } if(numOperands <= 2) { From 1c57953b84c82350b23be96abba959886dbdb640 Mon Sep 17 00:00:00 2001 From: anant Date: Tue, 14 Oct 2025 17:02:09 +0530 Subject: [PATCH 16/34] add axpy support for charmnumerics --- charmnumeric/ccs.py | 29 +++++++++++++------------- charmnumeric/linalg.py | 14 +++---------- src/ast.hpp | 46 ++++++++++++++++++++++++++++-------------- src/server.ci | 1 + 4 files changed, 49 insertions(+), 41 deletions(-) diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 1fd8e69..2f75b25 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -21,20 +21,18 @@ '/': 4, '@': 5, 'copy': 6, - 'axpy': 7, - 'axpy_multiplier': 8, - 'setitem': 9, - 'pow': 10, - '>': 11, - '<': 12, - '>=': 13, - '<=': 14, - '==': 15, - '!=': 16, - '&': 17, - '|': 18, - '!': 19, - 'where': 20, + 'setitem': 7, + 'pow': 8, + '>': 9, + '<': 10, + '>=': 11, + '<=': 12, + '==': 13, + '!=': 14, + '&': 15, + '|': 16, + '!': 17, + 'where': 18, # custom_unary_op 'exp': 41, @@ -63,7 +61,8 @@ 'less_than': 80, 'equal': 81, 'atan2': 82, - 'weighted_average': 83 + 'weighted_average': 83, + 'axpy': 84 } INV_OPCODES = {v: k for k, v in OPCODES.items()} diff --git a/charmnumeric/linalg.py b/charmnumeric/linalg.py index 823b7d5..2894e4b 100644 --- a/charmnumeric/linalg.py +++ b/charmnumeric/linalg.py @@ -7,16 +7,8 @@ from charmnumeric.ast import ASTNode -def axpy(a, x, y, multiplier=None): - operands = [a, x, y] - if multiplier is not None: - operands.append(multiplier) - operation = 'axpy_multiplier' - else: - operation = 'axpy' +def axpy(a, x, y): res = get_name() - cmd_buffer = ASTNode(res, OPCODES.get(operation), operands) - return create_ndarray(x.ndim, x.dtype, + cmd_buffer = ASTNode(res, OPCODES.get('axpy'), [x, y], args=[a]) + return create_ndarray(x.ndim, x.dtype, x.shape, name=res, command_buffer=cmd_buffer) - - diff --git a/src/ast.hpp b/src/ast.hpp index e36e290..f6aa0bf 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -68,7 +68,7 @@ std::pair getFlushedOperand(char* cmd) { ctop inline to_ctop(uint64_t opcode) noexcept { if(opcode>=41 and opcode<=52) return ctop::unary_expr; - if(opcode>=71 and opcode<=83) return ctop::binary_expr; + if(opcode>=71 and opcode<=84) return ctop::binary_expr; switch (opcode) { case 0: return ctop::noop; case 1: return ctop::add; @@ -77,16 +77,16 @@ ctop inline to_ctop(uint64_t opcode) noexcept { case 4: return ctop::divide; case 5: return ctop::matmul; case 6: return ctop::copy; - case 11: return ctop::greater; - case 12: return ctop::lesser; - case 13: return ctop::geq; - case 14: return ctop::leq; - case 15: return ctop::eq; - case 16: return ctop::neq; - case 17: 
return ctop::logical_and; - case 18: return ctop::logical_or; - case 19: return ctop::logical_not; - case 20: return ctop::where; + case 9: return ctop::greater; + case 10: return ctop::lesser; + case 11: return ctop::geq; + case 12: return ctop::leq; + case 13: return ctop::eq; + case 14: return ctop::neq; + case 15: return ctop::logical_and; + case 16: return ctop::logical_or; + case 17: return ctop::logical_not; + case 18: return ctop::where; default: return ctop::noop; } } @@ -124,6 +124,7 @@ std::shared_ptr to_ct_binary(uint64_t opcode, const std::ve case 81: return ct::binary_ops::equal(args); case 82: return ct::binary_ops::atan2(args); case 83: return ct::binary_ops::weighted_average(args); + case 84: return ct::binary_ops::axpy(args); default: return nullptr; } } @@ -155,6 +156,7 @@ std::vector faster_tortoise(char *cmd, bool flush) uint32_t opcode = extract(cmd); bool store = extract(cmd); uint64_t tensorID = extract(cmd); + ckout<<"for tensorid "< "< faster_tortoise(char *cmd, bool flush) uint32_t operand_size = extract(cmd); std::vector left = faster_tortoise(cmd); cmd += operand_size; - operand_size = extract(cmd); - std::vector right = faster_tortoise(cmd); - cmd += operand_size; + std::vector right; + if(numOperands==2){ + operand_size = extract(cmd); + right = faster_tortoise(cmd); + cmd += operand_size; + } rootNode.left_ = 1; size_t right_size; + size_t left_size; if (ctopcode == ctop::unary_expr || ctopcode == ctop::logical_not || ctopcode == ctop::custom_expr) { rootNode.right_ = -1; right_size = 0; + left_size = left.size(); + } else if(ctopcode == ctop::copy){ + //assuming copy is done on non temps only + rootNode.right_ = -1; + left_size = 0; + right_size = 0; + rootNode.copy_id_ = left[0].name_; } else { rootNode.right_ = left.size() + 1; right_size = right.size(); + left_size = left.size(); } ast.reserve(left.size() + right_size + 1); @@ -317,6 +331,7 @@ std::vector faster_tortoise(char *cmd, bool flush) ast[i].ter_ += 1 + left.size(); } } + ckout<<"HERE "<(cmd); std::vector left = faster_tortoise(cmd); @@ -372,8 +387,9 @@ std::vector faster_tortoise(char *cmd, bool flush) ast[i].ter_ += 1 + left.size() + right.size(); } } - + if (store or flush) { + ckout<<"store through AST break "< Date: Tue, 14 Oct 2025 19:52:56 +0530 Subject: [PATCH 17/34] handle scalar computation separately --- charmnumeric/array.py | 100 ++++++--- charmnumeric/ast.py | 5 +- charmnumeric/ccs.py | 1 - examples/graph.py | 19 +- src/ast.hpp | 504 +++++++++++++++++++++++++++++------------- src/server.cpp | 13 +- 6 files changed, 437 insertions(+), 205 deletions(-) diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 72108ec..0f80220 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -23,6 +23,18 @@ def from_numpy(nparr): return ndarray(nparr.ndim, dtype=nparr.dtype, shape=nparr.shape, nparr=nparr) +def isScalarResult(a, b): + return a.is_scalar and b.is_scalar + +def getDimShape(a, b): + if isinstance(b, float) or isinstance(b, int): + return [a.ndim, a.shape.copy()] + elif isinstance(a, float) or isinstance(a, int): + return [b.ndim, b.shape.copy()] + elif a.is_scalar: + return [b.ndim, b.shape.copy()] + else: + return [a.ndim, a.shape.copy()] class ndarray: def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, @@ -104,8 +116,9 @@ def __neg__(self): def __add__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('+'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, 
command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) def __radd__(self, other): return self + other @@ -113,8 +126,10 @@ def __radd__(self, other): def __sub__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('-'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __rsub__(self, other): return -1 * (self - other) @@ -122,8 +137,10 @@ def __rsub__(self, other): def __lt__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('<'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __rlt__(self, other): return self >= other @@ -131,8 +148,10 @@ def __rlt__(self, other): def __gt__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('>'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __rgt__(self, other): return self <= other @@ -140,8 +159,10 @@ def __rgt__(self, other): def __le__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('<='), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __rle__(self, other): return self > other @@ -149,8 +170,10 @@ def __rle__(self, other): def __ge__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('>='), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __rge__(self, other): return self < other @@ -158,8 +181,10 @@ def __rge__(self, other): def __eq__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('=='), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __req__(self, other): return self == other @@ -167,8 +192,10 @@ def __req__(self, other): def __ne__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('!='), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, 
is_scalar=isScalarResult(self, other)) + def __rne__(self, other): return self != other @@ -176,8 +203,10 @@ def __rne__(self, other): def __and__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('&'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __rand__(self, other): return self & other @@ -185,8 +214,10 @@ def __rand__(self, other): def __or__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('|'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __ror__(self, other): return self | other @@ -195,13 +226,15 @@ def __invert__(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('!'), [self]) return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + name=res, command_buffer=cmd_buffer, is_scalar=self.is_scalar) def __mul__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('*'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __rmul__(self, other): return self * other @@ -209,14 +242,18 @@ def __rmul__(self, other): def __truediv__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('/'), [self, other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __rtruediv__(self, other): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('/'), [1., self/other]) - return create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + ndim, shape = getDimShape(self, other) + return create_ndarray(ndim, self.dtype, shape=shape, + name=res, command_buffer=cmd_buffer, is_scalar=isScalarResult(self, other)) + def __matmul__(self, other): is_scalar = False @@ -247,7 +284,7 @@ def _flush_command_buffer(self): if self.valid: return validated_arrays = {self.name : self} - cmd = self.command_buffer.get_command(validated_arrays, self.ndim, self.shape) + cmd = self.command_buffer.get_command(validated_arrays, self.ndim, self.shape, is_scalar=self.is_scalar) reply_size = 0 for name, arr in validated_arrays.items(): reply_size += 8 + 8 * arr.ndim @@ -289,14 +326,13 @@ def validate(self): def copy(self): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('copy'), [self]) - return create_ndarray(self.ndim, self.dtype,shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) - + return create_ndarray(self.ndim, self.dtype,shape=self.shape.copy(), name=res, command_buffer=cmd_buffer, is_scalar=self.is_scalar) + def where(self, other, third): res = get_name() cmd_buffer = ASTNode(res, OPCODES.get('where'), [other, third, self]) return 
create_ndarray(self.ndim, self.dtype, shape=self.shape.copy(), - name=res, command_buffer=cmd_buffer) + name=res, command_buffer=cmd_buffer, is_scalar=self.is_scalar) def exp(self): res = get_name() diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index fd764ab..dbb9cba 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -81,10 +81,7 @@ def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False) opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - if op.is_scalar: - opcmd = op.command_buffer.get_command(validated_arrays, ndim, shape, save=save_op, is_scalar=op.is_scalar) - else: - opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) + opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) if not op.valid and save_op: validated_arrays[op.name] = op elif isinstance(op, float) or isinstance(op, int): diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 2f75b25..190ad6c 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -146,7 +146,6 @@ def get_creation_command(arr, name, shape, buf=None): cmd += buf elif arr.init_value is not None: cmd += to_bytes(arr.init_value, 'd') - print(cmd) cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd return cmd diff --git a/examples/graph.py b/examples/graph.py index d2944b3..caa36e0 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -8,9 +8,9 @@ set_max_depth(10) def f(): - v = ndarray(2, [10, 10], np.float64, init_value=20) - b = ndarray(2, [10, 10], np.float64, init_value=10) - c = ndarray(1, 10, np.float64, init_value=-30) + v = ndarray(1, 10, np.float64, init_value=2) + b = ndarray(1, 10, np.float64, init_value=1) + c = ndarray(1, 10, np.float64, init_value=3) # k = v * 2 + b + 3 + c - 32 # l = k >= 42 # v1 = v @ b @@ -32,12 +32,21 @@ def f(): # final_res = res + 42 # q.get() + a1 = b @ c + print(a1.get()) + a2 = v @ c + print(a2.get()) + res = (a1 / a2) * b + c + v = 2 + # a3 = a1 + a2 + print(v.get()) + # res = a3 * v # w = c @ b # w.get() # res = q.abs() + c # baka = final_res.get() - v1 = b.copy() - print(v1.get()) + # v1 = b.copy() + # print(res.get()) # r = b.where(42, 69) # g = b.where(v, c) # z = ~r diff --git a/src/ast.hpp b/src/ast.hpp index f6aa0bf..5c19b55 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -1,26 +1,24 @@ #include -#include #include -#include +#include +#include #include +#include #include -#include +#include #include -#include using ctop = ct::util::Operation; using ct_name_t = uint64_t; -using ct_array_t = std::variant; +using ct_array_t = std::variant; std::unordered_map symbol_table; -inline static void insert(ct_name_t name, ct_array_t arr) noexcept { +inline static void insert(ct_name_t name, ct_array_t arr) { CkPrintf("Created array %" PRIu64 " on server\n", name); symbol_table[name] = std::move(arr); } -inline static void remove(ct_name_t name) noexcept { - symbol_table.erase(name); -} +inline static void remove(ct_name_t name) noexcept { symbol_table.erase(name); } static ct_array_t &lookup(ct_name_t name) { auto find = symbol_table.find(name); @@ -30,35 +28,36 @@ static ct_array_t &lookup(ct_name_t name) { return find->second; } -template -inline T extract(char *&msg) noexcept { +template inline T extract(char *&msg) noexcept { T arg = *(reinterpret_cast(msg)); msg += sizeof(T); return arg; } -template -inline T peek(char* &msg) 
noexcept { - return *(reinterpret_cast(msg)); +template inline T peek(char *&msg) noexcept { + return *(reinterpret_cast(msg)); } -template +template std::vector faster_tortoise(char *cmd, bool flush = false); -template -std::pair getFlushedOperand(char* cmd) { - char* recurse_cmd = cmd; +template +std::pair getFlushedOperand(char *cmd) { + char *recurse_cmd = cmd; uint8_t marker = extract(cmd); - if (marker != 2) CmiAbort("Matmuls only supported with Tensor Types"); + if (marker != 2) + CmiAbort("Matmuls only supported with Tensor Types"); uint8_t dim = extract(cmd); - if (dim < 1 || dim > 2) CmiAbort("Matmuls not supported with dimension%" PRIu8 "", dim); + if (dim < 1 || dim > 2) + CmiAbort("Matmuls not supported with dimension%" PRIu8 "", dim); cmd += dim * sizeof(uint64_t); uint32_t opcode = extract(cmd); - if (opcode) faster_tortoise(recurse_cmd, true); + if (opcode) + faster_tortoise(recurse_cmd, true); cmd += sizeof(bool); @@ -67,74 +66,271 @@ std::pair getFlushedOperand(char* cmd) { } ctop inline to_ctop(uint64_t opcode) noexcept { - if(opcode>=41 and opcode<=52) return ctop::unary_expr; - if(opcode>=71 and opcode<=84) return ctop::binary_expr; + if (opcode >= 41 and opcode <= 52) + return ctop::unary_expr; + if (opcode >= 71 and opcode <= 83) + return ctop::binary_expr; + switch (opcode) { + case 0: + return ctop::noop; + case 1: + return ctop::add; + case 2: + return ctop::sub; + case 3: + return ctop::multiply; + case 4: + return ctop::divide; + case 5: + return ctop::matmul; + case 6: + return ctop::copy; + case 11: + return ctop::greater; + case 12: + return ctop::lesser; + case 13: + return ctop::geq; + case 14: + return ctop::leq; + case 15: + return ctop::eq; + case 16: + return ctop::neq; + case 17: + return ctop::logical_and; + case 18: + return ctop::logical_or; + case 19: + return ctop::logical_not; + case 20: + return ctop::where; + default: + return ctop::noop; + } +} + +std::shared_ptr +to_ct_unary(uint64_t opcode, const std::vector &args) noexcept { switch (opcode) { - case 0: return ctop::noop; - case 1: return ctop::add; - case 2: return ctop::sub; - case 3: return ctop::multiply; - case 4: return ctop::divide; - case 5: return ctop::matmul; - case 6: return ctop::copy; - case 9: return ctop::greater; - case 10: return ctop::lesser; - case 11: return ctop::geq; - case 12: return ctop::leq; - case 13: return ctop::eq; - case 14: return ctop::neq; - case 15: return ctop::logical_and; - case 16: return ctop::logical_or; - case 17: return ctop::logical_not; - case 18: return ctop::where; - default: return ctop::noop; + case 41: + return ct::unary_ops::exp(args); + case 42: + return ct::unary_ops::log(args); + case 43: + return ct::unary_ops::abs(args); + case 44: + return ct::unary_ops::negate(args); + case 45: + return ct::unary_ops::square(args); + case 46: + return ct::unary_ops::sqrt(args); + case 47: + return ct::unary_ops::reciprocal(args); + case 48: + return ct::unary_ops::sin(args); + case 49: + return ct::unary_ops::cos(args); + case 50: + return ct::unary_ops::relu(args); + case 51: + return ct::unary_ops::scale(args); + case 52: + return ct::unary_ops::add_constant(args); + default: + return nullptr; } } -std::shared_ptr to_ct_unary(uint64_t opcode, const std::vector& args) noexcept { - switch(opcode) { - case 41: return ct::unary_ops::exp(args); - case 42: return ct::unary_ops::log(args); - case 43: return ct::unary_ops::abs(args); - case 44: return ct::unary_ops::negate(args); - case 45: return ct::unary_ops::square(args); - case 46: return 
ct::unary_ops::sqrt(args); - case 47: return ct::unary_ops::reciprocal(args); - case 48: return ct::unary_ops::sin(args); - case 49: return ct::unary_ops::cos(args); - case 50: return ct::unary_ops::relu(args); - case 51: return ct::unary_ops::scale(args); - case 52: return ct::unary_ops::add_constant(args); - default: return nullptr; +std::shared_ptr +to_ct_binary(uint64_t opcode, const std::vector &args) noexcept { + switch (opcode) { + case 71: + return ct::binary_ops::add(args); + case 72: + return ct::binary_ops::subtract(args); + case 73: + return ct::binary_ops::multiply(args); + case 74: + return ct::binary_ops::divide(args); + case 75: + return ct::binary_ops::power(args); + case 76: + return ct::binary_ops::modulo(args); + case 77: + return ct::binary_ops::max(args); + case 78: + return ct::binary_ops::min(args); + case 79: + return ct::binary_ops::greater_than(args); + case 80: + return ct::binary_ops::less_than(args); + case 81: + return ct::binary_ops::equal(args); + case 82: + return ct::binary_ops::atan2(args); + case 83: + return ct::binary_ops::weighted_average(args); + default: + return nullptr; } } -std::shared_ptr to_ct_binary(uint64_t opcode, const std::vector& args) noexcept { - switch(opcode) { - case 71: return ct::binary_ops::add(args); - case 72: return ct::binary_ops::subtract(args); - case 73: return ct::binary_ops::multiply(args); - case 74: return ct::binary_ops::divide(args); - case 75: return ct::binary_ops::power(args); - case 76: return ct::binary_ops::modulo(args); - case 77: return ct::binary_ops::max(args); - case 78: return ct::binary_ops::min(args); - case 79: return ct::binary_ops::greater_than(args); - case 80: return ct::binary_ops::less_than(args); - case 81: return ct::binary_ops::equal(args); - case 82: return ct::binary_ops::atan2(args); - case 83: return ct::binary_ops::weighted_average(args); - case 84: return ct::binary_ops::axpy(args); - default: return nullptr; +double slower_hare(char *cmd) { + uint8_t marker = extract(cmd); + if (marker == 0) + return extract(cmd); + + /* dims = */ extract(cmd); + /* shape = */ extract(cmd); + + ctop ctopcode = to_ctop(extract(cmd)); + bool store = extract(cmd); + uint64_t tensorID = extract(cmd); + + if (ctopcode == ctop::noop) + return std::get(lookup(tensorID)); + + /* customOpArgs = */ extract(cmd); + + if (ctopcode == ctop::unary_expr || ctopcode == ctop::binary_expr) + CmiAbort("Custom Ops are not defined for scalar type"); + + uint8_t numOperands = extract(cmd); + + // when we encounter a matmul, we treat it as a dot product returning a + // scalar. 
+ if (ctopcode == ctop::matmul) { + uint32_t operand_size = extract(cmd); + std::pair xOperandInfo = + getFlushedOperand(cmd); + cmd += operand_size; + operand_size = extract(cmd); + std::pair yOperandInfo = + getFlushedOperand(cmd); + cmd += operand_size; + + const uint8_t &xDim = xOperandInfo.first; + const uint8_t &yDim = yOperandInfo.first; + const uint64_t &xID = xOperandInfo.second; + const uint64_t &yID = yOperandInfo.second; + + if (xDim == 1 and yDim == 1) { + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); + + ct::scalar tensor0D = ct::dot(x, y); + double result = tensor0D.get(); + insert(tensorID, result); + return result; + } else { + CmiAbort("dot product of tensors does not result in a scalar"); + } + } + + double result; + + if (numOperands == 1) { + uint32_t operand_size = extract(cmd); + double lhs = slower_hare(cmd); + cmd += operand_size; + + switch (ctopcode) { + case ctop::copy: + result = lhs; + break; + case ctop::logical_not: + result = !lhs; + break; + default: + CmiAbort("unrecognized unary op for scalar operands"); + } + } else if (numOperands == 2) { + uint32_t operand_size = extract(cmd); + double lhs = slower_hare(cmd); + cmd += operand_size; + operand_size = extract(cmd); + double rhs = slower_hare(cmd); + cmd += operand_size; + + switch (ctopcode) { + case ctop::add: + result = lhs + rhs; + break; + case ctop::sub: + result = lhs - rhs; + break; + case ctop::multiply: + result = lhs * rhs; + break; + case ctop::divide: + result = lhs / rhs; + break; + case ctop::greater: + result = lhs > rhs; + break; + case ctop::lesser: + result = lhs < rhs; + break; + case ctop::geq: + result = lhs >= rhs; + break; + case ctop::leq: + result = lhs <= rhs; + break; + case ctop::eq: + result = lhs == rhs; + break; + case ctop::neq: + result = lhs != rhs; + break; + default: + CmiAbort("unrecognized binary op for scalar operands"); + } + } else if (numOperands == 3) { + uint32_t operand_size = extract(cmd); + double lhs = slower_hare(cmd); + cmd += operand_size; + + operand_size = extract(cmd); + double rhs = slower_hare(cmd); + cmd += operand_size; + + operand_size = extract(cmd); + double ths = slower_hare(cmd); + cmd += operand_size; + + switch (ctopcode) { + case ctop::where: + result = ths ? 
lhs : rhs; + break; + default: + CmiAbort("unrecognized ternary op for scalar operands"); + } } + + if (store) + insert(tensorID, result); + return result; } -template -std::vector faster_tortoise(char *cmd, bool flush) -{ +template +std::vector faster_tortoise(char *cmd, bool flush) { + if(peek(cmd) == 1) { + double result = slower_hare(cmd); + if constexpr (std::is_same_v) { + tensorAstNodeType temp_node(0, ctop::broadcast, result, {1}); + return {temp_node}; + } else if constexpr (std::is_same_v) { + tensorAstNodeType temp_node(0, ctop::broadcast, result, {1, 1}); + return {temp_node}; + } + } + uint8_t marker = extract(cmd); - std::vector shape; shape.reserve(2); + std::vector shape; + shape.reserve(2); if (marker == 0) { if constexpr (std::is_same_v) { @@ -150,99 +346,97 @@ std::vector faster_tortoise(char *cmd, bool flush) uint8_t dims = extract(cmd); - for(uint8_t i = 0; i < dims; i++) + for (uint8_t i = 0; i < dims; i++) shape.push_back(extract(cmd)); uint32_t opcode = extract(cmd); - bool store = extract(cmd); + bool store = extract(cmd); uint64_t tensorID = extract(cmd); ckout<<"for tensorid "< "<(lookup(tensorID)); - double result = tmp.get(); - tensorAstNodeType temp_node(0, ctop::broadcast, result, shape); - return {temp_node}; - } else { - const auto& tmp = std::get(lookup(tensorID)); - return tmp(); - } + const auto &tmp = std::get(lookup(tensorID)); + return tmp(); } // Args for custom unops/binops - uint32_t numArgs = extract(cmd); + uint32_t numArgs = extract(cmd); std::vector args; - for(uint32_t i = 0; i < numArgs; i++) + for (uint32_t i = 0; i < numArgs; i++) args.push_back(extract(cmd)); tensorAstNodeType rootNode; ctop ctopcode = to_ctop(opcode); if (ctopcode == ctop::unary_expr) { - rootNode = tensorAstNodeType(-1, ctopcode, to_ct_unary(opcode, args), shape); - } else if(ctopcode==ctop::binary_expr){ - rootNode = tensorAstNodeType(-1, ctopcode, to_ct_binary(opcode, args), shape); + rootNode = + tensorAstNodeType(-1, ctopcode, to_ct_unary(opcode, args), shape); + } else if (ctopcode == ctop::binary_expr) { + rootNode = + tensorAstNodeType(-1, ctopcode, to_ct_binary(opcode, args), shape); } else { rootNode = tensorAstNodeType(ctopcode, shape); } std::vector ast; - uint8_t numOperands = extract(cmd); + uint8_t numOperands = extract(cmd); // when we encounter a matmul, we treat it as a : // 1. a dot product returning a scalar if both the operands are vectors - // 2. a dot product returning a vector if one operand is a matrix and the other a vector + // 2. a dot product returning a vector if one operand is a matrix and the + // other a vector // 3. 
a gemm returning a matrix if both the operands are matrices if (ctopcode == ctop::matmul) { uint32_t operand_size = extract(cmd); - std::pair xOperandInfo = getFlushedOperand(cmd); + std::pair xOperandInfo = + getFlushedOperand(cmd); cmd += operand_size; operand_size = extract(cmd); - std::pair yOperandInfo = getFlushedOperand(cmd); + std::pair yOperandInfo = + getFlushedOperand(cmd); cmd += operand_size; - const uint8_t& xDim = xOperandInfo.first; - const uint8_t& yDim = yOperandInfo.first; - const uint64_t& xID = xOperandInfo.second; - const uint64_t& yID = yOperandInfo.second; + const uint8_t &xDim = xOperandInfo.first; + const uint8_t &yDim = yOperandInfo.first; + const uint64_t &xID = xOperandInfo.second; + const uint64_t &yID = yOperandInfo.second; if (xDim == 1 and yDim == 1) { - const auto& x = std::get(lookup(xID)); - const auto& y = std::get(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); ct::scalar tensor0D = ct::dot(x, y); double result = tensor0D.get(); - insert(tensorID, std::move(tensor0D)); + insert(tensorID, result); tensorAstNodeType temp_node(0, ctop::broadcast, result, shape); return {temp_node}; } else if constexpr (std::is_same_v) { if (xDim == 1 and yDim == 2) { - const auto& x = std::get(lookup(xID)); - const auto& y = std::get(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); ct::vector tensor = ct::dot(x, y); - const auto& tensorNode = tensor(); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; } else if (xDim == 2 and yDim == 1) { - const auto& x = std::get(lookup(xID)); - const auto& y = std::get(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); ct::vector tensor = ct::dot(x, y); - const auto& tensorNode = tensor(); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; } } else if constexpr (std::is_same_v) { if (xDim == 2 and yDim == 2) { - const auto& x = std::get(lookup(xID)); - const auto& y = std::get(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); ct::matrix tensor = ct::matmul(x, y); - const auto& tensorNode = tensor(); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; @@ -250,34 +444,32 @@ std::vector faster_tortoise(char *cmd, bool flush) } } else if (ctopcode == ctop::copy) { uint32_t operand_size = extract(cmd); - std::pair copyOperandInfo = getFlushedOperand(cmd); + std::pair copyOperandInfo = + getFlushedOperand(cmd); cmd += operand_size; - const uint64_t& copyID = copyOperandInfo.second; - const auto& copy = std::get(lookup(copyID)); + const uint64_t ©ID = copyOperandInfo.second; + const auto © = std::get(lookup(copyID)); tensorType tensor(copy); - const auto& tensorNode = tensor(); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; } - if(numOperands <= 2) { + if (numOperands <= 2) { uint32_t operand_size = extract(cmd); - std::vector left = faster_tortoise(cmd); + std::vector left = + faster_tortoise(cmd); + cmd += operand_size; + operand_size = extract(cmd); + std::vector right = + faster_tortoise(cmd); cmd += operand_size; - std::vector right; - if(numOperands==2){ - operand_size = extract(cmd); - right = faster_tortoise(cmd); - cmd += operand_size; - } rootNode.left_ = 1; size_t right_size; - size_t left_size; - if (ctopcode == ctop::unary_expr || - ctopcode == ctop::logical_not || + if 
(ctopcode == ctop::unary_expr || ctopcode == ctop::logical_not || ctopcode == ctop::custom_expr) { rootNode.right_ = -1; right_size = 0; @@ -299,48 +491,48 @@ std::vector faster_tortoise(char *cmd, bool flush) std::copy(left.begin(), left.end(), std::back_inserter(ast)); if (right_size) - std::copy(right.begin(), right.end(), std::back_inserter(ast)); + std::copy(right.begin(), right.end(), std::back_inserter(ast)); for (int i = 1; i != left.size(); ++i) { - if (ast[i].left_ != -1) { - ast[i].left_ += 1; - } + if (ast[i].left_ != -1) { + ast[i].left_ += 1; + } - if (ast[i].right_ != -1) { - ast[i].right_ += 1; - } + if (ast[i].right_ != -1) { + ast[i].right_ += 1; + } - if (ast[i].ter_ != -1) { - ast[i].ter_ += 1; - } + if (ast[i].ter_ != -1) { + ast[i].ter_ += 1; + } } for (int i = 1 + left.size(); i != ast.size(); ++i) { - if (ast[i].left_ != -1) - { - ast[i].left_ += 1 + left.size(); - } - - if (ast[i].right_ != -1) - { - ast[i].right_ += 1 + left.size(); - } - - if (ast[i].ter_ != -1) - { - ast[i].ter_ += 1 + left.size(); - } + if (ast[i].left_ != -1) { + ast[i].left_ += 1 + left.size(); + } + + if (ast[i].right_ != -1) { + ast[i].right_ += 1 + left.size(); + } + + if (ast[i].ter_ != -1) { + ast[i].ter_ += 1 + left.size(); + } } ckout<<"HERE "<(cmd); - std::vector left = faster_tortoise(cmd); + std::vector left = + faster_tortoise(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector right = faster_tortoise(cmd); + std::vector right = + faster_tortoise(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector ter = faster_tortoise(cmd); + std::vector ter = + faster_tortoise(cmd); cmd += operand_size; rootNode.left_ = 1; @@ -391,7 +583,7 @@ std::vector faster_tortoise(char *cmd, bool flush) if (store or flush) { ckout<<"store through AST break "<(tagPos) == 1) faster_tortoise(cmd); - else if (peek(tagPos) == 2) faster_tortoise(cmd); + char* dimPos = cmd + sizeof(uint8_t); + if (peek(cmd) == 1) slower_hare(cmd); + else if (peek(dimPos) == 1) faster_tortoise(cmd); + else if (peek(dimPos) == 2) faster_tortoise(cmd); } void Main::execute_command(int epoch, uint8_t kind, int size, char *cmd) @@ -266,13 +267,11 @@ void Main::execute_fetch(int epoch, int size, char *cmd) [&](auto &x) { using T = std::decay_t; - if constexpr (std::is_same_v) + if constexpr (std::is_same_v) { - double value = x.get(); - reply = (char *)&value; + reply = (char *)&x; reply_size += 8; send_reply(epoch, reply_size, reply); - // CcsSendReply(reply_size, reply); } else if constexpr (std::is_same_v) { From 1512955c1e1c8d3ced0589235e34886d52387d8a Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Wed, 15 Oct 2025 00:04:09 +0530 Subject: [PATCH 18/34] fix tensor handling --- src/ast.hpp | 75 +++++++++++++++++++++++------------------------------ 1 file changed, 32 insertions(+), 43 deletions(-) diff --git a/src/ast.hpp b/src/ast.hpp index 5c19b55..26300f6 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -457,33 +457,24 @@ std::vector faster_tortoise(char *cmd, bool flush) { return tensorNode; } - if (numOperands <= 2) { + if(numOperands <= 2) { uint32_t operand_size = extract(cmd); - std::vector left = - faster_tortoise(cmd); + std::vector left = faster_tortoise(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector right = - faster_tortoise(cmd); + std::vector right = faster_tortoise(cmd); cmd += operand_size; rootNode.left_ = 1; size_t right_size; - if (ctopcode == ctop::unary_expr || ctopcode == ctop::logical_not || + if (ctopcode == ctop::unary_expr || + ctopcode == 
ctop::logical_not || ctopcode == ctop::custom_expr) { rootNode.right_ = -1; right_size = 0; - left_size = left.size(); - } else if(ctopcode == ctop::copy){ - //assuming copy is done on non temps only - rootNode.right_ = -1; - left_size = 0; - right_size = 0; - rootNode.copy_id_ = left[0].name_; } else { rootNode.right_ = left.size() + 1; right_size = right.size(); - left_size = left.size(); } ast.reserve(left.size() + right_size + 1); @@ -491,48 +482,47 @@ std::vector faster_tortoise(char *cmd, bool flush) { std::copy(left.begin(), left.end(), std::back_inserter(ast)); if (right_size) - std::copy(right.begin(), right.end(), std::back_inserter(ast)); + std::copy(right.begin(), right.end(), std::back_inserter(ast)); for (int i = 1; i != left.size(); ++i) { - if (ast[i].left_ != -1) { - ast[i].left_ += 1; - } + if (ast[i].left_ != -1) { + ast[i].left_ += 1; + } - if (ast[i].right_ != -1) { - ast[i].right_ += 1; - } + if (ast[i].right_ != -1) { + ast[i].right_ += 1; + } - if (ast[i].ter_ != -1) { - ast[i].ter_ += 1; - } + if (ast[i].ter_ != -1) { + ast[i].ter_ += 1; + } } for (int i = 1 + left.size(); i != ast.size(); ++i) { - if (ast[i].left_ != -1) { - ast[i].left_ += 1 + left.size(); - } - - if (ast[i].right_ != -1) { - ast[i].right_ += 1 + left.size(); - } - - if (ast[i].ter_ != -1) { - ast[i].ter_ += 1 + left.size(); - } + if (ast[i].left_ != -1) + { + ast[i].left_ += 1 + left.size(); + } + + if (ast[i].right_ != -1) + { + ast[i].right_ += 1 + left.size(); + } + + if (ast[i].ter_ != -1) + { + ast[i].ter_ += 1 + left.size(); + } } - ckout<<"HERE "<(cmd); - std::vector left = - faster_tortoise(cmd); + std::vector left = faster_tortoise(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector right = - faster_tortoise(cmd); + std::vector right = faster_tortoise(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector ter = - faster_tortoise(cmd); + std::vector ter = faster_tortoise(cmd); cmd += operand_size; rootNode.left_ = 1; @@ -581,7 +571,6 @@ std::vector faster_tortoise(char *cmd, bool flush) { } if (store or flush) { - ckout<<"store through AST break "< Date: Wed, 15 Oct 2025 00:28:54 +0530 Subject: [PATCH 19/34] functionally correct conjugate gradient --- examples/conjugate_gradient.py | 43 +++++++++++++++++----------------- src/ast.hpp | 1 - 2 files changed, 22 insertions(+), 22 deletions(-) diff --git a/examples/conjugate_gradient.py b/examples/conjugate_gradient.py index fa316d8..0309036 100644 --- a/examples/conjugate_gradient.py +++ b/examples/conjugate_gradient.py @@ -3,38 +3,24 @@ from charmnumeric.ccs import enable_debug, sync from charmnumeric.ast import set_max_depth import numpy as np -import gc - import time -#enable_debug() set_max_depth(10) -#gc.set_threshold(1, 1, 1) def solve(A, b, x): r = b - A @ x p = r.copy() rsold = r @ r - for i in range(20): - #if i % 10 == 0: - # gc.collect() + for _ in range(10): Ap = A @ p alpha = rsold / (p @ Ap) x = alpha * p + x r = alpha * Ap - r - # x = lg.axpy(alpha, p, x) - # r = lg.axpy(alpha, Ap, r, multiplier=-1.) 
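The @ products in this solver exercise the matmul dispatch described in src/ast.hpp: vector @ vector lowers to a dot product returning a scalar (rsold, p @ Ap), matrix @ vector to a dot product returning a vector (Ap), and matrix @ matrix to a GEMM returning a matrix. A minimal client-side sketch of the three cases, assuming the same ndarray constructor and @/get() interface used in these examples; the server address, sizes and init values are placeholders:

    import numpy as np
    from charmnumeric.array import connect, ndarray

    connect("127.0.0.1", 10000)   # assumes a running backend server

    n = 8
    A = ndarray(2, (n, n), np.float64, init_value=1.0)   # matrix
    B = ndarray(2, (n, n), np.float64, init_value=2.0)   # matrix
    p = ndarray(1, n, np.float64, init_value=1.0)        # vector

    Ap  = A @ p    # matrix @ vector -> vector   (ct::dot on the backend)
    pAp = p @ Ap   # vector @ vector -> scalar   (ct::dot, fetched as a double)
    AB  = A @ B    # matrix @ matrix -> matrix   (ct::matmul / GEMM)

    print(pAp.get())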
- rsnew = r @ r - #if np.sqrt(rsnew.get()) < 1e-8: - # print("Converged in %i iterations" % (i + 1)) - # break - p = (rsnew / rsold) * p + r - # p = lg.axpy(rsnew / rsold, p, r) rsold = rsnew return x @@ -42,12 +28,27 @@ def solve(A, b, x): if __name__ == '__main__': connect("172.17.0.1", 10000) - A = ndarray(2, (184, 184), np.float64) - b = ndarray(1, 184, np.float64) - x = ndarray(1, 184, np.float64) + n = 184 + + A = ndarray(2, (n, n), np.float64, init_value = 1e-4) + b = ndarray(1, n, np.float64, init_value = 1e-4) + x = ndarray(1, n, np.float64, init_value = 1e-4) start = time.time() x = solve(A, b, x) - x.evaluate() - sync() - print("Execution time = %.6f" % (time.time() - start)) + x_charm = x.get() + print("Execution time (Charm) = %.6f s" % (time.time() - start)) + + # Initialize all arrays to 1 + A = np.ones((n, n), dtype=np.float64) * 1e-4 + b = np.ones(n, dtype=np.float64) * 1e-4 + x = np.ones(n, dtype=np.float64) * 1e-4 + + start = time.time() + x_np = solve(A, b, x) + print("Execution time (NumPy) = %.6f s" % (time.time() - start)) + + if np.allclose(x_np, x_charm, atol=1e-5): + print("[SUCCESS]") + else: + print("[FAIL]") diff --git a/src/ast.hpp b/src/ast.hpp index 26300f6..2952734 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -352,7 +352,6 @@ std::vector faster_tortoise(char *cmd, bool flush) { uint32_t opcode = extract(cmd); bool store = extract(cmd); uint64_t tensorID = extract(cmd); - ckout<<"for tensorid "< "<(lookup(tensorID)); From 4c5f45c3d891989be6ba69c1eaecfea8c8e3b726 Mon Sep 17 00:00:00 2001 From: Anant jain <129408892+anant37289@users.noreply.github.com> Date: Wed, 15 Oct 2025 23:58:38 +0530 Subject: [PATCH 20/34] feat: fusing multiple ASTs using decorators (#8) * feat: adding fusion across lines * add encoding update * nit * separate handling of unary and binary ops * test fused unops/binops --------- Co-authored-by: Sh0g0-1758 --- charmnumeric/ast.py | 41 ++++++++++++++-------- charmnumeric/ccs.py | 4 --- examples/charm_fuse.py | 44 ++++++++++++++++++++++++ src/ast.hpp | 78 +++++++++++++++++++++++++----------------- src/server.cpp | 6 ++-- 5 files changed, 120 insertions(+), 53 deletions(-) create mode 100644 examples/charm_fuse.py diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index dbb9cba..6180f01 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -7,7 +7,7 @@ max_depth = 10 - +multiLineFuse = False def set_max_depth(d): global max_depth @@ -18,10 +18,22 @@ def get_max_depth(): global max_depth return max_depth +def charm_fuse(func): + def compile_wrapper(*args, **kwargs): + global multiLineFuse + orig_max_depth = get_max_depth() + multiLineFuse = True + set_max_depth(float('inf')) + out = func(*args, **kwargs) + multiLineFuse = False + set_max_depth(orig_max_depth) + return out + return compile_wrapper class ASTNode(object): def __init__(self, name, opcode, operands, args=[]): from charmtiles.array import ndarray + global multiLineFuse # contains opcode, operands # operands are ndarrays self.name = name @@ -29,23 +41,24 @@ def __init__(self, name, opcode, operands, args=[]): self.operands = operands self.depth = 0 self.args = args + self.multiLineFuse = multiLineFuse if self.opcode != 0: for op in self.operands: if isinstance(op, ndarray): self.depth = max(self.depth, 1 + op.command_buffer.depth) - ############################################################################################################################################### - # Marker determines whether we are dealing with a tensor, a scalar or an arithmetic type # - # 
Marker = 0 : arithmetic type # - # Marker = 1 : scalar type # - # Marker = 2 : tensor type # - # Encoding = | Marker | dim | shape | opcode | save_op | ID | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # - # | 8 | 8 | 64 | 32 | 1 | 64 | 32 | 64 | 8 | 32 | ........................ | # - # NB: If opcode is 0, the encoding is limited to ID # - # Encoding = | Marker | shape | val | # - # | 8 | 64 | 64 | # - # NB: Latter encoding for double constants # - ############################################################################################################################################### + ################################################################################################################################################################# + # Marker determines whether we are dealing with a tensor, a scalar or an arithmetic type # + # Marker = 0 : arithmetic type # + # Marker = 1 : scalar type # + # Marker = 2 : tensor type # + # Encoding = | Marker | dim | shape | opcode | save_op | ID | multiLineFuse | NumArgs | Args | NumOperands | OperandEncodingSize | RecursiveOperandEncoding | # + # | 8 | 8 | 64 | 32 | 1 | 64 | 1 | 32 | 64 | 8 | 32 | ........................ | # + # NB: If opcode is 0, the encoding is limited to ID # + # Encoding = | Marker | shape | val | # + # | 8 | 64 | 64 | # + # NB: Latter encoding for double constants # + ################################################################################################################################################################# def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False): from charmnumeric.array import ndarray @@ -62,7 +75,7 @@ def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False) cmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(self.operands[0].name, 'L') return cmd - cmd += to_bytes(self.opcode, 'I') + to_bytes(save, '?') + to_bytes(self.name, 'L') + cmd += to_bytes(self.opcode, 'I') + to_bytes(save, '?') + to_bytes(self.name, 'L') + to_bytes(self.multiLineFuse, '?') cmd += to_bytes(len(self.args), 'I') for arg in self.args: cmd += to_bytes(arg, 'd') diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 190ad6c..43516b6 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -9,10 +9,6 @@ next_name = 0 epoch = 0 -### custom opcodes -# EXP, LOG, ABS, NEGATE, SQUARE, SQRT, RECIPROCAL, SIN, COS, RELU, SCALE, ADD_CONSTANT, ADD, SUBTRACT, -# MULTIPLY, DIVIDE, POWER, MODULO, MAX, MIN, GREATER_THAN, LESS_THAN, EQUAL, ATAN2, WEIGHTED_AVERAGE - OPCODES = { # base_op '+': 1, diff --git a/examples/charm_fuse.py b/examples/charm_fuse.py new file mode 100644 index 0000000..f91493e --- /dev/null +++ b/examples/charm_fuse.py @@ -0,0 +1,44 @@ +from charmnumeric.array import connect, ndarray +from charmnumeric.ast import set_max_depth, charm_fuse +from charmnumeric.ccs import enable_debug +import charmnumeric.linalg as lg +import numpy as np +import time +set_max_depth(10) + +@charm_fuse +def f(): + v = ndarray(1, 1e5, np.float64, init_value=-20) + b = ndarray(1, 1e5, np.float64, init_value=10) + + g1 = v.abs().scale(2).scale(2).add_constant(29) + b + 32 + g2 = b.log(2).exp() + d = g1 + g2 + return d.get() + +def g(): + v = ndarray(1, 1e5, np.float64, init_value=-20) + b = ndarray(1, 1e5, np.float64, init_value=10) + + g1 = v.abs().scale(2).scale(2).add_constant(29) + b + 32 + g2 = b.log(2).exp() + d = g1 + g2 + return d.get() + +if __name__ == '__main__': + connect("127.0.0.1", 10000) + s = f() + start = 
time.time() + for(i) in range(100): + s = f() + end = time.time() + print(s) + print("Time taken(multi line fused): ", end - start) + + s = g() + start = time.time() + for(i) in range(100): + s = g() + end = time.time() + print(s) + print("Time taken(multi line unfused): ", end - start) diff --git a/src/ast.hpp b/src/ast.hpp index 2952734..709585c 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -39,7 +39,7 @@ template inline T peek(char *&msg) noexcept { } template -std::vector faster_tortoise(char *cmd, bool flush = false); +std::vector process_tensor(char *cmd, bool flush = false); template std::pair getFlushedOperand(char *cmd) { @@ -57,7 +57,7 @@ std::pair getFlushedOperand(char *cmd) { uint32_t opcode = extract(cmd); if (opcode) - faster_tortoise(recurse_cmd, true); + process_tensor(recurse_cmd, true); cmd += sizeof(bool); @@ -176,7 +176,7 @@ to_ct_binary(uint64_t opcode, const std::vector &args) noexcept { } } -double slower_hare(char *cmd) { +double process_scalar(char *cmd) { uint8_t marker = extract(cmd); if (marker == 0) return extract(cmd); @@ -191,7 +191,8 @@ double slower_hare(char *cmd) { if (ctopcode == ctop::noop) return std::get(lookup(tensorID)); - /* customOpArgs = */ extract(cmd); + /* multLineFuse = */ extract(cmd); + /* NumcustomOpArgs = */ extract(cmd); if (ctopcode == ctop::unary_expr || ctopcode == ctop::binary_expr) CmiAbort("Custom Ops are not defined for scalar type"); @@ -232,7 +233,7 @@ double slower_hare(char *cmd) { if (numOperands == 1) { uint32_t operand_size = extract(cmd); - double lhs = slower_hare(cmd); + double lhs = process_scalar(cmd); cmd += operand_size; switch (ctopcode) { @@ -247,10 +248,10 @@ double slower_hare(char *cmd) { } } else if (numOperands == 2) { uint32_t operand_size = extract(cmd); - double lhs = slower_hare(cmd); + double lhs = process_scalar(cmd); cmd += operand_size; operand_size = extract(cmd); - double rhs = slower_hare(cmd); + double rhs = process_scalar(cmd); cmd += operand_size; switch (ctopcode) { @@ -289,15 +290,15 @@ double slower_hare(char *cmd) { } } else if (numOperands == 3) { uint32_t operand_size = extract(cmd); - double lhs = slower_hare(cmd); + double lhs = process_scalar(cmd); cmd += operand_size; operand_size = extract(cmd); - double rhs = slower_hare(cmd); + double rhs = process_scalar(cmd); cmd += operand_size; operand_size = extract(cmd); - double ths = slower_hare(cmd); + double ths = process_scalar(cmd); cmd += operand_size; switch (ctopcode) { @@ -315,9 +316,9 @@ double slower_hare(char *cmd) { } template -std::vector faster_tortoise(char *cmd, bool flush) { +std::vector process_tensor(char *cmd, bool flush) { if(peek(cmd) == 1) { - double result = slower_hare(cmd); + double result = process_scalar(cmd); if constexpr (std::is_same_v) { tensorAstNodeType temp_node(0, ctop::broadcast, result, {1}); return {temp_node}; @@ -357,6 +358,7 @@ std::vector faster_tortoise(char *cmd, bool flush) { const auto &tmp = std::get(lookup(tensorID)); return tmp(); } + bool multiLineFuse = extract(cmd); // Args for custom unops/binops uint32_t numArgs = extract(cmd); @@ -375,6 +377,7 @@ std::vector faster_tortoise(char *cmd, bool flush) { } else { rootNode = tensorAstNodeType(ctopcode, shape); } + rootNode.multiLineFuse = multiLineFuse; std::vector ast; uint8_t numOperands = extract(cmd); @@ -456,32 +459,43 @@ std::vector faster_tortoise(char *cmd, bool flush) { return tensorNode; } - if(numOperands <= 2) { + if(numOperands == 1){ uint32_t operand_size = extract(cmd); - std::vector left = faster_tortoise(cmd); + std::vector 
left = process_tensor(cmd); + cmd += operand_size; + rootNode.left_ = 1; + rootNode.right_ = -1; + ast.reserve(left.size() + 1); + ast.emplace_back(rootNode); + std::copy(left.begin(), left.end(), std::back_inserter(ast)); + for (int i = 1; i != left.size(); ++i) { + if (ast[i].left_ != -1) { + ast[i].left_ += 1; + } + + if (ast[i].right_ != -1) { + ast[i].right_ += 1; + } + + if (ast[i].ter_ != -1) { + ast[i].ter_ += 1; + } + } + } else if(numOperands == 2) { + uint32_t operand_size = extract(cmd); + std::vector left = process_tensor(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector right = faster_tortoise(cmd); + std::vector right = process_tensor(cmd); cmd += operand_size; rootNode.left_ = 1; - size_t right_size; - if (ctopcode == ctop::unary_expr || - ctopcode == ctop::logical_not || - ctopcode == ctop::custom_expr) { - rootNode.right_ = -1; - right_size = 0; - } else { - rootNode.right_ = left.size() + 1; - right_size = right.size(); - } + rootNode.right_ = left.size() + 1; - ast.reserve(left.size() + right_size + 1); + ast.reserve(left.size() + right.size() + 1); ast.emplace_back(rootNode); std::copy(left.begin(), left.end(), std::back_inserter(ast)); - - if (right_size) - std::copy(right.begin(), right.end(), std::back_inserter(ast)); + std::copy(right.begin(), right.end(), std::back_inserter(ast)); for (int i = 1; i != left.size(); ++i) { if (ast[i].left_ != -1) { @@ -515,13 +529,13 @@ std::vector faster_tortoise(char *cmd, bool flush) { } } else { uint32_t operand_size = extract(cmd); - std::vector left = faster_tortoise(cmd); + std::vector left = process_tensor(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector right = faster_tortoise(cmd); + std::vector right = process_tensor(cmd); cmd += operand_size; operand_size = extract(cmd); - std::vector ter = faster_tortoise(cmd); + std::vector ter = process_tensor(cmd); cmd += operand_size; rootNode.left_ = 1; diff --git a/src/server.cpp b/src/server.cpp index 4be7e9d..2a69062 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -143,9 +143,9 @@ void Main::execute_operation(int epoch, int size, char *cmd) } CkPrintf("Memory usage after %u deletions is %f MB\n", num_deletions, CmiMemoryUsage() / (1024. * 1024.)); char* dimPos = cmd + sizeof(uint8_t); - if (peek(cmd) == 1) slower_hare(cmd); - else if (peek(dimPos) == 1) faster_tortoise(cmd); - else if (peek(dimPos) == 2) faster_tortoise(cmd); + if (peek(cmd) == 1) process_scalar(cmd); + else if (peek(dimPos) == 1) process_tensor(cmd); + else if (peek(dimPos) == 2) process_tensor(cmd); } void Main::execute_command(int epoch, uint8_t kind, int size, char *cmd) From 62eb3ef43b836209e781bcb5ec4658246b11ddbf Mon Sep 17 00:00:00 2001 From: Shourya Goel Date: Fri, 17 Oct 2025 15:03:34 +0530 Subject: [PATCH 21/34] dealloc (#9) * feat: adding fusion across lines * add encoding update * nit * separate handling of unary and binary ops * test fused unops/binops * gpu fix * nit * fix deletion of validated arrays * broken_fuse * Revert "broken_fuse" This reverts commit 29ed4fa9d7c30a2703bdfcff2a8c4b3147ed0a9b. * Revert "fix deletion of validated arrays" This reverts commit b990b299177c5a89fbe5db3b8d93a85f9b02303a. * Reapply "fix deletion of validated arrays" This reverts commit 03845e651fccc5e456c0ff545dc69d81e1e0db30. * Reapply "broken_fuse" This reverts commit 81c431a5f04717815385a5285723d4de048eb53a. 
* fix validate array bug * nit --------- Co-authored-by: anant --- charmnumeric/array.py | 28 +---------- charmnumeric/ast.py | 10 ++-- charmnumeric/ccs.py | 4 +- examples/conjugate_gradient.py | 19 +++++--- examples/graph.py | 24 ++++++---- setup.py | 11 ----- src/CMakeLists.txt | 12 +++-- src/ast.hpp | 79 +++++++++++++++--------------- src/server.cpp | 88 ++++++++++++++-------------------- 9 files changed, 120 insertions(+), 155 deletions(-) diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 0f80220..19052a1 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -96,20 +96,6 @@ def __del__(self): def __len__(self): return self.shape[0] - #def __str__(self): - # print(self.get()) - - #def __repr__(self): - # #self._flush_command_buffer() - # # FIXME add repr - # pass - - def __setitem__(self, key, value): - if not isinstance(key, slice) or key.start != None or \ - key.stop != None or key.step != None: - raise ValueError("Can't set items or slices") - self.cmd_buffer = ASTNode(res, OPCODES.get('setitem'), [self, value]) - def __neg__(self): return self * -1 @@ -283,23 +269,13 @@ def _flush_command_buffer(self): self.command_buffer.plot_graph() if self.valid: return - validated_arrays = {self.name : self} - cmd = self.command_buffer.get_command(validated_arrays, self.ndim, self.shape, is_scalar=self.is_scalar) - reply_size = 0 - for name, arr in validated_arrays.items(): - reply_size += 8 + 8 * arr.ndim + cmd = self.command_buffer.get_command(self.ndim, self.shape, is_scalar=self.is_scalar) if not debug: cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + cmd cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd send_command_async(Handlers.operation_handler, cmd) deletion_buffer = b'' deletion_buffer_size = 0 - for i in range(len(validated_arrays)): - arr = validated_arrays[name] - arr.validate() - else: - for name, arr in validated_arrays.items(): - arr.validate() self.validate() def get(self): @@ -321,7 +297,7 @@ def evaluate(self): def validate(self): self.valid = True - self.command_buffer = ASTNode(self.name, 0, [self]) + self.command_buffer = ASTNode(self.name, 0, [weakref.proxy(self)]) def copy(self): res = get_name() diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 6180f01..486b5d2 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -59,7 +59,7 @@ def __init__(self, name, opcode, operands, args=[]): # | 8 | 64 | 64 | # # NB: Latter encoding for double constants # ################################################################################################################################################################# - def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False): + def get_command(self, ndim, shape, save=True, is_scalar=False): from charmnumeric.array import ndarray # Ndims and Shape setup @@ -83,7 +83,7 @@ def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False) cmd += to_bytes(len(self.operands), 'B') for op in self.operands: if isinstance(op, ndarray): - if op.name in validated_arrays: + if op.valid: if op.is_scalar: opcmd = to_bytes(1, 'B') else: @@ -94,9 +94,9 @@ def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False) opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) - if not op.valid and save_op: - 
validated_arrays[op.name] = op + opcmd = op.command_buffer.get_command(op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) + if save_op: + op.validate() elif isinstance(op, float) or isinstance(op, int): opcmd = to_bytes(0, 'B') for _shape in shape: diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 43516b6..3eb8c5a 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -17,8 +17,6 @@ '/': 4, '@': 5, 'copy': 6, - 'setitem': 7, - 'pow': 8, '>': 9, '<': 10, '>=': 11, @@ -117,7 +115,7 @@ def connect(server_ip, server_port): def disconnect(): from charmnumeric.array import deletion_buffer, deletion_buffer_size - global client_id, deletion_buffer, deletion_buffer_size + global client_id if deletion_buffer_size > 0: cmd = to_bytes(len(deletion_buffer), 'I') + deletion_buffer cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd diff --git a/examples/conjugate_gradient.py b/examples/conjugate_gradient.py index 0309036..90e73d6 100644 --- a/examples/conjugate_gradient.py +++ b/examples/conjugate_gradient.py @@ -28,11 +28,16 @@ def solve(A, b, x): if __name__ == '__main__': connect("172.17.0.1", 10000) - n = 184 + n = int(1e4) - A = ndarray(2, (n, n), np.float64, init_value = 1e-4) - b = ndarray(1, n, np.float64, init_value = 1e-4) - x = ndarray(1, n, np.float64, init_value = 1e-4) + A = ndarray(2, (n, n), np.float64, init_value = 1e-6) + b = ndarray(1, n, np.float64, init_value = 1e-6) + x = ndarray(1, n, np.float64, init_value = 1e-6) + + # Pre-Compilation + _ = solve(A, b, x) + __ = _.get() + print(__) start = time.time() x = solve(A, b, x) @@ -40,9 +45,9 @@ def solve(A, b, x): print("Execution time (Charm) = %.6f s" % (time.time() - start)) # Initialize all arrays to 1 - A = np.ones((n, n), dtype=np.float64) * 1e-4 - b = np.ones(n, dtype=np.float64) * 1e-4 - x = np.ones(n, dtype=np.float64) * 1e-4 + A = np.ones((n, n), dtype=np.float64) * 1e-6 + b = np.ones(n, dtype=np.float64) * 1e-6 + x = np.ones(n, dtype=np.float64) * 1e-6 start = time.time() x_np = solve(A, b, x) diff --git a/examples/graph.py b/examples/graph.py index caa36e0..853c039 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -11,8 +11,12 @@ def f(): v = ndarray(1, 10, np.float64, init_value=2) b = ndarray(1, 10, np.float64, init_value=1) c = ndarray(1, 10, np.float64, init_value=3) - # k = v * 2 + b + 3 + c - 32 - # l = k >= 42 + for _ in range(2): + k = v * 2 + b + 3 + c - 32 + l = k >= 42 + l.get() + print(l.get()) + # print(l.get()) # v1 = v @ b # v1 = (b + c) @ (b - c) # q.get() @@ -32,14 +36,14 @@ def f(): # final_res = res + 42 # q.get() - a1 = b @ c - print(a1.get()) - a2 = v @ c - print(a2.get()) - res = (a1 / a2) * b + c - v = 2 - # a3 = a1 + a2 - print(v.get()) + # a1 = b @ c + # print(a1.get()) + # a2 = v @ c + # print(a2.get()) + # res = (a1 / a2) * b + c + # v = 2 + # # a3 = a1 + a2 + # print(v.get()) # res = a3 * v # w = c @ b # w.get() diff --git a/setup.py b/setup.py index 8071e21..9c0b53a 100644 --- a/setup.py +++ b/setup.py @@ -10,15 +10,6 @@ def get_version(): exec(compile(open(fname).read(), fname, 'exec'), data) return data.get('__version__') - -def compile_server(): - charmc = os.environ.get('CHARMC', - '/home/adityapb/charm/charm/netlrts-linux-x86_64/bin/charmc') - aum_base = os.environ.get('AUM_HOME', '/home/adityapb/charm/LibCharmtyles') - subprocess.run(["make", "-C", "src/", - "CHARMC=%s" % charmc, "BASE_DIR=%s" % aum_base]) - - install_requires = ['numpy', 'charm4py'] tests_require = ['pytest'] docs_require = ['sphinx'] @@ -39,8 +30,6 @@ def compile_server(): ''' 
classifiers = [x.strip() for x in classes.splitlines() if x] -compile_server() - setup( name='charmnumeric', version=get_version(), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2eeb08e..7308a44 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,10 +40,16 @@ if(Charm_ENABLE_GPU) target_include_directories(server PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}/include ${CHARM_DIR}/include) target_link_libraries(server Kokkos::kokkos) - add_custom_command(result ALL - COMMAND ${CHARMC} ${GPU_LINK_OPTS} $:server> -o ${CMAKE_BINARY_DIR}/server.out + add_custom_command( + OUTPUT "${CMAKE_BINARY_DIR}/server.out" + COMMAND ${CHARMC} ${GPU_LINK_OPTS} $ -o ${CMAKE_BINARY_DIR}/server.out DEPENDS server - COMMENT "Linking charm build against kokkos and cuda") + COMMENT "Linking charm build against kokkos and cuda" + ) + + add_custom_target(result ALL + DEPENDS "${CMAKE_BINARY_DIR}/server.out" + ) else() add_executable(server.out server.cpp ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h ${CMAKE_SOURCE_DIR}/server.decl.h) target_include_directories(server.out PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}) diff --git a/src/ast.hpp b/src/ast.hpp index 709585c..b3ab573 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -7,10 +7,11 @@ #include #include #include +#include using ctop = ct::util::Operation; using ct_name_t = uint64_t; -using ct_array_t = std::variant; +using ct_array_t = std::variant, std::unique_ptr>; std::unordered_map symbol_table; inline static void insert(ct_name_t name, ct_array_t arr) { @@ -85,25 +86,25 @@ ctop inline to_ctop(uint64_t opcode) noexcept { return ctop::matmul; case 6: return ctop::copy; - case 11: + case 9: return ctop::greater; - case 12: + case 10: return ctop::lesser; - case 13: + case 11: return ctop::geq; - case 14: + case 12: return ctop::leq; - case 15: + case 13: return ctop::eq; - case 16: + case 14: return ctop::neq; - case 17: + case 15: return ctop::logical_and; - case 18: + case 16: return ctop::logical_or; - case 19: + case 17: return ctop::logical_not; - case 20: + case 18: return ctop::where; default: return ctop::noop; @@ -217,10 +218,10 @@ double process_scalar(char *cmd) { const uint64_t &yID = yOperandInfo.second; if (xDim == 1 and yDim == 1) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::scalar tensor0D = ct::dot(x, y); + ct::scalar tensor0D = ct::dot(*x, *y); double result = tensor0D.get(); insert(tensorID, result); return result; @@ -355,8 +356,8 @@ std::vector process_tensor(char *cmd, bool flush) { uint64_t tensorID = extract(cmd); if (opcode == 0) { - const auto &tmp = std::get(lookup(tensorID)); - return tmp(); + const auto &tmp = std::get>(lookup(tensorID)); + return (*tmp)(); } bool multiLineFuse = extract(cmd); @@ -403,10 +404,10 @@ std::vector process_tensor(char *cmd, bool flush) { const uint64_t &yID = yOperandInfo.second; if (xDim == 1 and yDim == 1) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::scalar tensor0D = ct::dot(x, y); + ct::scalar tensor0D = ct::dot(*x, *y); double result = tensor0D.get(); insert(tensorID, result); @@ -414,31 +415,31 @@ std::vector process_tensor(char *cmd, bool flush) { return {temp_node}; } else if constexpr (std::is_same_v) { if (xDim == 1 and yDim == 2) { - const auto &x = std::get(lookup(xID)); - 
const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::vector tensor = ct::dot(x, y); - const auto &tensorNode = tensor(); + std::unique_ptr tensor = std::make_unique(std::move(ct::dot(*x, *y))); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; } else if (xDim == 2 and yDim == 1) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::vector tensor = ct::dot(x, y); - const auto &tensorNode = tensor(); + std::unique_ptr tensor = std::make_unique(std::move(ct::dot(*x, *y))); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; } } else if constexpr (std::is_same_v) { if (xDim == 2 and yDim == 2) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::matrix tensor = ct::matmul(x, y); - const auto &tensorNode = tensor(); + std::unique_ptr tensor = std::make_unique(std::move(ct::matmul(*x, *y))); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; @@ -451,15 +452,15 @@ std::vector process_tensor(char *cmd, bool flush) { cmd += operand_size; const uint64_t ©ID = copyOperandInfo.second; - const auto © = std::get(lookup(copyID)); - tensorType tensor(copy); + const auto © = std::get>(lookup(copyID)); + std::unique_ptr tensor = std::make_unique(*copy); - const auto &tensorNode = tensor(); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; } - if(numOperands == 1){ + if (numOperands == 1) { uint32_t operand_size = extract(cmd); std::vector left = process_tensor(cmd); cmd += operand_size; @@ -480,8 +481,8 @@ std::vector process_tensor(char *cmd, bool flush) { if (ast[i].ter_ != -1) { ast[i].ter_ += 1; } - } - } else if(numOperands == 2) { + } + } else if (numOperands == 2) { uint32_t operand_size = extract(cmd); std::vector left = process_tensor(cmd); cmd += operand_size; @@ -584,8 +585,8 @@ std::vector process_tensor(char *cmd, bool flush) { } if (store or flush) { - tensorType tensor(ast); - const auto &tensorNode = tensor(); + std::unique_ptr tensor = std::make_unique(ast); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; } diff --git a/src/server.cpp b/src/server.cpp index 2a69062..89ade04 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -105,7 +105,6 @@ void Main::handle_command(int epoch, uint8_t kind, uint32_t size, char *cmd) while (!command_buffer.empty() && std::get<0>(command_buffer.top()) == EPOCH) { buffer_t buffer = command_buffer.top(); - // CkPrintf("Executing buffered at epoch %i, current %i\n", std::get<0>(buffer), EPOCH); execute_command(std::get<0>(buffer), std::get<1>(buffer), (int)size, std::get<2>(buffer)); free(std::get<2>(buffer)); command_buffer.pop(); @@ -130,18 +129,10 @@ void Main::send_reply(int epoch, int size, char *msg) server.reply_buffer.erase(epoch); } -void Main::execute_operation(int epoch, int size, char *cmd) -{ - // first delete arrays +void Main::execute_operation(int epoch, int size, char *cmd) { uint32_t num_deletions = extract(cmd); - // CkPrintf("Num deletions = %u\n", num_deletions); - CkPrintf("Memory usage before delete is %f MB\n", CmiMemoryUsage() / (1024. 
* 1024.)); for (int i = 0; i < num_deletions; i++) - { - ct_name_t name = extract(cmd); - remove(name); - } - CkPrintf("Memory usage after %u deletions is %f MB\n", num_deletions, CmiMemoryUsage() / (1024. * 1024.)); + remove(extract(cmd)); char* dimPos = cmd + sizeof(uint8_t); if (peek(cmd) == 1) process_scalar(cmd); else if (peek(dimPos) == 1) process_tensor(cmd); @@ -201,8 +192,7 @@ void Main::execute_creation(int epoch, int size, char *cmd) { case 0: { - // create scalar - CmiAbort("Not implemented"); + CmiAbort("Scalars can only be made through reduction ops and matmuls"); } case 1: { @@ -212,16 +202,16 @@ void Main::execute_creation(int epoch, int size, char *cmd) if (has_buf) { double *init_buf = (double *)cmd; - res = ct::from_vector(init_buf, size); + res = ct::from_vector_unique(init_buf, size); } else if (has_init) { double init_value = extract(cmd); - res = ct::vector(size, init_value); + res = std::make_unique(size, init_value); } else { - res = ct::vector(size); + res = std::make_unique(size); } insert(res_name, std::move(res)); break; @@ -235,23 +225,22 @@ void Main::execute_creation(int epoch, int size, char *cmd) if (has_buf) { double *init_buf = (double *)cmd; - res = ct::from_matrix(init_buf, size1, size2); + res = ct::from_matrix_unique(init_buf, size1, size2); } else if (has_init) { double init_value = extract(cmd); - res = ct::matrix(size1, size2, init_value); + res = std::make_unique(size1, size2, init_value); } else { - res = ct::matrix(size1, size2); + res = std::make_unique(size1, size2); } insert(res_name, std::move(res)); break; } default: { - // FIXME is this correctly caught? CmiAbort("Greater than 2 dimensions not supported"); } } @@ -264,44 +253,41 @@ void Main::execute_fetch(int epoch, int size, char *cmd) char *reply = nullptr; int reply_size = 0; std::visit( - [&](auto &x) + [&](auto &x) + { + using T = std::decay_t; + if constexpr (std::is_same_v) { - using T = std::decay_t; - if constexpr (std::is_same_v) - { - reply = (char *)&x; - reply_size += 8; - send_reply(epoch, reply_size, reply); - } - else if constexpr (std::is_same_v) - { - std::vector values = x.get(); - reply = (char *)values.data(); - reply_size += values.size() * sizeof(double); - send_reply(epoch, reply_size, reply); - } - else if constexpr (std::is_same_v) - { - std::vector> values = x.get(); - std::vector flat; - for (const auto &row : values) - flat.insert(flat.end(), row.begin(), row.end()); - reply = reinterpret_cast(flat.data()); - reply_size += flat.size() * sizeof(double); - send_reply(epoch, reply_size, reply); - } - }, - arr); + reply = (char *)&x; + reply_size += 8; + send_reply(epoch, reply_size, reply); + } + else if constexpr (std::is_same_v>) + { + std::vector values = x->get(); + reply = (char *)values.data(); + reply_size += values.size() * sizeof(double); + send_reply(epoch, reply_size, reply); + } + else if constexpr (std::is_same_v>) + { + std::vector> values = x->get(); + std::vector flat; + for (const auto &row : values) + flat.insert(flat.end(), row.begin(), row.end()); + reply = reinterpret_cast(flat.data()); + reply_size += flat.size() * sizeof(double); + send_reply(epoch, reply_size, reply); + } + }, + arr); } void Main::execute_delete(int epoch, int size, char *cmd) { uint32_t num_deletions = extract(cmd); for (int i = 0; i < num_deletions; i++) - { - ct_name_t name = extract(cmd); - remove(name); - } + remove(extract(cmd)); } void Main::execute_disconnect(int epoch, int size, char *cmd) From f068b6c81584a857832948b89d4f80bd835ea21c Mon Sep 17 00:00:00 2001 
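The dealloc patch above reworks how arrays are validated, stored and shipped, but the operation header it emits still follows the field layout documented in the comment block in charmnumeric/ast.py. A standalone sketch of that layout, using struct codes matching the to_bytes calls visible in the diffs ('B', 'I', '?', 'L', 'd'); the values are made up and the sketch is illustrative rather than byte-for-byte identical to what the client produces:

    import struct

    def to_bytes(value, fmt):
        # assumed equivalent of the helper in charmnumeric.ccs
        return struct.pack(fmt, value)

    # One tensor-op header, following the comment block in charmnumeric/ast.py:
    # | Marker | dim | shape | opcode | save_op | ID | multiLineFuse | NumArgs | Args | NumOperands | ...
    cmd  = to_bytes(2, 'B')       # marker: 2 = tensor type
    cmd += to_bytes(1, 'B')       # number of dimensions
    cmd += to_bytes(1024, 'L')    # one extent per dimension
    cmd += to_bytes(1, 'I')       # opcode, e.g. OPCODES['+'] == 1
    cmd += to_bytes(True, '?')    # save_op: materialize the result on the server
    cmd += to_bytes(42, 'L')      # result ID (made up)
    cmd += to_bytes(False, '?')   # multiLineFuse flag introduced in patch 20
    cmd += to_bytes(0, 'I')       # number of custom-op args (each would be a 'd')
    cmd += to_bytes(2, 'B')       # number of operands; each operand encoding is
                                  # then prefixed with its own length ('I')

    # _flush_command_buffer then frames the finished buffer as in the diff:
    # cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + cmd
    # cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd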
From: Sh0g0-1758 Date: Fri, 17 Oct 2025 21:14:20 +0530 Subject: [PATCH 22/34] nit --- charmnumeric/ast.py | 2 +- examples/black-scholes.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 486b5d2..f8d3262 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -32,7 +32,7 @@ def compile_wrapper(*args, **kwargs): class ASTNode(object): def __init__(self, name, opcode, operands, args=[]): - from charmtiles.array import ndarray + from charmnumeric.array import ndarray global multiLineFuse # contains opcode, operands # operands are ndarrays diff --git a/examples/black-scholes.py b/examples/black-scholes.py index 098a020..e94ba03 100644 --- a/examples/black-scholes.py +++ b/examples/black-scholes.py @@ -1,7 +1,7 @@ -from charmtiles.array import connect, ndarray -import charmtiles.linalg as lg -from charmtiles.ccs import sync -from charmtiles.ast import set_max_depth +from charmnumeric.array import connect, ndarray +import charmnumeric.linalg as lg +from charmnumeric.ccs import sync +from charmnumeric.ast import set_max_depth import time import numpy as np import gc From dd4d3aefa577e1cf2b34954ec5abb1d9d8e4c09b Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Fri, 17 Oct 2025 21:38:23 +0530 Subject: [PATCH 23/34] Revert "nit" This reverts commit f068b6c81584a857832948b89d4f80bd835ea21c. --- charmnumeric/ast.py | 2 +- examples/black-scholes.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index f8d3262..486b5d2 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -32,7 +32,7 @@ def compile_wrapper(*args, **kwargs): class ASTNode(object): def __init__(self, name, opcode, operands, args=[]): - from charmnumeric.array import ndarray + from charmtiles.array import ndarray global multiLineFuse # contains opcode, operands # operands are ndarrays diff --git a/examples/black-scholes.py b/examples/black-scholes.py index e94ba03..098a020 100644 --- a/examples/black-scholes.py +++ b/examples/black-scholes.py @@ -1,7 +1,7 @@ -from charmnumeric.array import connect, ndarray -import charmnumeric.linalg as lg -from charmnumeric.ccs import sync -from charmnumeric.ast import set_max_depth +from charmtiles.array import connect, ndarray +import charmtiles.linalg as lg +from charmtiles.ccs import sync +from charmtiles.ast import set_max_depth import time import numpy as np import gc From d6bf0ae8fcc6e032bb50776960bf25370382d7eb Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Fri, 17 Oct 2025 21:38:33 +0530 Subject: [PATCH 24/34] Revert "dealloc (#9)" This reverts commit 62eb3ef43b836209e781bcb5ec4658246b11ddbf. 
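The change reverted here (and reapplied in the next patch) includes the client-side heuristic that decides whether an intermediate array must be kept on the server: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False in charmnumeric/ast.py, i.e. the raw CPython reference count of the operand, with the subtraction presumably compensating for the temporary references held while the command is encoded. A small standalone sketch of that refcount probe; the surrounding objects are illustrative only:

    import sys
    from ctypes import c_long

    def refcount(obj):
        # reads the CPython reference count the same way ast.py does
        return c_long.from_address(id(obj)).value

    a = [1, 2, 3]
    print(refcount(a), sys.getrefcount(a))  # getrefcount reports one extra
                                            # reference: its own argument
    b = a
    print(refcount(a))                      # one higher while b is alive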
--- charmnumeric/array.py | 28 ++++++++++- charmnumeric/ast.py | 10 ++-- charmnumeric/ccs.py | 4 +- examples/conjugate_gradient.py | 19 +++----- examples/graph.py | 24 ++++------ setup.py | 11 +++++ src/CMakeLists.txt | 12 ++--- src/ast.hpp | 79 +++++++++++++++--------------- src/server.cpp | 88 ++++++++++++++++++++-------------- 9 files changed, 155 insertions(+), 120 deletions(-) diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 19052a1..0f80220 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -96,6 +96,20 @@ def __del__(self): def __len__(self): return self.shape[0] + #def __str__(self): + # print(self.get()) + + #def __repr__(self): + # #self._flush_command_buffer() + # # FIXME add repr + # pass + + def __setitem__(self, key, value): + if not isinstance(key, slice) or key.start != None or \ + key.stop != None or key.step != None: + raise ValueError("Can't set items or slices") + self.cmd_buffer = ASTNode(res, OPCODES.get('setitem'), [self, value]) + def __neg__(self): return self * -1 @@ -269,13 +283,23 @@ def _flush_command_buffer(self): self.command_buffer.plot_graph() if self.valid: return - cmd = self.command_buffer.get_command(self.ndim, self.shape, is_scalar=self.is_scalar) + validated_arrays = {self.name : self} + cmd = self.command_buffer.get_command(validated_arrays, self.ndim, self.shape, is_scalar=self.is_scalar) + reply_size = 0 + for name, arr in validated_arrays.items(): + reply_size += 8 + 8 * arr.ndim if not debug: cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + cmd cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd send_command_async(Handlers.operation_handler, cmd) deletion_buffer = b'' deletion_buffer_size = 0 + for i in range(len(validated_arrays)): + arr = validated_arrays[name] + arr.validate() + else: + for name, arr in validated_arrays.items(): + arr.validate() self.validate() def get(self): @@ -297,7 +321,7 @@ def evaluate(self): def validate(self): self.valid = True - self.command_buffer = ASTNode(self.name, 0, [weakref.proxy(self)]) + self.command_buffer = ASTNode(self.name, 0, [self]) def copy(self): res = get_name() diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 486b5d2..6180f01 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -59,7 +59,7 @@ def __init__(self, name, opcode, operands, args=[]): # | 8 | 64 | 64 | # # NB: Latter encoding for double constants # ################################################################################################################################################################# - def get_command(self, ndim, shape, save=True, is_scalar=False): + def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False): from charmnumeric.array import ndarray # Ndims and Shape setup @@ -83,7 +83,7 @@ def get_command(self, ndim, shape, save=True, is_scalar=False): cmd += to_bytes(len(self.operands), 'B') for op in self.operands: if isinstance(op, ndarray): - if op.valid: + if op.name in validated_arrays: if op.is_scalar: opcmd = to_bytes(1, 'B') else: @@ -94,9 +94,9 @@ def get_command(self, ndim, shape, save=True, is_scalar=False): opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - opcmd = op.command_buffer.get_command(op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) - if save_op: - op.validate() + opcmd = op.command_buffer.get_command(validated_arrays, op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) + if not 
op.valid and save_op: + validated_arrays[op.name] = op elif isinstance(op, float) or isinstance(op, int): opcmd = to_bytes(0, 'B') for _shape in shape: diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 3eb8c5a..43516b6 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -17,6 +17,8 @@ '/': 4, '@': 5, 'copy': 6, + 'setitem': 7, + 'pow': 8, '>': 9, '<': 10, '>=': 11, @@ -115,7 +117,7 @@ def connect(server_ip, server_port): def disconnect(): from charmnumeric.array import deletion_buffer, deletion_buffer_size - global client_id + global client_id, deletion_buffer, deletion_buffer_size if deletion_buffer_size > 0: cmd = to_bytes(len(deletion_buffer), 'I') + deletion_buffer cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd diff --git a/examples/conjugate_gradient.py b/examples/conjugate_gradient.py index 90e73d6..0309036 100644 --- a/examples/conjugate_gradient.py +++ b/examples/conjugate_gradient.py @@ -28,16 +28,11 @@ def solve(A, b, x): if __name__ == '__main__': connect("172.17.0.1", 10000) - n = int(1e4) + n = 184 - A = ndarray(2, (n, n), np.float64, init_value = 1e-6) - b = ndarray(1, n, np.float64, init_value = 1e-6) - x = ndarray(1, n, np.float64, init_value = 1e-6) - - # Pre-Compilation - _ = solve(A, b, x) - __ = _.get() - print(__) + A = ndarray(2, (n, n), np.float64, init_value = 1e-4) + b = ndarray(1, n, np.float64, init_value = 1e-4) + x = ndarray(1, n, np.float64, init_value = 1e-4) start = time.time() x = solve(A, b, x) @@ -45,9 +40,9 @@ def solve(A, b, x): print("Execution time (Charm) = %.6f s" % (time.time() - start)) # Initialize all arrays to 1 - A = np.ones((n, n), dtype=np.float64) * 1e-6 - b = np.ones(n, dtype=np.float64) * 1e-6 - x = np.ones(n, dtype=np.float64) * 1e-6 + A = np.ones((n, n), dtype=np.float64) * 1e-4 + b = np.ones(n, dtype=np.float64) * 1e-4 + x = np.ones(n, dtype=np.float64) * 1e-4 start = time.time() x_np = solve(A, b, x) diff --git a/examples/graph.py b/examples/graph.py index 853c039..caa36e0 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -11,12 +11,8 @@ def f(): v = ndarray(1, 10, np.float64, init_value=2) b = ndarray(1, 10, np.float64, init_value=1) c = ndarray(1, 10, np.float64, init_value=3) - for _ in range(2): - k = v * 2 + b + 3 + c - 32 - l = k >= 42 - l.get() - print(l.get()) - # print(l.get()) + # k = v * 2 + b + 3 + c - 32 + # l = k >= 42 # v1 = v @ b # v1 = (b + c) @ (b - c) # q.get() @@ -36,14 +32,14 @@ def f(): # final_res = res + 42 # q.get() - # a1 = b @ c - # print(a1.get()) - # a2 = v @ c - # print(a2.get()) - # res = (a1 / a2) * b + c - # v = 2 - # # a3 = a1 + a2 - # print(v.get()) + a1 = b @ c + print(a1.get()) + a2 = v @ c + print(a2.get()) + res = (a1 / a2) * b + c + v = 2 + # a3 = a1 + a2 + print(v.get()) # res = a3 * v # w = c @ b # w.get() diff --git a/setup.py b/setup.py index 9c0b53a..8071e21 100644 --- a/setup.py +++ b/setup.py @@ -10,6 +10,15 @@ def get_version(): exec(compile(open(fname).read(), fname, 'exec'), data) return data.get('__version__') + +def compile_server(): + charmc = os.environ.get('CHARMC', + '/home/adityapb/charm/charm/netlrts-linux-x86_64/bin/charmc') + aum_base = os.environ.get('AUM_HOME', '/home/adityapb/charm/LibCharmtyles') + subprocess.run(["make", "-C", "src/", + "CHARMC=%s" % charmc, "BASE_DIR=%s" % aum_base]) + + install_requires = ['numpy', 'charm4py'] tests_require = ['pytest'] docs_require = ['sphinx'] @@ -30,6 +39,8 @@ def get_version(): ''' classifiers = [x.strip() for x in classes.splitlines() if x] +compile_server() + setup( 
name='charmnumeric', version=get_version(), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7308a44..2eeb08e 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,16 +40,10 @@ if(Charm_ENABLE_GPU) target_include_directories(server PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}/include ${CHARM_DIR}/include) target_link_libraries(server Kokkos::kokkos) - add_custom_command( - OUTPUT "${CMAKE_BINARY_DIR}/server.out" - COMMAND ${CHARMC} ${GPU_LINK_OPTS} $ -o ${CMAKE_BINARY_DIR}/server.out + add_custom_command(result ALL + COMMAND ${CHARMC} ${GPU_LINK_OPTS} $:server> -o ${CMAKE_BINARY_DIR}/server.out DEPENDS server - COMMENT "Linking charm build against kokkos and cuda" - ) - - add_custom_target(result ALL - DEPENDS "${CMAKE_BINARY_DIR}/server.out" - ) + COMMENT "Linking charm build against kokkos and cuda") else() add_executable(server.out server.cpp ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h ${CMAKE_SOURCE_DIR}/server.decl.h) target_include_directories(server.out PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}) diff --git a/src/ast.hpp b/src/ast.hpp index b3ab573..709585c 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -7,11 +7,10 @@ #include #include #include -#include using ctop = ct::util::Operation; using ct_name_t = uint64_t; -using ct_array_t = std::variant, std::unique_ptr>; +using ct_array_t = std::variant; std::unordered_map symbol_table; inline static void insert(ct_name_t name, ct_array_t arr) { @@ -86,25 +85,25 @@ ctop inline to_ctop(uint64_t opcode) noexcept { return ctop::matmul; case 6: return ctop::copy; - case 9: + case 11: return ctop::greater; - case 10: + case 12: return ctop::lesser; - case 11: + case 13: return ctop::geq; - case 12: + case 14: return ctop::leq; - case 13: + case 15: return ctop::eq; - case 14: + case 16: return ctop::neq; - case 15: + case 17: return ctop::logical_and; - case 16: + case 18: return ctop::logical_or; - case 17: + case 19: return ctop::logical_not; - case 18: + case 20: return ctop::where; default: return ctop::noop; @@ -218,10 +217,10 @@ double process_scalar(char *cmd) { const uint64_t &yID = yOperandInfo.second; if (xDim == 1 and yDim == 1) { - const auto &x = std::get>(lookup(xID)); - const auto &y = std::get>(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); - ct::scalar tensor0D = ct::dot(*x, *y); + ct::scalar tensor0D = ct::dot(x, y); double result = tensor0D.get(); insert(tensorID, result); return result; @@ -356,8 +355,8 @@ std::vector process_tensor(char *cmd, bool flush) { uint64_t tensorID = extract(cmd); if (opcode == 0) { - const auto &tmp = std::get>(lookup(tensorID)); - return (*tmp)(); + const auto &tmp = std::get(lookup(tensorID)); + return tmp(); } bool multiLineFuse = extract(cmd); @@ -404,10 +403,10 @@ std::vector process_tensor(char *cmd, bool flush) { const uint64_t &yID = yOperandInfo.second; if (xDim == 1 and yDim == 1) { - const auto &x = std::get>(lookup(xID)); - const auto &y = std::get>(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); - ct::scalar tensor0D = ct::dot(*x, *y); + ct::scalar tensor0D = ct::dot(x, y); double result = tensor0D.get(); insert(tensorID, result); @@ -415,31 +414,31 @@ std::vector process_tensor(char *cmd, bool flush) { return {temp_node}; } else if constexpr (std::is_same_v) { if (xDim == 1 and yDim == 2) { - const auto &x = std::get>(lookup(xID)); - const auto &y = std::get>(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const 
auto &y = std::get(lookup(yID)); - std::unique_ptr tensor = std::make_unique(std::move(ct::dot(*x, *y))); - const auto &tensorNode = (*tensor)(); + ct::vector tensor = ct::dot(x, y); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; } else if (xDim == 2 and yDim == 1) { - const auto &x = std::get>(lookup(xID)); - const auto &y = std::get>(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); - std::unique_ptr tensor = std::make_unique(std::move(ct::dot(*x, *y))); - const auto &tensorNode = (*tensor)(); + ct::vector tensor = ct::dot(x, y); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; } } else if constexpr (std::is_same_v) { if (xDim == 2 and yDim == 2) { - const auto &x = std::get>(lookup(xID)); - const auto &y = std::get>(lookup(yID)); + const auto &x = std::get(lookup(xID)); + const auto &y = std::get(lookup(yID)); - std::unique_ptr tensor = std::make_unique(std::move(ct::matmul(*x, *y))); - const auto &tensorNode = (*tensor)(); + ct::matrix tensor = ct::matmul(x, y); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; @@ -452,15 +451,15 @@ std::vector process_tensor(char *cmd, bool flush) { cmd += operand_size; const uint64_t ©ID = copyOperandInfo.second; - const auto © = std::get>(lookup(copyID)); - std::unique_ptr tensor = std::make_unique(*copy); + const auto © = std::get(lookup(copyID)); + tensorType tensor(copy); - const auto &tensorNode = (*tensor)(); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; } - if (numOperands == 1) { + if(numOperands == 1){ uint32_t operand_size = extract(cmd); std::vector left = process_tensor(cmd); cmd += operand_size; @@ -481,8 +480,8 @@ std::vector process_tensor(char *cmd, bool flush) { if (ast[i].ter_ != -1) { ast[i].ter_ += 1; } - } - } else if (numOperands == 2) { + } + } else if(numOperands == 2) { uint32_t operand_size = extract(cmd); std::vector left = process_tensor(cmd); cmd += operand_size; @@ -585,8 +584,8 @@ std::vector process_tensor(char *cmd, bool flush) { } if (store or flush) { - std::unique_ptr tensor = std::make_unique(ast); - const auto &tensorNode = (*tensor)(); + tensorType tensor(ast); + const auto &tensorNode = tensor(); insert(tensorID, std::move(tensor)); return tensorNode; } diff --git a/src/server.cpp b/src/server.cpp index 89ade04..2a69062 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -105,6 +105,7 @@ void Main::handle_command(int epoch, uint8_t kind, uint32_t size, char *cmd) while (!command_buffer.empty() && std::get<0>(command_buffer.top()) == EPOCH) { buffer_t buffer = command_buffer.top(); + // CkPrintf("Executing buffered at epoch %i, current %i\n", std::get<0>(buffer), EPOCH); execute_command(std::get<0>(buffer), std::get<1>(buffer), (int)size, std::get<2>(buffer)); free(std::get<2>(buffer)); command_buffer.pop(); @@ -129,10 +130,18 @@ void Main::send_reply(int epoch, int size, char *msg) server.reply_buffer.erase(epoch); } -void Main::execute_operation(int epoch, int size, char *cmd) { +void Main::execute_operation(int epoch, int size, char *cmd) +{ + // first delete arrays uint32_t num_deletions = extract(cmd); + // CkPrintf("Num deletions = %u\n", num_deletions); + CkPrintf("Memory usage before delete is %f MB\n", CmiMemoryUsage() / (1024. 
* 1024.)); for (int i = 0; i < num_deletions; i++) - remove(extract(cmd)); + { + ct_name_t name = extract(cmd); + remove(name); + } + CkPrintf("Memory usage after %u deletions is %f MB\n", num_deletions, CmiMemoryUsage() / (1024. * 1024.)); char* dimPos = cmd + sizeof(uint8_t); if (peek(cmd) == 1) process_scalar(cmd); else if (peek(dimPos) == 1) process_tensor(cmd); @@ -192,7 +201,8 @@ void Main::execute_creation(int epoch, int size, char *cmd) { case 0: { - CmiAbort("Scalars can only be made through reduction ops and matmuls"); + // create scalar + CmiAbort("Not implemented"); } case 1: { @@ -202,16 +212,16 @@ void Main::execute_creation(int epoch, int size, char *cmd) if (has_buf) { double *init_buf = (double *)cmd; - res = ct::from_vector_unique(init_buf, size); + res = ct::from_vector(init_buf, size); } else if (has_init) { double init_value = extract(cmd); - res = std::make_unique(size, init_value); + res = ct::vector(size, init_value); } else { - res = std::make_unique(size); + res = ct::vector(size); } insert(res_name, std::move(res)); break; @@ -225,22 +235,23 @@ void Main::execute_creation(int epoch, int size, char *cmd) if (has_buf) { double *init_buf = (double *)cmd; - res = ct::from_matrix_unique(init_buf, size1, size2); + res = ct::from_matrix(init_buf, size1, size2); } else if (has_init) { double init_value = extract(cmd); - res = std::make_unique(size1, size2, init_value); + res = ct::matrix(size1, size2, init_value); } else { - res = std::make_unique(size1, size2); + res = ct::matrix(size1, size2); } insert(res_name, std::move(res)); break; } default: { + // FIXME is this correctly caught? CmiAbort("Greater than 2 dimensions not supported"); } } @@ -253,41 +264,44 @@ void Main::execute_fetch(int epoch, int size, char *cmd) char *reply = nullptr; int reply_size = 0; std::visit( - [&](auto &x) - { - using T = std::decay_t; - if constexpr (std::is_same_v) + [&](auto &x) { - reply = (char *)&x; - reply_size += 8; - send_reply(epoch, reply_size, reply); - } - else if constexpr (std::is_same_v>) - { - std::vector values = x->get(); - reply = (char *)values.data(); - reply_size += values.size() * sizeof(double); - send_reply(epoch, reply_size, reply); - } - else if constexpr (std::is_same_v>) - { - std::vector> values = x->get(); - std::vector flat; - for (const auto &row : values) - flat.insert(flat.end(), row.begin(), row.end()); - reply = reinterpret_cast(flat.data()); - reply_size += flat.size() * sizeof(double); - send_reply(epoch, reply_size, reply); - } - }, - arr); + using T = std::decay_t; + if constexpr (std::is_same_v) + { + reply = (char *)&x; + reply_size += 8; + send_reply(epoch, reply_size, reply); + } + else if constexpr (std::is_same_v) + { + std::vector values = x.get(); + reply = (char *)values.data(); + reply_size += values.size() * sizeof(double); + send_reply(epoch, reply_size, reply); + } + else if constexpr (std::is_same_v) + { + std::vector> values = x.get(); + std::vector flat; + for (const auto &row : values) + flat.insert(flat.end(), row.begin(), row.end()); + reply = reinterpret_cast(flat.data()); + reply_size += flat.size() * sizeof(double); + send_reply(epoch, reply_size, reply); + } + }, + arr); } void Main::execute_delete(int epoch, int size, char *cmd) { uint32_t num_deletions = extract(cmd); for (int i = 0; i < num_deletions; i++) - remove(extract(cmd)); + { + ct_name_t name = extract(cmd); + remove(name); + } } void Main::execute_disconnect(int epoch, int size, char *cmd) From 9820a5092511d5a78f5aa628ef90574a7452a31b Mon Sep 17 00:00:00 2001 
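Before the reapply below, a short illustration of the index fix-up that both versions of src/ast.hpp perform when splicing flattened child ASTs under a new root: nodes live in one flat vector, children are referenced by index (with -1 meaning none), so a subtree appended after the root must have its internal indices shifted by the offset at which it lands. A simplified Python sketch of the idea, not a transcription of the C++ (the real code stores left_/right_/ter_ on tensorAstNodeType and skips entries that are already -1):

    def splice(root, left, right):
        # root and the subtree entries are dicts: {'op': ..., 'left': -1, 'right': -1, 'ter': -1}
        out = [dict(root, left=1, right=1 + len(left))]
        for offset, subtree in ((1, left), (1 + len(left), right)):
            for node in subtree:
                node = dict(node)
                for k in ('left', 'right', 'ter'):
                    if node[k] != -1:
                        node[k] += offset   # shift child indices to their new positions
                out.append(node)
        return out

    # fusing (a + b) * c: the '+' subtree is already flattened as ['+', 'a', 'b']
    add = [{'op': '+', 'left': 1, 'right': 2, 'ter': -1},
           {'op': 'a', 'left': -1, 'right': -1, 'ter': -1},
           {'op': 'b', 'left': -1, 'right': -1, 'ter': -1}]
    c   = [{'op': 'c', 'left': -1, 'right': -1, 'ter': -1}]
    mul = {'op': '*', 'left': -1, 'right': -1, 'ter': -1}
    for i, node in enumerate(splice(mul, add, c)):
        print(i, node)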
From: Sh0g0-1758 Date: Sat, 18 Oct 2025 14:13:16 +0530 Subject: [PATCH 25/34] Reapply "dealloc (#9)" This reverts commit d6bf0ae8fcc6e032bb50776960bf25370382d7eb. --- charmnumeric/array.py | 28 +---------- charmnumeric/ast.py | 10 ++-- charmnumeric/ccs.py | 4 +- examples/conjugate_gradient.py | 19 +++++--- examples/graph.py | 24 ++++++---- setup.py | 11 ----- src/CMakeLists.txt | 12 +++-- src/ast.hpp | 79 +++++++++++++++--------------- src/server.cpp | 88 ++++++++++++++-------------------- 9 files changed, 120 insertions(+), 155 deletions(-) diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 0f80220..19052a1 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -96,20 +96,6 @@ def __del__(self): def __len__(self): return self.shape[0] - #def __str__(self): - # print(self.get()) - - #def __repr__(self): - # #self._flush_command_buffer() - # # FIXME add repr - # pass - - def __setitem__(self, key, value): - if not isinstance(key, slice) or key.start != None or \ - key.stop != None or key.step != None: - raise ValueError("Can't set items or slices") - self.cmd_buffer = ASTNode(res, OPCODES.get('setitem'), [self, value]) - def __neg__(self): return self * -1 @@ -283,23 +269,13 @@ def _flush_command_buffer(self): self.command_buffer.plot_graph() if self.valid: return - validated_arrays = {self.name : self} - cmd = self.command_buffer.get_command(validated_arrays, self.ndim, self.shape, is_scalar=self.is_scalar) - reply_size = 0 - for name, arr in validated_arrays.items(): - reply_size += 8 + 8 * arr.ndim + cmd = self.command_buffer.get_command(self.ndim, self.shape, is_scalar=self.is_scalar) if not debug: cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + cmd cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd send_command_async(Handlers.operation_handler, cmd) deletion_buffer = b'' deletion_buffer_size = 0 - for i in range(len(validated_arrays)): - arr = validated_arrays[name] - arr.validate() - else: - for name, arr in validated_arrays.items(): - arr.validate() self.validate() def get(self): @@ -321,7 +297,7 @@ def evaluate(self): def validate(self): self.valid = True - self.command_buffer = ASTNode(self.name, 0, [self]) + self.command_buffer = ASTNode(self.name, 0, [weakref.proxy(self)]) def copy(self): res = get_name() diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 6180f01..486b5d2 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -59,7 +59,7 @@ def __init__(self, name, opcode, operands, args=[]): # | 8 | 64 | 64 | # # NB: Latter encoding for double constants # ################################################################################################################################################################# - def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False): + def get_command(self, ndim, shape, save=True, is_scalar=False): from charmnumeric.array import ndarray # Ndims and Shape setup @@ -83,7 +83,7 @@ def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False) cmd += to_bytes(len(self.operands), 'B') for op in self.operands: if isinstance(op, ndarray): - if op.name in validated_arrays: + if op.valid: if op.is_scalar: opcmd = to_bytes(1, 'B') else: @@ -94,9 +94,9 @@ def get_command(self, validated_arrays, ndim, shape, save=True, is_scalar=False) opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False - opcmd = op.command_buffer.get_command(validated_arrays, 
op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) - if not op.valid and save_op: - validated_arrays[op.name] = op + opcmd = op.command_buffer.get_command(op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) + if save_op: + op.validate() elif isinstance(op, float) or isinstance(op, int): opcmd = to_bytes(0, 'B') for _shape in shape: diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 43516b6..3eb8c5a 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -17,8 +17,6 @@ '/': 4, '@': 5, 'copy': 6, - 'setitem': 7, - 'pow': 8, '>': 9, '<': 10, '>=': 11, @@ -117,7 +115,7 @@ def connect(server_ip, server_port): def disconnect(): from charmnumeric.array import deletion_buffer, deletion_buffer_size - global client_id, deletion_buffer, deletion_buffer_size + global client_id if deletion_buffer_size > 0: cmd = to_bytes(len(deletion_buffer), 'I') + deletion_buffer cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd diff --git a/examples/conjugate_gradient.py b/examples/conjugate_gradient.py index 0309036..90e73d6 100644 --- a/examples/conjugate_gradient.py +++ b/examples/conjugate_gradient.py @@ -28,11 +28,16 @@ def solve(A, b, x): if __name__ == '__main__': connect("172.17.0.1", 10000) - n = 184 + n = int(1e4) - A = ndarray(2, (n, n), np.float64, init_value = 1e-4) - b = ndarray(1, n, np.float64, init_value = 1e-4) - x = ndarray(1, n, np.float64, init_value = 1e-4) + A = ndarray(2, (n, n), np.float64, init_value = 1e-6) + b = ndarray(1, n, np.float64, init_value = 1e-6) + x = ndarray(1, n, np.float64, init_value = 1e-6) + + # Pre-Compilation + _ = solve(A, b, x) + __ = _.get() + print(__) start = time.time() x = solve(A, b, x) @@ -40,9 +45,9 @@ def solve(A, b, x): print("Execution time (Charm) = %.6f s" % (time.time() - start)) # Initialize all arrays to 1 - A = np.ones((n, n), dtype=np.float64) * 1e-4 - b = np.ones(n, dtype=np.float64) * 1e-4 - x = np.ones(n, dtype=np.float64) * 1e-4 + A = np.ones((n, n), dtype=np.float64) * 1e-6 + b = np.ones(n, dtype=np.float64) * 1e-6 + x = np.ones(n, dtype=np.float64) * 1e-6 start = time.time() x_np = solve(A, b, x) diff --git a/examples/graph.py b/examples/graph.py index caa36e0..853c039 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -11,8 +11,12 @@ def f(): v = ndarray(1, 10, np.float64, init_value=2) b = ndarray(1, 10, np.float64, init_value=1) c = ndarray(1, 10, np.float64, init_value=3) - # k = v * 2 + b + 3 + c - 32 - # l = k >= 42 + for _ in range(2): + k = v * 2 + b + 3 + c - 32 + l = k >= 42 + l.get() + print(l.get()) + # print(l.get()) # v1 = v @ b # v1 = (b + c) @ (b - c) # q.get() @@ -32,14 +36,14 @@ def f(): # final_res = res + 42 # q.get() - a1 = b @ c - print(a1.get()) - a2 = v @ c - print(a2.get()) - res = (a1 / a2) * b + c - v = 2 - # a3 = a1 + a2 - print(v.get()) + # a1 = b @ c + # print(a1.get()) + # a2 = v @ c + # print(a2.get()) + # res = (a1 / a2) * b + c + # v = 2 + # # a3 = a1 + a2 + # print(v.get()) # res = a3 * v # w = c @ b # w.get() diff --git a/setup.py b/setup.py index 8071e21..9c0b53a 100644 --- a/setup.py +++ b/setup.py @@ -10,15 +10,6 @@ def get_version(): exec(compile(open(fname).read(), fname, 'exec'), data) return data.get('__version__') - -def compile_server(): - charmc = os.environ.get('CHARMC', - '/home/adityapb/charm/charm/netlrts-linux-x86_64/bin/charmc') - aum_base = os.environ.get('AUM_HOME', '/home/adityapb/charm/LibCharmtyles') - subprocess.run(["make", "-C", "src/", - "CHARMC=%s" % charmc, "BASE_DIR=%s" % aum_base]) - - install_requires = ['numpy', 'charm4py'] 
tests_require = ['pytest'] docs_require = ['sphinx'] @@ -39,8 +30,6 @@ def compile_server(): ''' classifiers = [x.strip() for x in classes.splitlines() if x] -compile_server() - setup( name='charmnumeric', version=get_version(), diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 2eeb08e..7308a44 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -40,10 +40,16 @@ if(Charm_ENABLE_GPU) target_include_directories(server PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}/include ${CHARM_DIR}/include) target_link_libraries(server Kokkos::kokkos) - add_custom_command(result ALL - COMMAND ${CHARMC} ${GPU_LINK_OPTS} $:server> -o ${CMAKE_BINARY_DIR}/server.out + add_custom_command( + OUTPUT "${CMAKE_BINARY_DIR}/server.out" + COMMAND ${CHARMC} ${GPU_LINK_OPTS} $ -o ${CMAKE_BINARY_DIR}/server.out DEPENDS server - COMMENT "Linking charm build against kokkos and cuda") + COMMENT "Linking charm build against kokkos and cuda" + ) + + add_custom_target(result ALL + DEPENDS "${CMAKE_BINARY_DIR}/server.out" + ) else() add_executable(server.out server.cpp ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h ${CMAKE_SOURCE_DIR}/server.decl.h) target_include_directories(server.out PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}) diff --git a/src/ast.hpp b/src/ast.hpp index 709585c..b3ab573 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -7,10 +7,11 @@ #include #include #include +#include using ctop = ct::util::Operation; using ct_name_t = uint64_t; -using ct_array_t = std::variant; +using ct_array_t = std::variant, std::unique_ptr>; std::unordered_map symbol_table; inline static void insert(ct_name_t name, ct_array_t arr) { @@ -85,25 +86,25 @@ ctop inline to_ctop(uint64_t opcode) noexcept { return ctop::matmul; case 6: return ctop::copy; - case 11: + case 9: return ctop::greater; - case 12: + case 10: return ctop::lesser; - case 13: + case 11: return ctop::geq; - case 14: + case 12: return ctop::leq; - case 15: + case 13: return ctop::eq; - case 16: + case 14: return ctop::neq; - case 17: + case 15: return ctop::logical_and; - case 18: + case 16: return ctop::logical_or; - case 19: + case 17: return ctop::logical_not; - case 20: + case 18: return ctop::where; default: return ctop::noop; @@ -217,10 +218,10 @@ double process_scalar(char *cmd) { const uint64_t &yID = yOperandInfo.second; if (xDim == 1 and yDim == 1) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::scalar tensor0D = ct::dot(x, y); + ct::scalar tensor0D = ct::dot(*x, *y); double result = tensor0D.get(); insert(tensorID, result); return result; @@ -355,8 +356,8 @@ std::vector process_tensor(char *cmd, bool flush) { uint64_t tensorID = extract(cmd); if (opcode == 0) { - const auto &tmp = std::get(lookup(tensorID)); - return tmp(); + const auto &tmp = std::get>(lookup(tensorID)); + return (*tmp)(); } bool multiLineFuse = extract(cmd); @@ -403,10 +404,10 @@ std::vector process_tensor(char *cmd, bool flush) { const uint64_t &yID = yOperandInfo.second; if (xDim == 1 and yDim == 1) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::scalar tensor0D = ct::dot(x, y); + ct::scalar tensor0D = ct::dot(*x, *y); double result = tensor0D.get(); insert(tensorID, result); @@ -414,31 +415,31 @@ std::vector process_tensor(char *cmd, bool flush) { return {temp_node}; } else if 
constexpr (std::is_same_v) { if (xDim == 1 and yDim == 2) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::vector tensor = ct::dot(x, y); - const auto &tensorNode = tensor(); + std::unique_ptr tensor = std::make_unique(std::move(ct::dot(*x, *y))); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; } else if (xDim == 2 and yDim == 1) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::vector tensor = ct::dot(x, y); - const auto &tensorNode = tensor(); + std::unique_ptr tensor = std::make_unique(std::move(ct::dot(*x, *y))); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; } } else if constexpr (std::is_same_v) { if (xDim == 2 and yDim == 2) { - const auto &x = std::get(lookup(xID)); - const auto &y = std::get(lookup(yID)); + const auto &x = std::get>(lookup(xID)); + const auto &y = std::get>(lookup(yID)); - ct::matrix tensor = ct::matmul(x, y); - const auto &tensorNode = tensor(); + std::unique_ptr tensor = std::make_unique(std::move(ct::matmul(*x, *y))); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; @@ -451,15 +452,15 @@ std::vector process_tensor(char *cmd, bool flush) { cmd += operand_size; const uint64_t ©ID = copyOperandInfo.second; - const auto © = std::get(lookup(copyID)); - tensorType tensor(copy); + const auto © = std::get>(lookup(copyID)); + std::unique_ptr tensor = std::make_unique(*copy); - const auto &tensorNode = tensor(); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; } - if(numOperands == 1){ + if (numOperands == 1) { uint32_t operand_size = extract(cmd); std::vector left = process_tensor(cmd); cmd += operand_size; @@ -480,8 +481,8 @@ std::vector process_tensor(char *cmd, bool flush) { if (ast[i].ter_ != -1) { ast[i].ter_ += 1; } - } - } else if(numOperands == 2) { + } + } else if (numOperands == 2) { uint32_t operand_size = extract(cmd); std::vector left = process_tensor(cmd); cmd += operand_size; @@ -584,8 +585,8 @@ std::vector process_tensor(char *cmd, bool flush) { } if (store or flush) { - tensorType tensor(ast); - const auto &tensorNode = tensor(); + std::unique_ptr tensor = std::make_unique(ast); + const auto &tensorNode = (*tensor)(); insert(tensorID, std::move(tensor)); return tensorNode; } diff --git a/src/server.cpp b/src/server.cpp index 2a69062..89ade04 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -105,7 +105,6 @@ void Main::handle_command(int epoch, uint8_t kind, uint32_t size, char *cmd) while (!command_buffer.empty() && std::get<0>(command_buffer.top()) == EPOCH) { buffer_t buffer = command_buffer.top(); - // CkPrintf("Executing buffered at epoch %i, current %i\n", std::get<0>(buffer), EPOCH); execute_command(std::get<0>(buffer), std::get<1>(buffer), (int)size, std::get<2>(buffer)); free(std::get<2>(buffer)); command_buffer.pop(); @@ -130,18 +129,10 @@ void Main::send_reply(int epoch, int size, char *msg) server.reply_buffer.erase(epoch); } -void Main::execute_operation(int epoch, int size, char *cmd) -{ - // first delete arrays +void Main::execute_operation(int epoch, int size, char *cmd) { uint32_t num_deletions = extract(cmd); - // CkPrintf("Num deletions = %u\n", num_deletions); - CkPrintf("Memory usage before delete is 
%f MB\n", CmiMemoryUsage() / (1024. * 1024.)); for (int i = 0; i < num_deletions; i++) - { - ct_name_t name = extract(cmd); - remove(name); - } - CkPrintf("Memory usage after %u deletions is %f MB\n", num_deletions, CmiMemoryUsage() / (1024. * 1024.)); + remove(extract(cmd)); char* dimPos = cmd + sizeof(uint8_t); if (peek(cmd) == 1) process_scalar(cmd); else if (peek(dimPos) == 1) process_tensor(cmd); @@ -201,8 +192,7 @@ void Main::execute_creation(int epoch, int size, char *cmd) { case 0: { - // create scalar - CmiAbort("Not implemented"); + CmiAbort("Scalars can only be made through reduction ops and matmuls"); } case 1: { @@ -212,16 +202,16 @@ void Main::execute_creation(int epoch, int size, char *cmd) if (has_buf) { double *init_buf = (double *)cmd; - res = ct::from_vector(init_buf, size); + res = ct::from_vector_unique(init_buf, size); } else if (has_init) { double init_value = extract(cmd); - res = ct::vector(size, init_value); + res = std::make_unique(size, init_value); } else { - res = ct::vector(size); + res = std::make_unique(size); } insert(res_name, std::move(res)); break; @@ -235,23 +225,22 @@ void Main::execute_creation(int epoch, int size, char *cmd) if (has_buf) { double *init_buf = (double *)cmd; - res = ct::from_matrix(init_buf, size1, size2); + res = ct::from_matrix_unique(init_buf, size1, size2); } else if (has_init) { double init_value = extract(cmd); - res = ct::matrix(size1, size2, init_value); + res = std::make_unique(size1, size2, init_value); } else { - res = ct::matrix(size1, size2); + res = std::make_unique(size1, size2); } insert(res_name, std::move(res)); break; } default: { - // FIXME is this correctly caught? CmiAbort("Greater than 2 dimensions not supported"); } } @@ -264,44 +253,41 @@ void Main::execute_fetch(int epoch, int size, char *cmd) char *reply = nullptr; int reply_size = 0; std::visit( - [&](auto &x) + [&](auto &x) + { + using T = std::decay_t; + if constexpr (std::is_same_v) { - using T = std::decay_t; - if constexpr (std::is_same_v) - { - reply = (char *)&x; - reply_size += 8; - send_reply(epoch, reply_size, reply); - } - else if constexpr (std::is_same_v) - { - std::vector values = x.get(); - reply = (char *)values.data(); - reply_size += values.size() * sizeof(double); - send_reply(epoch, reply_size, reply); - } - else if constexpr (std::is_same_v) - { - std::vector> values = x.get(); - std::vector flat; - for (const auto &row : values) - flat.insert(flat.end(), row.begin(), row.end()); - reply = reinterpret_cast(flat.data()); - reply_size += flat.size() * sizeof(double); - send_reply(epoch, reply_size, reply); - } - }, - arr); + reply = (char *)&x; + reply_size += 8; + send_reply(epoch, reply_size, reply); + } + else if constexpr (std::is_same_v>) + { + std::vector values = x->get(); + reply = (char *)values.data(); + reply_size += values.size() * sizeof(double); + send_reply(epoch, reply_size, reply); + } + else if constexpr (std::is_same_v>) + { + std::vector> values = x->get(); + std::vector flat; + for (const auto &row : values) + flat.insert(flat.end(), row.begin(), row.end()); + reply = reinterpret_cast(flat.data()); + reply_size += flat.size() * sizeof(double); + send_reply(epoch, reply_size, reply); + } + }, + arr); } void Main::execute_delete(int epoch, int size, char *cmd) { uint32_t num_deletions = extract(cmd); for (int i = 0; i < num_deletions; i++) - { - ct_name_t name = extract(cmd); - remove(name); - } + remove(extract(cmd)); } void Main::execute_disconnect(int epoch, int size, char *cmd) From 
03e7db72e3e039a3679658cb0d051df756e6a711 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Sat, 18 Oct 2025 14:14:58 +0530 Subject: [PATCH 26/34] Reapply "nit" This reverts commit dd4d3aefa577e1cf2b34954ec5abb1d9d8e4c09b. --- charmnumeric/ast.py | 2 +- examples/black-scholes.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 486b5d2..f8d3262 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -32,7 +32,7 @@ def compile_wrapper(*args, **kwargs): class ASTNode(object): def __init__(self, name, opcode, operands, args=[]): - from charmtiles.array import ndarray + from charmnumeric.array import ndarray global multiLineFuse # contains opcode, operands # operands are ndarrays diff --git a/examples/black-scholes.py b/examples/black-scholes.py index 098a020..e94ba03 100644 --- a/examples/black-scholes.py +++ b/examples/black-scholes.py @@ -1,7 +1,7 @@ -from charmtiles.array import connect, ndarray -import charmtiles.linalg as lg -from charmtiles.ccs import sync -from charmtiles.ast import set_max_depth +from charmnumeric.array import connect, ndarray +import charmnumeric.linalg as lg +from charmnumeric.ccs import sync +from charmnumeric.ast import set_max_depth import time import numpy as np import gc From f8460fc8c87c6678362604dcca1ba130ab8286ff Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Sat, 18 Oct 2025 14:15:36 +0530 Subject: [PATCH 27/34] Revert "Reapply "nit"" This reverts commit 03e7db72e3e039a3679658cb0d051df756e6a711. --- charmnumeric/ast.py | 2 +- examples/black-scholes.py | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index f8d3262..486b5d2 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -32,7 +32,7 @@ def compile_wrapper(*args, **kwargs): class ASTNode(object): def __init__(self, name, opcode, operands, args=[]): - from charmnumeric.array import ndarray + from charmtiles.array import ndarray global multiLineFuse # contains opcode, operands # operands are ndarrays diff --git a/examples/black-scholes.py b/examples/black-scholes.py index e94ba03..098a020 100644 --- a/examples/black-scholes.py +++ b/examples/black-scholes.py @@ -1,7 +1,7 @@ -from charmnumeric.array import connect, ndarray -import charmnumeric.linalg as lg -from charmnumeric.ccs import sync -from charmnumeric.ast import set_max_depth +from charmtiles.array import connect, ndarray +import charmtiles.linalg as lg +from charmtiles.ccs import sync +from charmtiles.ast import set_max_depth import time import numpy as np import gc From 4c5cdb06b21bf3e55e392beb8d73874c91fc84bc Mon Sep 17 00:00:00 2001 From: anant Date: Sun, 19 Oct 2025 00:15:36 +0530 Subject: [PATCH 28/34] debug deletion + extra temprory saves --- charmnumeric/array.py | 6 +++--- charmnumeric/ast.py | 10 +++++++--- examples/graph.py | 24 +++++++++++++++--------- src/ast.hpp | 20 ++++++++++++++++++-- 4 files changed, 43 insertions(+), 17 deletions(-) diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 19052a1..c8595e2 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -85,7 +85,7 @@ def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, if is_debug(): print("Maximum AST depth exceeded for %i, " "flushing buffer" % self.name) - self._flush_command_buffer() + self._flush_command_buffer(hasExceededMaxAstDepth=True) def __del__(self): global deletion_buffer, deletion_buffer_size @@ -260,7 +260,7 @@ def __matmul__(self, other): return create_ndarray(res_ndim, 
self.dtype, shape=shape, name=res, command_buffer=cmd_buffer, is_scalar=is_scalar) - def _flush_command_buffer(self): + def _flush_command_buffer(self, hasExceededMaxAstDepth=False): # send the command to server # finally set command buffer to array name global deletion_buffer, deletion_buffer_size @@ -269,7 +269,7 @@ def _flush_command_buffer(self): self.command_buffer.plot_graph() if self.valid: return - cmd = self.command_buffer.get_command(self.ndim, self.shape, is_scalar=self.is_scalar) + cmd = self.command_buffer.get_command(self.ndim, self.shape, is_scalar=self.is_scalar, hasExceededMaxAstDepth=hasExceededMaxAstDepth) if not debug: cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + cmd cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 486b5d2..93ee28c 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -32,7 +32,7 @@ def compile_wrapper(*args, **kwargs): class ASTNode(object): def __init__(self, name, opcode, operands, args=[]): - from charmtiles.array import ndarray + from charmnumeric.array import ndarray global multiLineFuse # contains opcode, operands # operands are ndarrays @@ -59,7 +59,7 @@ def __init__(self, name, opcode, operands, args=[]): # | 8 | 64 | 64 | # # NB: Latter encoding for double constants # ################################################################################################################################################################# - def get_command(self, ndim, shape, save=True, is_scalar=False): + def get_command(self, ndim, shape, save=True, is_scalar=False, hasExceededMaxAstDepth=False): from charmnumeric.array import ndarray # Ndims and Shape setup @@ -93,7 +93,11 @@ def get_command(self, ndim, shape, save=True, is_scalar=False): opcmd += to_bytes(_shape, 'L') opcmd += to_bytes(0, 'I') + to_bytes(False, '?') + to_bytes(op.name, 'L') else: - save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False + ### this will only be true when AST is being flushed because of exceeding max depth and ensures that unnecessary temporaries are not saved + if hasExceededMaxAstDepth: + save_op = True if c_long.from_address(id(op)).value - 4 > 0 else False + else: + save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False opcmd = op.command_buffer.get_command(op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) if save_op: op.validate() diff --git a/examples/graph.py b/examples/graph.py index 853c039..c88e9cf 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -4,18 +4,24 @@ import charmnumeric.linalg as lg import numpy as np -#enable_debug() -set_max_depth(10) - +# enable_debug() +set_max_depth(2) def f(): v = ndarray(1, 10, np.float64, init_value=2) b = ndarray(1, 10, np.float64, init_value=1) c = ndarray(1, 10, np.float64, init_value=3) - for _ in range(2): - k = v * 2 + b + 3 + c - 32 - l = k >= 42 - l.get() - print(l.get()) + a = ndarray(1, 10, np.float64, init_value=4) + # vx = [a, b] + for _ in range(1): + k = a + b + c + v + k.get() + # prog + # k + -> k temp object -> ref k + # + operation + # tree + # op (generate command) + # l.get() + # print(k.get()) # print(l.get()) # v1 = v @ b # v1 = (b + c) @ (b - c) @@ -69,6 +75,6 @@ def f(): if __name__ == '__main__': - connect("172.17.0.1", 10000) + connect("127.0.0.1", 10000) s = f() diff --git a/src/ast.hpp b/src/ast.hpp index b3ab573..53491ee 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -17,15 +17,31 @@ std::unordered_map symbol_table; inline static void insert(ct_name_t 
name, ct_array_t arr) { CkPrintf("Created array %" PRIu64 " on server\n", name); symbol_table[name] = std::move(arr); + { + ckout<<"Available Symbols"<second; } From 160feb7a4b0350d6c38c514fdcd179f3cf6514d7 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Sun, 19 Oct 2025 01:10:25 +0530 Subject: [PATCH 29/34] deferred deletions --- charmnumeric/array.py | 25 +++++++++++++++++++------ examples/graph.py | 16 ++++++++++++---- src/server.cpp | 6 ++++++ 3 files changed, 37 insertions(+), 10 deletions(-) diff --git a/charmnumeric/array.py b/charmnumeric/array.py index c8595e2..0a84318 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -11,6 +11,9 @@ deletion_buffer = b'' deletion_buffer_size = 0 +doDeferredDeletions = False +deferred_deletion_buffer = b'' +deferred_deletion_buffer_size = 0 def create_ndarray(ndim, dtype, shape=None, name=None, command_buffer=None, is_scalar=False): @@ -88,10 +91,17 @@ def __init__(self, ndim, shape=None, dtype=np.float64, init_value=None, self._flush_command_buffer(hasExceededMaxAstDepth=True) def __del__(self): - global deletion_buffer, deletion_buffer_size - if self.valid: - deletion_buffer += to_bytes(self.name, 'L') - deletion_buffer_size += 1 + global doDeferredDeletions + if doDeferredDeletions: + global deferred_deletion_buffer, deferred_deletion_buffer_size + if self.valid: + deferred_deletion_buffer += to_bytes(self.name, 'L') + deferred_deletion_buffer_size += 1 + else: + global deletion_buffer, deletion_buffer_size + if self.valid: + deletion_buffer += to_bytes(self.name, 'L') + deletion_buffer_size += 1 def __len__(self): return self.shape[0] @@ -263,7 +273,7 @@ def __matmul__(self, other): def _flush_command_buffer(self, hasExceededMaxAstDepth=False): # send the command to server # finally set command buffer to array name - global deletion_buffer, deletion_buffer_size + global deletion_buffer, deletion_buffer_size, deferred_deletion_buffer, deferred_deletion_buffer_size debug = is_debug() if debug: self.command_buffer.plot_graph() @@ -271,7 +281,7 @@ def _flush_command_buffer(self, hasExceededMaxAstDepth=False): return cmd = self.command_buffer.get_command(self.ndim, self.shape, is_scalar=self.is_scalar, hasExceededMaxAstDepth=hasExceededMaxAstDepth) if not debug: - cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + cmd + cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + to_bytes(deferred_deletion_buffer_size, 'I') + deferred_deletion_buffer + cmd cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd send_command_async(Handlers.operation_handler, cmd) deletion_buffer = b'' @@ -296,8 +306,11 @@ def evaluate(self): self._flush_command_buffer() def validate(self): + global doDeferredDeletions self.valid = True + doDeferredDeletions = True self.command_buffer = ASTNode(self.name, 0, [weakref.proxy(self)]) + doDeferredDeletions = False def copy(self): res = get_name() diff --git a/examples/graph.py b/examples/graph.py index c88e9cf..c0c78a1 100644 --- a/examples/graph.py +++ b/examples/graph.py @@ -6,15 +6,23 @@ # enable_debug() set_max_depth(2) + def f(): - v = ndarray(1, 10, np.float64, init_value=2) + a = ndarray(1, 10, np.float64, init_value=4) b = ndarray(1, 10, np.float64, init_value=1) c = ndarray(1, 10, np.float64, init_value=3) - a = ndarray(1, 10, np.float64, init_value=4) + d = ndarray(1, 10, np.float64, init_value=2) # vx = [a, b] for _ in range(1): - k = a + b + c + v - k.get() + e = a + b + c + d + f = e + a + print(f.get()) + # e = a + b * d - c + 42 - 34 + # f = e + c / a + 32 - b + # g = 
f.scale(69) + 53 - a / 32 + # g = f + d + # k = a + b + c + v + # k.get() # prog # k + -> k temp object -> ref k # + operation diff --git a/src/server.cpp b/src/server.cpp index 89ade04..7cb2499 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -133,10 +133,16 @@ void Main::execute_operation(int epoch, int size, char *cmd) { uint32_t num_deletions = extract(cmd); for (int i = 0; i < num_deletions; i++) remove(extract(cmd)); + uint32_t num_deferred_deletions = extract(cmd); + std::vector deferred_deletions; deferred_deletions.reserve(num_deferred_deletions); + for (int i = 0; i < num_deferred_deletions; i++) + deferred_deletions.emplace_back(extract(cmd)); char* dimPos = cmd + sizeof(uint8_t); if (peek(cmd) == 1) process_scalar(cmd); else if (peek(dimPos) == 1) process_tensor(cmd); else if (peek(dimPos) == 2) process_tensor(cmd); + for(const auto& it : deferred_deletions) + remove(it); } void Main::execute_command(int epoch, uint8_t kind, int size, char *cmd) From 1d24e4c2bec43fb63b570e19a1f3136f09e3c3ae Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Sun, 19 Oct 2025 01:25:45 +0530 Subject: [PATCH 30/34] fix magic deletions --- charmnumeric/array.py | 2 ++ charmnumeric/ccs.py | 10 ++++++---- src/server.cpp | 3 +++ 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/charmnumeric/array.py b/charmnumeric/array.py index 0a84318..b42edc8 100644 --- a/charmnumeric/array.py +++ b/charmnumeric/array.py @@ -286,6 +286,8 @@ def _flush_command_buffer(self, hasExceededMaxAstDepth=False): send_command_async(Handlers.operation_handler, cmd) deletion_buffer = b'' deletion_buffer_size = 0 + deferred_deletion_buffer = b'' + deferred_deletion_buffer_size = 0 self.validate() def get(self): diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 3eb8c5a..7afc24d 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -114,14 +114,16 @@ def connect(server_ip, server_port): atexit.register(disconnect) def disconnect(): - from charmnumeric.array import deletion_buffer, deletion_buffer_size + from charmnumeric.array import deletion_buffer, deletion_buffer_size, deferred_deletion_buffer_size, deferred_deletion_buffer global client_id - if deletion_buffer_size > 0: - cmd = to_bytes(len(deletion_buffer), 'I') + deletion_buffer + if (deletion_buffer_size > 0) or (deferred_deletion_buffer_size > 0): + cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + to_bytes(deferred_deletion_buffer_size, 'I') + deferred_deletion_buffer cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd send_command_async(Handlers.delete_handler, cmd) deletion_buffer = b'' - deletion_buffer_size = b'' + deletion_buffer_size = 0 + deferred_deletion_buffer = b'' + deferred_deletion_buffer_size = 0 cmd = to_bytes(client_id, 'B') cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd send_command_async(Handlers.disconnection_handler, cmd) diff --git a/src/server.cpp b/src/server.cpp index 7cb2499..88e6623 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -294,6 +294,9 @@ void Main::execute_delete(int epoch, int size, char *cmd) uint32_t num_deletions = extract(cmd); for (int i = 0; i < num_deletions; i++) remove(extract(cmd)); + uint32_t num_deferred_deletions = extract(cmd); + for (int i = 0; i < num_deferred_deletions; i++) + remove(extract(cmd)); } void Main::execute_disconnect(int epoch, int size, char *cmd) From d5bd4d1a678d5590c9512e3676e3b3440590081d Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Sun, 19 Oct 2025 01:46:26 +0530 Subject: [PATCH 31/34] nit --- examples/charm_fuse.py | 28 
++++++++++++++-------------- src/ast.hpp | 25 +++++++++---------------- 2 files changed, 23 insertions(+), 30 deletions(-) diff --git a/examples/charm_fuse.py b/examples/charm_fuse.py index f91493e..62c5cdc 100644 --- a/examples/charm_fuse.py +++ b/examples/charm_fuse.py @@ -4,12 +4,12 @@ import charmnumeric.linalg as lg import numpy as np import time -set_max_depth(10) +set_max_depth(float('inf')) @charm_fuse def f(): - v = ndarray(1, 1e5, np.float64, init_value=-20) - b = ndarray(1, 1e5, np.float64, init_value=10) + v = ndarray(1, 1e2, np.float64, init_value=-20) + b = ndarray(1, 1e2, np.float64, init_value=10) g1 = v.abs().scale(2).scale(2).add_constant(29) + b + 32 g2 = b.log(2).exp() @@ -17,8 +17,8 @@ def f(): return d.get() def g(): - v = ndarray(1, 1e5, np.float64, init_value=-20) - b = ndarray(1, 1e5, np.float64, init_value=10) + v = ndarray(1, 1e2, np.float64, init_value=-20) + b = ndarray(1, 1e2, np.float64, init_value=10) g1 = v.abs().scale(2).scale(2).add_constant(29) + b + 32 g2 = b.log(2).exp() @@ -27,18 +27,18 @@ def g(): if __name__ == '__main__': connect("127.0.0.1", 10000) - s = f() + # s = f() start = time.time() - for(i) in range(100): + for(i) in range(1000): s = f() end = time.time() print(s) print("Time taken(multi line fused): ", end - start) - s = g() - start = time.time() - for(i) in range(100): - s = g() - end = time.time() - print(s) - print("Time taken(multi line unfused): ", end - start) + # k = g() + # start = time.time() + # for(i) in range(100): + # k = g() + # end = time.time() + # print(k) + # print("Time taken(multi line unfused): ", end - start) diff --git a/src/ast.hpp b/src/ast.hpp index 53491ee..7bed1ee 100644 --- a/src/ast.hpp +++ b/src/ast.hpp @@ -17,17 +17,16 @@ std::unordered_map symbol_table; inline static void insert(ct_name_t name, ct_array_t arr) { CkPrintf("Created array %" PRIu64 " on server\n", name); symbol_table[name] = std::move(arr); - { - ckout<<"Available Symbols"<second; } From 25a9388488efa88718e3427ca99f1eebf569d089 Mon Sep 17 00:00:00 2001 From: Sh0g0-1758 Date: Mon, 20 Oct 2025 00:58:39 +0530 Subject: [PATCH 32/34] fix deletion of matmuls, copy and final cleanup --- charmnumeric/ast.py | 2 +- charmnumeric/ccs.py | 13 +++++++++++-- src/server.cpp | 2 +- 3 files changed, 13 insertions(+), 4 deletions(-) diff --git a/charmnumeric/ast.py b/charmnumeric/ast.py index 93ee28c..217cb61 100644 --- a/charmnumeric/ast.py +++ b/charmnumeric/ast.py @@ -99,7 +99,7 @@ def get_command(self, ndim, shape, save=True, is_scalar=False, hasExceededMaxAst else: save_op = True if c_long.from_address(id(op)).value - 2 > 0 else False opcmd = op.command_buffer.get_command(op.ndim, op.shape, save=save_op, is_scalar=op.is_scalar) - if save_op: + if save_op or (op.command_buffer.opcode == OPCODES.get('@')) or (op.command_buffer.opcode == OPCODES.get('copy')): op.validate() elif isinstance(op, float) or isinstance(op, int): opcmd = to_bytes(0, 'B') diff --git a/charmnumeric/ccs.py b/charmnumeric/ccs.py index 7afc24d..c565e6a 100644 --- a/charmnumeric/ccs.py +++ b/charmnumeric/ccs.py @@ -1,7 +1,7 @@ import struct import atexit from pyccs import Server -from charmnumeric import array +import gc debug = False server = None @@ -114,8 +114,16 @@ def connect(server_ip, server_port): atexit.register(disconnect) def disconnect(): + # cleanup the remaining ndarrays + from charmnumeric.array import ndarray + deleted_id = [] + for obj in gc.get_objects(): + if isinstance(obj, ndarray): + if not obj.name in deleted_id: + print(obj.name) + deleted_id.append(obj.name) + 
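+                # NOTE: calling __del__ explicitly only queues the surviving
+                # array's name into the client-side deletion buffer; the batched
+                # delete command assembled below is what frees it on the server.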
obj.__del__() from charmnumeric.array import deletion_buffer, deletion_buffer_size, deferred_deletion_buffer_size, deferred_deletion_buffer - global client_id if (deletion_buffer_size > 0) or (deferred_deletion_buffer_size > 0): cmd = to_bytes(deletion_buffer_size, 'I') + deletion_buffer + to_bytes(deferred_deletion_buffer_size, 'I') + deferred_deletion_buffer cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd @@ -124,6 +132,7 @@ def disconnect(): deletion_buffer_size = 0 deferred_deletion_buffer = b'' deferred_deletion_buffer_size = 0 + global client_id cmd = to_bytes(client_id, 'B') cmd = to_bytes(get_epoch(), 'i') + to_bytes(len(cmd), 'I') + cmd send_command_async(Handlers.disconnection_handler, cmd) diff --git a/src/server.cpp b/src/server.cpp index 88e6623..d6fe694 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -49,7 +49,7 @@ void connection_handler(char *msg) void disconnection_handler(char *msg) { - CkExit(); + ct::sync(); char *cmd = msg + CmiMsgHeaderSizeBytes; int epoch = extract(cmd); uint32_t size = extract(cmd); From 13e325f32563a157b7fd976433207c819ec9999b Mon Sep 17 00:00:00 2001 From: anant Date: Wed, 29 Oct 2025 16:32:38 +0530 Subject: [PATCH 33/34] sync + kokkos kernels for cpu --- config.cmake | 5 +++-- src/CMakeLists.txt | 7 +++++-- src/server.cpp | 1 + 3 files changed, 9 insertions(+), 4 deletions(-) diff --git a/config.cmake b/config.cmake index 4d72c5c..befd6bc 100644 --- a/config.cmake +++ b/config.cmake @@ -1,8 +1,9 @@ -set(CHARM_DIR "/home/shogo/master/Kale/charm/netlrts-linux-x86_64") -set(BASE_DIR "/home/shogo/master/Kale/LibCharmtyles") +set(CHARM_DIR "/home/anant/winter2024/lbp/study/charm/netlrts-linux-x86_64/") +set(BASE_DIR "/home/anant/sem7/LibCharmtyles") set(EIGEN_DIR "/usr/include/eigen3") set(CUDA_DIR "/path/to/CUDA/directory") set(KOKKOS_DIR "${BASE_DIR}/kokkos/install") +set(KOKKOS_KERNELS_DIR "${BASE_DIR}/kokkos-kernels/install") set(CHARMC "${CHARM_DIR}/bin/charmc") set(CPU_OPTS "-c++-option -std=c++20 -O3 -march=native -DNDEBUG") diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt index 7308a44..6172df9 100644 --- a/src/CMakeLists.txt +++ b/src/CMakeLists.txt @@ -6,6 +6,9 @@ include(${CMAKE_SOURCE_DIR}/../config.cmake) set(Kokkos_ROOT ${KOKKOS_DIR}) find_package(Kokkos 4.5 REQUIRED CONFIG) +set(KokkosKernels_DIR ${KOKKOS_KERNELS_DIR}/lib/cmake/KokkosKernels) +find_package(KokkosKernels REQUIRED) + if(Charm_ENABLE_GPU) message(STATUS "Building for a GPU backend") add_definitions(-DGPU_BACKEND) @@ -38,7 +41,7 @@ add_custom_command( if(Charm_ENABLE_GPU) add_library(server OBJECT server.cpp ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h ${CMAKE_SOURCE_DIR}/server.decl.h) target_include_directories(server PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}/include ${CHARM_DIR}/include) - target_link_libraries(server Kokkos::kokkos) + target_link_libraries(server Kokkos::kokkos Kokkos::kokkoskernels) add_custom_command( OUTPUT "${CMAKE_BINARY_DIR}/server.out" @@ -53,5 +56,5 @@ if(Charm_ENABLE_GPU) else() add_executable(server.out server.cpp ${BASE_DIR}/charmtyles/backend/libcharmtyles.decl.h ${CMAKE_SOURCE_DIR}/server.decl.h) target_include_directories(server.out PRIVATE ${BASE_DIR} ${BASE_DIR}/charmtyles/backend ${EIGEN_DIR}) - target_link_libraries(server.out Kokkos::kokkos) + target_link_libraries(server.out Kokkos::kokkos Kokkos::kokkoskernels) endif() diff --git a/src/server.cpp b/src/server.cpp index d6fe694..6380d4e 100644 --- a/src/server.cpp +++ b/src/server.cpp @@ -310,6 +310,7 @@ void 
Main::execute_disconnect(int epoch, int size, char *cmd) void Main::execute_sync(int epoch, int size, char *cmd) { + ct::sync(); CkPrintf("Execution time = %f\n", CkTimer() - start_time); bool r = true; send_reply(epoch, 1, (char *)&r); From 884926a657c5ab48ebcbacc40252decd59dddc18 Mon Sep 17 00:00:00 2001 From: anant Date: Wed, 29 Oct 2025 20:13:26 +0530 Subject: [PATCH 34/34] update cg benchmark to remove numerically unstable init --- config.cmake | 2 +- examples/conjugate_gradient.py | 56 ++++++++++++++++++++++++++-------- 2 files changed, 44 insertions(+), 14 deletions(-) diff --git a/config.cmake b/config.cmake index befd6bc..53660ec 100644 --- a/config.cmake +++ b/config.cmake @@ -8,6 +8,6 @@ set(KOKKOS_KERNELS_DIR "${BASE_DIR}/kokkos-kernels/install") set(CHARMC "${CHARM_DIR}/bin/charmc") set(CPU_OPTS "-c++-option -std=c++20 -O3 -march=native -DNDEBUG") set(GPU_OPTS "-std=c++20 -O3 -march=native -DNDEBUG") -set(GPU_LINK_OPTS -O3 -language charm++ -L${KOKKOS_DIR}/lib64 -lkokkoscore -L${CUDA_DIR} -lcuda -lcudart) +set(GPU_LINK_OPTS -O3 -language charm++ -L${KOKKOS_DIR}/lib64 -L/u/ajain18/kokkos_kernels_install/lib64 -lkokkoscore -lkokkoscontainers -lkokkoskernels -L${CUDA_DIR} -lcuda -lcudart -lcusparse -lcublas) set(LD_OPTS "") set(INCS "-I${BASE_DIR}") diff --git a/examples/conjugate_gradient.py b/examples/conjugate_gradient.py index 90e73d6..1a69d5e 100644 --- a/examples/conjugate_gradient.py +++ b/examples/conjugate_gradient.py @@ -1,13 +1,45 @@ from charmnumeric.array import connect, ndarray -import charmnumeric.linalg as lg -from charmnumeric.ccs import enable_debug, sync from charmnumeric.ast import set_max_depth import numpy as np import time set_max_depth(10) -def solve(A, b, x): +def generate_2D(N, corners=True): + if corners: + print( + "Generating %dx%d 2-D adjacency system with corners..." + % (N**2, N**2) + ) + A = np.zeros((N**2, N**2)) + 8 * np.eye(N**2) + else: + print( + "Generating %dx%d 2-D adjacency system without corners..." 
+ % (N**2, N**2) + ) + A = np.zeros((N**2, N**2)) + 4 * np.eye(N**2) + # These are the same for both cases + off_one = np.full(N**2 - 1, -1, dtype=np.float64) + A += np.diag(off_one, k=1) + A += np.diag(off_one, k=-1) + off_N = np.full(N * (N - 1), -1, dtype=np.float64) + A += np.diag(off_N, k=N) + A += np.diag(off_N, k=-N) + # If we have corners then we have four more cases + if corners: + off_N_plus = np.full(N * (N - 1) - 1, -1, dtype=np.float64) + A += np.diag(off_N_plus, k=N + 1) + A += np.diag(off_N_plus, k=-(N + 1)) + off_N_minus = np.full(N * (N - 1) + 1, -1, dtype=np.float64) + A += np.diag(off_N_minus, k=N - 1) + A += np.diag(off_N_minus, k=-(N - 1)) + # Then we can generate a random b matrix + b = np.random.rand(N**2) + return A, b + + +def solve(A, b, x_cp): + x = x_cp.copy() r = b - A @ x p = r.copy() rsold = r @ r @@ -26,13 +58,14 @@ def solve(A, b, x): return x if __name__ == '__main__': - connect("172.17.0.1", 10000) + connect("127.0.0.1", 10000) - n = int(1e4) + n = 50 - A = ndarray(2, (n, n), np.float64, init_value = 1e-6) - b = ndarray(1, n, np.float64, init_value = 1e-6) - x = ndarray(1, n, np.float64, init_value = 1e-6) + A_np, b_np = generate_2D(n) + A = ndarray(2, (n**2, n**2), np.float64, nparr = A_np) + b = ndarray(1, n**2, np.float64, nparr = b_np) + x = ndarray(1, A.shape[1], np.float64, init_value = 0) # Pre-Compilation _ = solve(A, b, x) @@ -44,13 +77,10 @@ def solve(A, b, x): x_charm = x.get() print("Execution time (Charm) = %.6f s" % (time.time() - start)) - # Initialize all arrays to 1 - A = np.ones((n, n), dtype=np.float64) * 1e-6 - b = np.ones(n, dtype=np.float64) * 1e-6 - x = np.ones(n, dtype=np.float64) * 1e-6 + x = np.zeros(A_np.shape[1], dtype=np.float64) start = time.time() - x_np = solve(A, b, x) + x_np = solve(A_np, b_np, x) print("Execution time (NumPy) = %.6f s" % (time.time() - start)) if np.allclose(x_np, x_charm, atol=1e-5):
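
For intuition, the operator assembled by generate_2D is essentially the 2-D finite-difference Laplacian on an N x N grid (a 5-point stencil without corners, 9-point with corners, ignoring the wrap-around entries in the off-diagonal bands), so it is symmetric positive definite and conjugate gradient converges on it. A small NumPy-only sanity check, assuming the generate_2D defined above; check_stencil is a hypothetical helper used only for illustration:

import numpy as np

def check_stencil(N=4):
    A, _ = generate_2D(N, corners=False)
    # Symmetric positive definite is what CG requires.
    assert np.allclose(A, A.T)
    assert np.all(np.linalg.eigvalsh(A) > 0)
    # 5-point pattern: 4 on the diagonal, -1 towards the four grid neighbours.
    assert A[0, 0] == 4 and A[0, 1] == -1 and A[0, N] == -1
    print("generate_2D(%d) produces an SPD 5-point operator" % N)

check_stencil()
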