diff --git a/.github/workflows/charm4py.yml b/.github/workflows/charm4py.yml index a6a08acf..445f6194 100644 --- a/.github/workflows/charm4py.yml +++ b/.github/workflows/charm4py.yml @@ -31,7 +31,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install dependencies run: | - pip install setuptools cython cffi greenlet numpy torch torchvision filelock matplotlib + pip install setuptools cython cffi greenlet numpy numba torch torchvision filelock matplotlib if [ ${{ matrix.os }} == 'macos-13' ]; then # pypi only distributes torch packages w/ numpy v1 for macos-x86_64 pip install 'numpy<2' @@ -50,3 +50,28 @@ jobs: # needed for param server export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES python auto_test.py + + build: + name: Lint + runs-on: ubuntu-latest + + permissions: + contents: read + packages: read + # To report GitHub Actions status checks + statuses: write + + steps: + - name: Checkout code + uses: actions/checkout@v4 + with: + # super-linter needs the full git history to get the + # list of files that changed across commits + fetch-depth: 0 + + - name: Super-linter + uses: super-linter/super-linter/slim@v7.4.0 # x-release-please-version + env: + # To report GitHub Actions status checks + GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} + VALIDATE_PYTHON_BLACK: true diff --git a/auto_test.py b/auto_test.py index a4860494..04e96d7b 100644 --- a/auto_test.py +++ b/auto_test.py @@ -1,6 +1,7 @@ import time import subprocess import sys + if sys.version_info[0] < 3: print("auto_test requires Python 3") exit(1) @@ -10,21 +11,23 @@ import json -if len(sys.argv) == 2 and sys.argv[1] == '-version_check': +if len(sys.argv) == 2 and sys.argv[1] == "-version_check": exit(sys.version_info[0]) def searchForPython(python_implementations): py3_exec = None - py3_exec = shutil.which('python3') + py3_exec = shutil.which("python3") if py3_exec is None: - exec_str = shutil.which('python') + exec_str = shutil.which("python") if exec_str is not None: - version = subprocess.call([exec_str, 'auto_test.py', '-version_check']) + version = subprocess.call([exec_str, "auto_test.py", "-version_check"]) if version >= 3: py3_exec = exec_str if py3_exec is None: - print("WARNING: Python 3 executable not found for auto_test. If desired, set manually") + print( + "WARNING: Python 3 executable not found for auto_test. If desired, set manually" + ) else: python_implementations.add((3, py3_exec)) @@ -33,76 +36,88 @@ def searchForPython(python_implementations): TIMEOUT = 120 # timeout for each test (in seconds) CHARM_QUIET_AFTER_NUM_TESTS = 5 -commonArgs = ['++local'] -default_num_processes = int(os.environ.get('CHARM4PY_TEST_NUM_PROCESSES', 4)) +commonArgs = ["++local"] +default_num_processes = int(os.environ.get("CHARM4PY_TEST_NUM_PROCESSES", 4)) try: - import numba + numbaInstalled = True except: numbaInstalled = False # search for python executables -python_implementations = set() # python implementations can also be added here manually +python_implementations = set() # python implementations can also be added here manually searchForPython(python_implementations) -interfaces = ['cython'] +interfaces = ["cython"] -with open('test_config.json', 'r') as infile: +with open("test_config.json", "r") as infile: tests = json.load(infile) num_tests = 0 durations = defaultdict(dict) for test in tests: - if 'condition' in test: - if test['condition'] == 'numbaInstalled' and not numbaInstalled: + if "condition" in test: + if test["condition"] == "numbaInstalled" and not numbaInstalled: continue - if test['condition'] == 'not numbaInstalled' and numbaInstalled: + if test["condition"] == "not numbaInstalled" and numbaInstalled: continue - if 'timeout_override' in test: - TIMEOUT = test['timeout_override'] + if "timeout_override" in test: + TIMEOUT = test["timeout_override"] else: TIMEOUT = 120 - num_processes = max(test.get('force_min_processes', default_num_processes), default_num_processes) + num_processes = max( + test.get("force_min_processes", default_num_processes), default_num_processes + ) for interface in interfaces: - durations[interface][test['path']] = [] + durations[interface][test["path"]] = [] for version, python in sorted(python_implementations): - if version < test.get('requires_py_version', -1): + if version < test.get("requires_py_version", -1): continue additionalArgs = [] - if num_tests >= CHARM_QUIET_AFTER_NUM_TESTS and '++quiet' not in commonArgs: - additionalArgs.append('++quiet') - cmd = ['charmrun/charmrun'] - if test.get('prefix'): - cmd += [test['prefix']] - if not test.get('interactive', False): - cmd += [python] + [test['path']] + if num_tests >= CHARM_QUIET_AFTER_NUM_TESTS and "++quiet" not in commonArgs: + additionalArgs.append("++quiet") + cmd = ["charmrun/charmrun"] + if test.get("prefix"): + cmd += [test["prefix"]] + if not test.get("interactive", False): + cmd += [python] + [test["path"]] else: - cmd += [python] + ['-m', 'charm4py.interactive'] - if 'args' in test: - cmd += test['args'].split(' ') + cmd += [python] + ["-m", "charm4py.interactive"] + if "args" in test: + cmd += test["args"].split(" ") cmd += commonArgs - cmd += ['+p' + str(num_processes), '+libcharm_interface', interface] + cmd += ["+p" + str(num_processes), "+libcharm_interface", interface] cmd += additionalArgs - print('Test command is ' + ' '.join(cmd)) + print("Test command is " + " ".join(cmd)) startTime = time.time() stdin = None - if test.get('interactive', False): - stdin = open(test['path']) + if test.get("interactive", False): + stdin = open(test["path"]) p = subprocess.Popen(cmd, stdin=stdin) try: rc = p.wait(TIMEOUT) except subprocess.TimeoutExpired: - print("Timeout (" + str(TIMEOUT) + " secs) expired when running " + test['path'] + ", Killing process") + print( + "Timeout (" + + str(TIMEOUT) + + " secs) expired when running " + + test["path"] + + ", Killing process" + ) p.kill() rc = -1 if rc != 0: - print("ERROR running test " + test['path'] + " with " + python) + print("ERROR running test " + test["path"] + " with " + python) exit(1) else: elapsed = round(time.time() - startTime, 3) - durations[interface][test['path']].append(elapsed) - print("\n\n--------------------- TEST PASSED (in " + str(elapsed) + " secs) ---------------------\n\n") + durations[interface][test["path"]].append(elapsed) + print( + "\n\n--------------------- TEST PASSED (in " + + str(elapsed) + + " secs) ---------------------\n\n" + ) num_tests += 1 diff --git a/charm4py/__init__.py b/charm4py/__init__.py index 441a3011..47b5f066 100644 --- a/charm4py/__init__.py +++ b/charm4py/__init__.py @@ -1,28 +1,35 @@ import sys + if sys.version_info < (3, 8, 0): - raise RuntimeError('Charm4py requires Python 3.8 or higher') + raise RuntimeError("Charm4py requires Python 3.8 or higher") import atexit import os + try: import greenlet except ImportError: - print('Charm4py requires the greenlet package. It can be installed via pip') + print("Charm4py requires the greenlet package. It can be installed via pip") exit(-1) -charm4py_version = 'unknown' +charm4py_version = "unknown" try: from ._version import version as charm4py_version except: try: import subprocess - charm4py_version = subprocess.check_output(['git', 'describe'], - cwd=os.path.dirname(__file__)).rstrip().decode() + + charm4py_version = ( + subprocess.check_output(["git", "describe"], cwd=os.path.dirname(__file__)) + .rstrip() + .decode() + ) except: pass -if os.environ.get('CHARM_NOLOAD', '0') == '0': +if os.environ.get("CHARM_NOLOAD", "0") == "0": from .charm import register, charm, readonlies, Options + Reducer = charm.reducers Future = charm.createFuture @@ -34,7 +41,9 @@ def checkCharmStarted(): if not charm.started: - print('Program is exiting but charm was not started: charm.start() was not ' - 'called or error happened before start') + print( + "Program is exiting but charm was not started: charm.start() was not " + "called or error happened before start" + ) atexit.register(checkCharmStarted) diff --git a/charm4py/channel.py b/charm4py/channel.py index aaf058ec..1809518f 100644 --- a/charm4py/channel.py +++ b/charm4py/channel.py @@ -4,7 +4,7 @@ class Channel(object): def __new__(cls, chare, remote, local=None): - if not hasattr(chare, '__channels__'): + if not hasattr(chare, "__channels__"): chare.__initchannelattrs__() ch = chare.__findPendingChannel__(remote, False) if ch is None: @@ -16,7 +16,7 @@ def __new__(cls, chare, remote, local=None): ch.setEstablished() if local is None: # if local is None, we assume local endpoint is the individual chare - if hasattr(chare, 'thisIndex'): + if hasattr(chare, "thisIndex"): local = chare.thisProxy[chare.thisIndex] else: local = chare.thisProxy @@ -26,6 +26,7 @@ def __new__(cls, chare, remote, local=None): CHAN_BUF_SIZE = 40000 + class _Channel(object): def __init__(self, port, remote, locally_initiated): @@ -36,7 +37,9 @@ def __init__(self, port, remote, locally_initiated): self.recv_seqno = 0 self.data = {} self.recv_fut = None # this future is used to block on self.recv() - self.wait_ready = None # this future is used to block on ready (by charm.iwait()) + self.wait_ready = ( + None # this future is used to block on ready (by charm.iwait()) + ) self.established = False self.established_fut = None self.locally_initiated = locally_initiated diff --git a/charm4py/chare.py b/charm4py/chare.py index bc64f4ea..b75187ec 100644 --- a/charm4py/chare.py +++ b/charm4py/chare.py @@ -1,5 +1,4 @@ from . import wait -from charm4py import ray import sys from greenlet import getcurrent from collections import defaultdict @@ -12,9 +11,7 @@ CHARM_TYPES = (MAINCHARE, GROUP, ARRAY) # Constants to detect type of contributors for reduction. Order should match enum extContributorType -(CONTRIBUTOR_TYPE_ARRAY, - CONTRIBUTOR_TYPE_GROUP, - CONTRIBUTOR_TYPE_NODEGROUP) = range(3) +(CONTRIBUTOR_TYPE_ARRAY, CONTRIBUTOR_TYPE_GROUP, CONTRIBUTOR_TYPE_NODEGROUP) = range(3) class Chare(object): @@ -27,13 +24,13 @@ def __new__(cls, chare_type=None, args=[], onPE=-1): arr.ckInsert(0, args, onPE, single=True) arr.ckDoneInserting() proxy = arr[0] - if hasattr(arr, 'creation_future'): + if hasattr(arr, "creation_future"): proxy.creation_future = arr.creation_future return proxy return object.__new__(cls) def __init__(self): - if hasattr(self, '_local'): + if hasattr(self, "_local"): return # messages to this chare from chares in the same PE are stored here without copying # or pickling. _local is a fixed size array that implements a mem pool, where msgs @@ -52,7 +49,7 @@ def __init__(self): def __addLocal__(self, msg): if self._local_free_head is None: - raise Charm4PyError('Local msg buffer full. Increase LOCAL_MSG_BUF_SIZE') + raise Charm4PyError("Local msg buffer full. Increase LOCAL_MSG_BUF_SIZE") h = self._local_free_head self._local_free_head = self._local[self._local_free_head] self._local[h] = msg @@ -118,7 +115,7 @@ def contribute(self, data, reducer, callback, section=None): charm.contribute(data, reducer, callback, self, section) def reduce(self, callback, data=None, reducer=None, section=None): - assert callable(callback), 'First argument to reduce must be a callback' + assert callable(callback), "First argument to reduce must be a callback" charm.contribute(data, reducer, callback, self, section) def allreduce(self, data=None, reducer=None, section=None): @@ -148,7 +145,9 @@ def allreduce(self, data=None, reducer=None, section=None): def AtSync(self): # NOTE this will fail if called from a chare that is not in an array (as it should be) - charm.CkArraySend(self.thisProxy.aid, self.thisIndex, self.thisProxy.AtSync.ep, (b'', [])) + charm.CkArraySend( + self.thisProxy.aid, self.thisIndex, self.thisProxy.AtSync.ep, (b"", []) + ) def migrate(self, toPe): charm.lib.CkMigrate(self.thisProxy.aid, self.thisIndex, toPe) @@ -167,7 +166,7 @@ def _coll_future_deposit_result(self, fid, result=None): def __getRedNo__(self): proxy = self.thisProxy - if hasattr(proxy, 'aid'): + if hasattr(proxy, "aid"): return charm.lib.getArrayElementRedNo(proxy.aid, self.thisIndex) else: return charm.lib.getGroupRedNo(proxy.gid) @@ -176,7 +175,9 @@ def __addThreadEventSubscriber__(self, target, args): self._thread_notify_target = target self._thread_notify_data = args - def _getSectionLocations_(self, sid0, numsections, member_func, slicing, section_elems, f, proxy): + def _getSectionLocations_( + self, sid0, numsections, member_func, slicing, section_elems, f, proxy + ): # list of sections in which this element participates (sections # numbered from 0 to numsections - 1) sections = [] @@ -203,9 +204,12 @@ def _getSectionLocations_(self, sid0, numsections, member_func, slicing, section for sec_num, elems in enumerate(section_elems): if self.thisIndex in elems: sections.append(sec_num) - assert len(sections) <= numsections, 'Element ' + str(self.thisIndex) + \ - ' participates in more sections than were specified' - if len(sections) > 0 and not hasattr(self, '_scookies'): + assert len(sections) <= numsections, ( + "Element " + + str(self.thisIndex) + + " participates in more sections than were specified" + ) + if len(sections) > 0 and not hasattr(self, "_scookies"): # chares that participate in sections need this dict to store their # reduction numbers for each section self._scookies = defaultdict(int) @@ -226,7 +230,9 @@ def _getSectionLocations_(self, sid0, numsections, member_func, slicing, section def __initchannelattrs__(self): self.__channels__ = [] # port -> channel._Channel object - self.__pendingChannels__ = [] # channels that have not finished establishing connections + self.__pendingChannels__ = ( + [] + ) # channels that have not finished establishing connections def __findPendingChannel__(self, remote, started_locally): for i, ch in enumerate(self.__pendingChannels__): @@ -236,7 +242,7 @@ def __findPendingChannel__(self, remote, started_locally): return None def _channelConnect__(self, remote_proxy, remote_port): # entry method - if not hasattr(self, '__channels__'): + if not hasattr(self, "__channels__"): self.__initchannelattrs__() ch = self.__findPendingChannel__(remote_proxy, True) if ch is not None: @@ -248,6 +254,7 @@ def _channelConnect__(self, remote_proxy, remote_port): # entry method ch.setEstablished() else: from .channel import _Channel + local_port = len(self.__channels__) ch = _Channel(local_port, remote_proxy, False) self.__channels__.append(ch) @@ -267,58 +274,88 @@ def _channelRecv__(self, port, seqno, *msg): # entry method elif ch.recv_fut is not None and seqno == ch.recv_seqno: ch.recv_fut.send(msg) else: - assert seqno not in ch.data, 'Channel buffer is full' + assert seqno not in ch.data, "Channel buffer is full" ch.data[seqno] = msg method_restrictions = { # reserved methods are those that can't be redefined in user subclass - 'reserved': {'__addLocal__', '__removeLocal__', '__flush_wait_queues__', - '__waitEnqueue__', 'wait', 'contribute', 'reduce', 'allreduce', - 'AtSync', 'migrate', 'setMigratable', - '_coll_future_deposit_result', '__getRedNo__', - '__addThreadEventSubscriber__', '_getSectionLocations_', - '__initchannelattrs__', '__findPendingChannel__', - '_channelConnect__', '_channelRecv__'}, - + "reserved": { + "__addLocal__", + "__removeLocal__", + "__flush_wait_queues__", + "__waitEnqueue__", + "wait", + "contribute", + "reduce", + "allreduce", + "AtSync", + "migrate", + "setMigratable", + "_coll_future_deposit_result", + "__getRedNo__", + "__addThreadEventSubscriber__", + "_getSectionLocations_", + "__initchannelattrs__", + "__findPendingChannel__", + "_channelConnect__", + "_channelRecv__", + }, # these methods of Chare cannot be entry methods. NOTE that any methods starting # and ending with '__' are automatically excluded from being entry methods - 'non_entry_method': {'wait', 'contribute', 'reduce', 'allreduce', - 'AtSync', 'migrated'} + "non_entry_method": { + "wait", + "contribute", + "reduce", + "allreduce", + "AtSync", + "migrated", + }, } def getEntryMethodInfo(cls, method_name): func = getattr(cls, method_name) argcount = func.__code__.co_argcount - 1 # - 1 to disregard "self" argument - argnames = tuple(func.__code__.co_varnames[1:argcount + 1]) - assert 'ret' not in argnames, '"ret" keyword for entry method parameters is reserved' + argnames = tuple(func.__code__.co_varnames[1 : argcount + 1]) + assert ( + "ret" not in argnames + ), '"ret" keyword for entry method parameters is reserved' defaults = func.__defaults__ if defaults is None: defaults = () return argcount, argnames, defaults + # ----------------- Mainchare and Proxy ----------------- + def mainchare_proxy_ctor(proxy, cid): proxy.cid = cid + def mainchare_proxy__getstate__(proxy): return proxy.cid + def mainchare_proxy__setstate__(proxy, state): proxy.cid = state + def mainchare_proxy__eq__(proxy, other): if isinstance(other, proxy.__class__): return proxy.cid == other.cid else: return False + def mainchare_proxy__hash__(proxy): return hash(proxy.cid) -def mainchare_proxy_method_gen(ep, argcount, argnames, defaults): # decorator, generates proxy entry methods + +def mainchare_proxy_method_gen( + ep, argcount, argnames, defaults +): # decorator, generates proxy entry methods def proxy_entry_method(proxy, *args, **kwargs): num_args = len(args) if num_args < argcount and len(kwargs) > 0: @@ -331,23 +368,31 @@ def proxy_entry_method(proxy, *args, **kwargs): else: # if not there, see if there is a default value def_idx = i - argcount + len(defaults) - assert def_idx >= 0, 'Value not found for parameter \'' + argname + '\' of entry method' + assert def_idx >= 0, ( + "Value not found for parameter '" + + argname + + "' of entry method" + ) args.append(defaults[def_idx]) header = {} blockFuture = None cid = proxy.cid # chare ID - if ('ret' in kwargs and kwargs['ret']) or ('awaitable' in kwargs and kwargs['awaitable']): - header[b'block'] = blockFuture = charm.Future() + if ("ret" in kwargs and kwargs["ret"]) or ( + "awaitable" in kwargs and kwargs["awaitable"] + ): + header[b"block"] = blockFuture = charm.Future() destObj = None if Options.local_msg_optim and (cid in charm.chares) and (len(args) > 0): destObj = charm.chares[cid] msg = charm.packMsg(destObj, args, header) charm.CkChareSend(cid, ep, msg) return blockFuture + proxy_entry_method.ep = ep return proxy_entry_method + def mainchare_proxy_contribute(proxy, contributeInfo): charm.CkContributeToChare(contributeInfo, proxy.cid) @@ -362,32 +407,36 @@ def initMember(cls, obj, cid): @classmethod def __baseEntryMethods__(cls): - return ['__init__'] + return ["__init__"] @classmethod def __getProxyClass__(C, cls): # print("Creating mainchare proxy class for class " + cls.__name__) - proxyClassName = cls.__name__ + 'Proxy' + proxyClassName = cls.__name__ + "Proxy" M = dict() # proxy methods for m in charm.classEntryMethods[MAINCHARE][cls]: if m.epIdx == -1: - raise Charm4PyError('Unregistered entry method') - if m.name == '__init__': + raise Charm4PyError("Unregistered entry method") + if m.name == "__init__": continue argcount, argnames, defaults = getEntryMethodInfo(m.C, m.name) if Options.profiling: - f = profile_send_function(mainchare_proxy_method_gen(m.epIdx, argcount, argnames, defaults)) + f = profile_send_function( + mainchare_proxy_method_gen(m.epIdx, argcount, argnames, defaults) + ) else: f = mainchare_proxy_method_gen(m.epIdx, argcount, argnames, defaults) - f.__qualname__ = proxyClassName + '.' + m.name + f.__qualname__ = proxyClassName + "." + m.name f.__name__ = m.name M[m.name] = f - M['__init__'] = mainchare_proxy_ctor - M['ckContribute'] = mainchare_proxy_contribute # function called when target proxy is Mainchare - M['__getstate__'] = mainchare_proxy__getstate__ - M['__setstate__'] = mainchare_proxy__setstate__ - M['__eq__'] = mainchare_proxy__eq__ - M['__hash__'] = mainchare_proxy__hash__ + M["__init__"] = mainchare_proxy_ctor + M["ckContribute"] = ( + mainchare_proxy_contribute # function called when target proxy is Mainchare + ) + M["__getstate__"] = mainchare_proxy__getstate__ + M["__setstate__"] = mainchare_proxy__setstate__ + M["__eq__"] = mainchare_proxy__eq__ + M["__hash__"] = mainchare_proxy__hash__ return type(proxyClassName, (), M) # create and return proxy class @@ -398,19 +447,23 @@ def __init__(self, args): # ------------------ Group and Proxy ------------------ + def group_proxy_ctor(proxy, gid): proxy.gid = gid proxy.elemIdx = -1 # entry method calls will be to elemIdx PE (broadcast if -1) + def group_proxy__getstate__(proxy): return (proxy.gid, proxy.elemIdx) + def group_proxy__setstate__(proxy, state): proxy.gid, proxy.elemIdx = state + def group_proxy__eq__(proxy, other): if proxy.issec: - if hasattr(other, 'issec'): + if hasattr(other, "issec"): return proxy.section == other.section else: return False @@ -419,12 +472,14 @@ def group_proxy__eq__(proxy, other): else: return False + def group_proxy__hash__(proxy): if proxy.issec: return hash(proxy.section) else: return hash((proxy.gid, proxy.elemIdx)) + def group_getsecproxy(proxy, sinfo): if proxy.issec: secproxy = proxy.__class__(proxy.gid) @@ -433,12 +488,15 @@ def group_getsecproxy(proxy, sinfo): secproxy.section = sinfo return secproxy + def groupsecproxy__getstate__(proxy): return (proxy.gid, proxy.elemIdx, proxy.section) + def groupsecproxy__setstate__(proxy, state): proxy.gid, proxy.elemIdx, proxy.section = state + def group_proxy_elem(proxy, pe): # group proxy [] overload method if not isinstance(pe, slice): proxy_clone = proxy.__class__(proxy.gid) @@ -454,7 +512,10 @@ def group_proxy_elem(proxy, pe): # group proxy [] overload method step = 1 return charm.split(proxy, 1, elems=[list(range(start, stop, step))])[0] -def group_proxy_method_gen(ep, argcount, argnames, defaults): # decorator, generates proxy entry methods + +def group_proxy_method_gen( + ep, argcount, argnames, defaults +): # decorator, generates proxy entry methods def proxy_entry_method(proxy, *args, **kwargs): num_args = len(args) if num_args < argcount and len(kwargs) > 0: @@ -467,20 +528,24 @@ def proxy_entry_method(proxy, *args, **kwargs): else: # if not there, see if there is a default value def_idx = i - argcount + len(defaults) - assert def_idx >= 0, 'Value not found for parameter \'' + argname + '\' of entry method' + assert def_idx >= 0, ( + "Value not found for parameter '" + + argname + + "' of entry method" + ) args.append(defaults[def_idx]) header = {} blockFuture = None elemIdx = proxy.elemIdx - if 'ret' in kwargs and kwargs['ret']: - header[b'block'] = blockFuture = charm.Future() + if "ret" in kwargs and kwargs["ret"]: + header[b"block"] = blockFuture = charm.Future() if elemIdx == -1: - header[b'bcast'] = header[b'bcastret'] = True - elif 'awaitable' in kwargs and kwargs['awaitable']: - header[b'block'] = blockFuture = charm.Future() + header[b"bcast"] = header[b"bcastret"] = True + elif "awaitable" in kwargs and kwargs["awaitable"]: + header[b"block"] = blockFuture = charm.Future() if elemIdx == -1: - header[b'bcast'] = True + header[b"bcast"] = True if not proxy.issec or elemIdx != -1: destObj = None gid = proxy.gid @@ -490,15 +555,19 @@ def proxy_entry_method(proxy, *args, **kwargs): charm.CkGroupSend(gid, elemIdx, ep, msg) else: root, sid = proxy.section - header[b'sid'] = sid + header[b"sid"] = sid if Options.local_msg_optim and root == charm._myPe: - charm.sectionMgr.thisProxy[root].sendToSectionLocal(sid, ep, header, *args) + charm.sectionMgr.thisProxy[root].sendToSectionLocal( + sid, ep, header, *args + ) else: charm.sectionMgr.thisProxy[root].sendToSection(sid, ep, header, *args) return blockFuture + proxy_entry_method.ep = ep return proxy_entry_method + def update_globals_proxy_method_gen(ep): def proxy_entry_method(proxy, *args, **kwargs): new_args = [] @@ -507,22 +576,22 @@ def proxy_entry_method(proxy, *args, **kwargs): new_args.append(var) if len(args) >= 2: new_args.append(args[1]) - elif 'module_name' in kwargs: - new_args.append(kwargs['module_name']) + elif "module_name" in kwargs: + new_args.append(kwargs["module_name"]) else: - new_args.append('__main__') # default value for 'module_name' parameter + new_args.append("__main__") # default value for 'module_name' parameter args = new_args header = {} blockFuture = None elemIdx = proxy.elemIdx - if 'ret' in kwargs and kwargs['ret']: - header[b'block'] = blockFuture = charm.Future() + if "ret" in kwargs and kwargs["ret"]: + header[b"block"] = blockFuture = charm.Future() if elemIdx == -1: - header[b'bcast'] = header[b'bcastret'] = True - elif 'awaitable' in kwargs and kwargs['awaitable']: - header[b'block'] = blockFuture = charm.Future() + header[b"bcast"] = header[b"bcastret"] = True + elif "awaitable" in kwargs and kwargs["awaitable"]: + header[b"block"] = blockFuture = charm.Future() if elemIdx == -1: - header[b'bcast'] = True + header[b"bcast"] = True if not proxy.issec or elemIdx != -1: destObj = None gid = proxy.gid @@ -532,88 +601,101 @@ def proxy_entry_method(proxy, *args, **kwargs): charm.CkGroupSend(gid, elemIdx, ep, msg) else: root, sid = proxy.section - header[b'sid'] = sid + header[b"sid"] = sid if Options.local_msg_optim and root == charm._myPe: - charm.sectionMgr.thisProxy[root].sendToSectionLocal(sid, ep, header, *args) + charm.sectionMgr.thisProxy[root].sendToSectionLocal( + sid, ep, header, *args + ) else: charm.sectionMgr.thisProxy[root].sendToSection(sid, ep, header, *args) return blockFuture + proxy_entry_method.ep = ep return proxy_entry_method + def group_ckNew_gen(C, epIdx): - @classmethod # make ckNew a class (not instance) method of proxy + @classmethod # make ckNew a class (not instance) method of proxy def group_ckNew(cls, args, onPEs): # print("GROUP calling ckNew for class " + C.__name__ + " cIdx=", C.idx[GROUP], "epIdx=", epIdx) header = {} creation_future = None if not charm.threadMgr.isMainThread() and ArrayMap not in C.mro(): creation_future = charm.Future() - header[b'block'] = creation_future - header[b'bcast'] = True - header[b'creation'] = True + header[b"block"] = creation_future + header[b"bcast"] = True + header[b"creation"] = True if onPEs is None: msg = charm.packMsg(None, args, header) gid = charm.lib.CkCreateGroup(C.idx[GROUP], epIdx, msg) proxy = cls(gid) else: # send empty msg for Charm++ group creation (on every PE) - msg = charm.packMsg(None, [], {b'constrained': True}) + msg = charm.packMsg(None, [], {b"constrained": True}) gid = charm.lib.CkCreateGroup(C.idx[GROUP], epIdx, msg) proxy = cls(gid) # real msg goes only to section elements - proxy = charm.split(proxy, 1, elems=[onPEs], cons=[-1, epIdx, header, args])[0] + proxy = charm.split( + proxy, 1, elems=[onPEs], cons=[-1, epIdx, header, args] + )[0] if creation_future is not None: proxy.creation_future = creation_future return proxy + return group_ckNew + def group_proxy_contribute(proxy, contributeInfo): charm.CkContributeToGroup(contributeInfo, proxy.gid, proxy.elemIdx) + def groupsecproxy_contribute(proxy, contributeInfo): charm.CkContributeToSection(contributeInfo, proxy.section[1], proxy.section[0]) + def group_proxy_localbranch(proxy): return charm.groups[proxy.gid] + class Group(object): type_id = GROUP def __new__(cls, C, args=[], onPEs=None): - if (not hasattr(C, 'mro')) or (Chare not in C.mro()): - raise Charm4PyError('Only subclasses of Chare can be member of Group') + if (not hasattr(C, "mro")) or (Chare not in C.mro()): + raise Charm4PyError("Only subclasses of Chare can be member of Group") if C not in charm.proxyClasses[GROUP]: - raise Charm4PyError(str(C) + ' not registered for use in Groups') + raise Charm4PyError(str(C) + " not registered for use in Groups") return charm.proxyClasses[GROUP][C].ckNew(args, onPEs) @classmethod def initMember(cls, obj, gid): obj.thisIndex = charm.myPe() obj.thisProxy = charm.proxyClasses[GROUP][obj.__class__](gid) - obj._contributeInfo = charm.lib.initContributeInfo(gid, obj.thisIndex, CONTRIBUTOR_TYPE_GROUP) + obj._contributeInfo = charm.lib.initContributeInfo( + gid, obj.thisIndex, CONTRIBUTOR_TYPE_GROUP + ) obj._scookies = defaultdict(int) @classmethod def __baseEntryMethods__(cls): - return ['__init__'] + return ["__init__"] @classmethod def __getProxyClass__(C, cls, sectionProxy=False): # print("Creating group proxy class for class " + cls.__name__) if not sectionProxy: - proxyClassName = cls.__name__ + 'GroupProxy' + proxyClassName = cls.__name__ + "GroupProxy" else: - proxyClassName = cls.__name__ + 'GroupSecProxy' + proxyClassName = cls.__name__ + "GroupSecProxy" M = dict() # proxy methods entryMethods = charm.classEntryMethods[GROUP][cls] for m in entryMethods: if m.epIdx == -1: - raise Charm4PyError('Unregistered entry method') - if m.name == '__init__': + raise Charm4PyError("Unregistered entry method") + if m.name == "__init__": continue - if m.name == 'updateGlobals' and cls == CharmRemote: + if m.name == "updateGlobals" and cls == CharmRemote: if Options.profiling: f = profile_send_function(update_globals_proxy_method_gen(m.epIdx)) else: @@ -621,30 +703,36 @@ def __getProxyClass__(C, cls, sectionProxy=False): else: argcount, argnames, defaults = getEntryMethodInfo(m.C, m.name) if Options.profiling: - f = profile_send_function(group_proxy_method_gen(m.epIdx, argcount, argnames, defaults)) + f = profile_send_function( + group_proxy_method_gen(m.epIdx, argcount, argnames, defaults) + ) else: f = group_proxy_method_gen(m.epIdx, argcount, argnames, defaults) - f.__qualname__ = proxyClassName + '.' + m.name + f.__qualname__ = proxyClassName + "." + m.name f.__name__ = m.name M[m.name] = f if cls == CharmRemote and sys.version_info >= (3, 0, 0): # TODO remove this and change rexec to exec when Python 2 support is dropped - M['exec'] = M['rexec'] - M['__init__'] = group_proxy_ctor - M['__getitem__'] = group_proxy_elem - M['__eq__'] = group_proxy__eq__ - M['__hash__'] = group_proxy__hash__ - M['ckNew'] = group_ckNew_gen(cls, entryMethods[0].epIdx) - M['ckLocalBranch'] = group_proxy_localbranch - M['__getsecproxy__'] = group_getsecproxy + M["exec"] = M["rexec"] + M["__init__"] = group_proxy_ctor + M["__getitem__"] = group_proxy_elem + M["__eq__"] = group_proxy__eq__ + M["__hash__"] = group_proxy__hash__ + M["ckNew"] = group_ckNew_gen(cls, entryMethods[0].epIdx) + M["ckLocalBranch"] = group_proxy_localbranch + M["__getsecproxy__"] = group_getsecproxy if not sectionProxy: - M['ckContribute'] = group_proxy_contribute # function called when target proxy is Group - M['__getstate__'] = group_proxy__getstate__ - M['__setstate__'] = group_proxy__setstate__ + M["ckContribute"] = ( + group_proxy_contribute # function called when target proxy is Group + ) + M["__getstate__"] = group_proxy__getstate__ + M["__setstate__"] = group_proxy__setstate__ else: - M['ckContribute'] = groupsecproxy_contribute # function called when target proxy is Group - M['__getstate__'] = groupsecproxy__getstate__ - M['__setstate__'] = groupsecproxy__setstate__ + M["ckContribute"] = ( + groupsecproxy_contribute # function called when target proxy is Group + ) + M["__getstate__"] = groupsecproxy__getstate__ + M["__setstate__"] = groupsecproxy__setstate__ proxyCls = type(proxyClassName, (), M) # create and return proxy class proxyCls.issec = sectionProxy return proxyCls @@ -657,20 +745,24 @@ def __init__(self): # -------------------- Array and Proxy -------------------- + def array_proxy_ctor(proxy, aid, ndims): proxy.aid = aid proxy.ndims = ndims proxy.elemIdx = () # entry method calls will be to elemIdx array element (broadcast if empty tuple) + def array_proxy__getstate__(proxy): return (proxy.aid, proxy.ndims, proxy.elemIdx) + def array_proxy__setstate__(proxy, state): proxy.aid, proxy.ndims, proxy.elemIdx = state + def array_proxy__eq__(proxy, other): if proxy.issec: - if hasattr(other, 'issec'): + if hasattr(other, "issec"): return proxy.section == other.section else: return False @@ -679,12 +771,14 @@ def array_proxy__eq__(proxy, other): else: return False + def array_proxy__hash__(proxy): if proxy.issec: return hash(proxy.section) else: return hash((proxy.aid, proxy.elemIdx)) + def array_getsecproxy(proxy, sinfo): if proxy.issec: secproxy = proxy.__class__(proxy.aid, proxy.ndims) @@ -693,12 +787,15 @@ def array_getsecproxy(proxy, sinfo): secproxy.section = sinfo return secproxy + def arraysecproxy__getstate__(proxy): return (proxy.aid, proxy.ndims, proxy.elemIdx, proxy.section) + def arraysecproxy__setstate__(proxy, state): proxy.aid, proxy.ndims, proxy.elemIdx, proxy.section = state + def array_proxy_elem(proxy, idx): # array proxy [] overload method ndims = proxy.ndims isslice = True @@ -708,17 +805,24 @@ def array_proxy_elem(proxy, idx): # array proxy [] overload method isslice = False elif idxtype == slice: idx = (idx,) - assert len(idx) == ndims, "Dimensions of index " + str(idx) + " don't match array dimensions" + assert len(idx) == ndims, ( + "Dimensions of index " + str(idx) + " don't match array dimensions" + ) if not isslice or not isinstance(idx[0], slice): proxy_clone = proxy.__class__(proxy.aid, ndims) proxy_clone.elemIdx = tuple(idx) return proxy_clone else: for _slice in idx: - assert _slice.start is not None and _slice.stop is not None, 'Must specify start and stop indexes for array slicing' + assert ( + _slice.start is not None and _slice.stop is not None + ), "Must specify start and stop indexes for array slicing" return charm.split(proxy, 1, slicing=idx)[0] -def array_proxy_method_gen(ep, argcount, argnames, defaults): # decorator, generates proxy entry methods + +def array_proxy_method_gen( + ep, argcount, argnames, defaults +): # decorator, generates proxy entry methods def proxy_entry_method(proxy, *args, **kwargs): num_args = len(args) if num_args < argcount and len(kwargs) > 0: @@ -731,22 +835,26 @@ def proxy_entry_method(proxy, *args, **kwargs): else: # if not there, see if there is a default value def_idx = i - argcount + len(defaults) - assert def_idx >= 0, 'Value not found for parameter \'' + argname + '\' of entry method' + assert def_idx >= 0, ( + "Value not found for parameter '" + + argname + + "' of entry method" + ) args.append(defaults[def_idx]) header = {} - is_ray = kwargs.pop('is_ray', False) - header['is_ray'] = is_ray + is_ray = kwargs.pop("is_ray", False) + header["is_ray"] = is_ray blockFuture = None elemIdx = proxy.elemIdx - if 'ret' in kwargs and kwargs['ret']: - header[b'block'] = blockFuture = charm.Future() + if "ret" in kwargs and kwargs["ret"]: + header[b"block"] = blockFuture = charm.Future() if elemIdx == (): - header[b'bcast'] = header[b'bcastret'] = True - elif 'awaitable' in kwargs and kwargs['awaitable']: - header[b'block'] = blockFuture = charm.Future() + header[b"bcast"] = header[b"bcastret"] = True + elif "awaitable" in kwargs and kwargs["awaitable"]: + header[b"block"] = blockFuture = charm.Future() if elemIdx == (): - header[b'bcast'] = True + header[b"bcast"] = True if not proxy.issec or elemIdx != (): destObj = None aid = proxy.aid @@ -763,31 +871,42 @@ def proxy_entry_method(proxy, *args, **kwargs): charm.CkArraySend(aid, elemIdx, ep, msg) else: root, sid = proxy.section - header[b'sid'] = sid + header[b"sid"] = sid if Options.local_msg_optim and root == charm._myPe: - charm.sectionMgr.thisProxy[root].sendToSectionLocal(sid, ep, header, *args) + charm.sectionMgr.thisProxy[root].sendToSectionLocal( + sid, ep, header, *args + ) else: charm.sectionMgr.thisProxy[root].sendToSection(sid, ep, header, *args) return blockFuture + proxy_entry_method.ep = ep return proxy_entry_method + def array_ckNew_gen(C, epIdx): - @classmethod # make ckNew a class (not instance) method of proxy - def array_ckNew(cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, is_ray=False): + @classmethod # make ckNew a class (not instance) method of proxy + def array_ckNew( + cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, is_ray=False + ): # if charm.myPe() == 0: print("calling array ckNew for class " + C.__name__ + " cIdx=" + str(C.idx[ARRAY])) - if type(dims) == int: dims = (dims,) + if type(dims) == int: + dims = (dims,) if dims is None and ndims == -1: - raise Charm4PyError('Bounds and number of dimensions for array cannot be empty in ckNew') + raise Charm4PyError( + "Bounds and number of dimensions for array cannot be empty in ckNew" + ) elif dims is not None and ndims != -1 and ndims != len(dims): - raise Charm4PyError('Number of bounds should match number of dimensions') + raise Charm4PyError("Number of bounds should match number of dimensions") elif dims is None and ndims != -1: # create an empty array dims = (0,) * ndims # this is a restriction in Charm++. Charm++ won't tell you unless # error checking is enabled, resulting in obscure errors otherwise - assert charm._myPe == 0, 'Cannot create arrays from PE != 0. Use charm.thisProxy[0].createArray() instead' + assert ( + charm._myPe == 0 + ), "Cannot create arrays from PE != 0. Use charm.thisProxy[0].createArray() instead" map_gid = -1 if map is not None: @@ -796,42 +915,53 @@ def array_ckNew(cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, is header, creation_future = {}, None if sum(dims) > 0 and not charm.threadMgr.isMainThread(): creation_future = charm.Future() - header[b'block'] = creation_future - header[b'bcast'] = True - header[b'creation'] = True - header[b'is_ray'] = is_ray + header[b"block"] = creation_future + header[b"bcast"] = True + header[b"creation"] = True + header[b"is_ray"] = is_ray msg = charm.packMsg(None, args, header) - aid = charm.lib.CkCreateArray(C.idx[ARRAY], dims, epIdx, msg, map_gid, useAtSync) + aid = charm.lib.CkCreateArray( + C.idx[ARRAY], dims, epIdx, msg, map_gid, useAtSync + ) proxy = cls(aid, len(dims)) if creation_future is not None: proxy.creation_future = creation_future return proxy + return array_ckNew + def array_ckInsert_gen(epIdx): - def array_ckInsert(proxy, index, args=[], onPE=-1, useAtSync=False, single=False, is_ray=False): - if type(index) == int: index = (index,) - assert len(index) == proxy.ndims, 'Invalid index dimensions passed to ckInsert' + def array_ckInsert( + proxy, index, args=[], onPE=-1, useAtSync=False, single=False, is_ray=False + ): + if type(index) == int: + index = (index,) + assert len(index) == proxy.ndims, "Invalid index dimensions passed to ckInsert" header = {} if single: - header[b'single'] = True + header[b"single"] = True if not charm.threadMgr.isMainThread(): proxy.creation_future = charm.Future() - header[b'block'] = proxy.creation_future - header[b'bcast'] = True - header[b'creation'] = True - header[b'is_ray'] = is_ray + header[b"block"] = proxy.creation_future + header[b"bcast"] = True + header[b"creation"] = True + header[b"is_ray"] = is_ray msg = charm.packMsg(None, args, header) charm.lib.CkInsert(proxy.aid, index, epIdx, onPE, msg, useAtSync) + return array_ckInsert + def array_proxy_contribute(proxy, contributeInfo): charm.CkContributeToArray(contributeInfo, proxy.aid, proxy.elemIdx) + def arraysecproxy_contribute(proxy, contributeInfo): charm.CkContributeToSection(contributeInfo, proxy.section[1], proxy.section[0]) + def array_proxy_doneInserting(proxy): charm.lib.CkDoneInserting(proxy.aid) @@ -841,10 +971,10 @@ class Array(object): type_id = ARRAY def __new__(cls, C, dims=None, ndims=-1, args=[], map=None, useAtSync=False): - if (not hasattr(C, 'mro')) or (Chare not in C.mro()): - raise Charm4PyError('Only subclasses of Chare can be member of Array') + if (not hasattr(C, "mro")) or (Chare not in C.mro()): + raise Charm4PyError("Only subclasses of Chare can be member of Array") if C not in charm.proxyClasses[ARRAY]: - raise Charm4PyError(str(C) + ' not registered for use in Arrays') + raise Charm4PyError(str(C) + " not registered for use in Arrays") return charm.proxyClasses[ARRAY][C].ckNew(dims, ndims, args, map, useAtSync) @classmethod @@ -854,8 +984,12 @@ def initMember(cls, obj, aid, index, single=False): proxy = charm.proxyClasses[ARRAY][obj.__class__](aid, len(obj.thisIndex)) obj.thisProxy = proxy[index] else: - obj.thisProxy = charm.proxyClasses[ARRAY][obj.__class__](aid, len(obj.thisIndex)) - obj._contributeInfo = charm.lib.initContributeInfo(aid, obj.thisIndex, CONTRIBUTOR_TYPE_ARRAY) + obj.thisProxy = charm.proxyClasses[ARRAY][obj.__class__]( + aid, len(obj.thisIndex) + ) + obj._contributeInfo = charm.lib.initContributeInfo( + aid, obj.thisIndex, CONTRIBUTOR_TYPE_ARRAY + ) obj.migratable = True @classmethod @@ -864,49 +998,56 @@ def __baseEntryMethods__(cls): # - to register the migration constructor on Charm++ side (note that this migration constructor does nothing) # - Chare.migrated() is called whenever a chare has completed migration. # The EntryMethod object with this name is used to profile Chare.migrated() calls. - return ['__init__', 'migrated', 'AtSync'] + return ["__init__", "migrated", "AtSync"] @classmethod def __getProxyClass__(C, cls, sectionProxy=False): if not sectionProxy: - proxyClassName = cls.__name__ + 'ArrayProxy' + proxyClassName = cls.__name__ + "ArrayProxy" else: - proxyClassName = cls.__name__ + 'ArraySecProxy' + proxyClassName = cls.__name__ + "ArraySecProxy" M = dict() # proxy methods entryMethods = charm.classEntryMethods[ARRAY][cls] for m in entryMethods: if m.epIdx == -1: - raise Charm4PyError('Unregistered entry method') - if m.name in {'__init__', 'migrated'}: + raise Charm4PyError("Unregistered entry method") + if m.name in {"__init__", "migrated"}: continue argcount, argnames, defaults = getEntryMethodInfo(m.C, m.name) if Options.profiling: - f = profile_send_function(array_proxy_method_gen(m.epIdx, argcount, argnames, defaults)) + f = profile_send_function( + array_proxy_method_gen(m.epIdx, argcount, argnames, defaults) + ) else: f = array_proxy_method_gen(m.epIdx, argcount, argnames, defaults) - f.__qualname__ = proxyClassName + '.' + m.name + f.__qualname__ = proxyClassName + "." + m.name f.__name__ = m.name M[m.name] = f - M['__init__'] = array_proxy_ctor - M['__getitem__'] = array_proxy_elem - M['__eq__'] = array_proxy__eq__ - M['__hash__'] = array_proxy__hash__ - M['ckNew'] = array_ckNew_gen(cls, entryMethods[0].epIdx) - M['__getsecproxy__'] = array_getsecproxy - M['ckInsert'] = array_ckInsert_gen(entryMethods[0].epIdx) - M['ckDoneInserting'] = array_proxy_doneInserting + M["__init__"] = array_proxy_ctor + M["__getitem__"] = array_proxy_elem + M["__eq__"] = array_proxy__eq__ + M["__hash__"] = array_proxy__hash__ + M["ckNew"] = array_ckNew_gen(cls, entryMethods[0].epIdx) + M["__getsecproxy__"] = array_getsecproxy + M["ckInsert"] = array_ckInsert_gen(entryMethods[0].epIdx) + M["ckDoneInserting"] = array_proxy_doneInserting if not sectionProxy: - M['ckContribute'] = array_proxy_contribute # function called when target proxy is Array - M['__getstate__'] = array_proxy__getstate__ - M['__setstate__'] = array_proxy__setstate__ + M["ckContribute"] = ( + array_proxy_contribute # function called when target proxy is Array + ) + M["__getstate__"] = array_proxy__getstate__ + M["__setstate__"] = array_proxy__setstate__ else: - M['ckContribute'] = arraysecproxy_contribute # function called when target proxy is Array - M['__getstate__'] = arraysecproxy__getstate__ - M['__setstate__'] = arraysecproxy__setstate__ + M["ckContribute"] = ( + arraysecproxy_contribute # function called when target proxy is Array + ) + M["__getstate__"] = arraysecproxy__getstate__ + M["__setstate__"] = arraysecproxy__setstate__ proxyCls = type(proxyClassName, (), M) # create and return proxy class proxyCls.issec = sectionProxy return proxyCls + # --------------------------------------------------- charm_type_id_to_class = [None] * len(CHARM_TYPES) @@ -922,5 +1063,6 @@ def __getProxyClass__(C, cls, sectionProxy=False): def charmStarting(): global charm, Options, Reducer, Charm4PyError, CharmRemote, profile_send_function from .charm import charm, Charm4PyError, CharmRemote, profile_send_function + Options = charm.options Reducer = charm.reducers diff --git a/charm4py/charm.py b/charm4py/charm.py index c343eb62..17196230 100644 --- a/charm4py/charm.py +++ b/charm4py/charm.py @@ -7,6 +7,7 @@ # import sys import os + if sys.version_info < (3, 0, 0): import cPickle from cStringIO import StringIO @@ -21,7 +22,7 @@ import traceback from . import chare from .chare import MAINCHARE, GROUP, ARRAY, CHARM_TYPES -from .chare import CONTRIBUTOR_TYPE_GROUP, CONTRIBUTOR_TYPE_ARRAY +from .chare import CONTRIBUTOR_TYPE_ARRAY from .chare import Chare, Mainchare, Group, ArrayMap, Array from . import entry_method from . import threads @@ -29,14 +30,15 @@ from . import reduction from . import wait from charm4py.c_object_store import MessageBuffer -from . import ray import array + try: import numpy except ImportError: # this is to avoid numpy dependency class NumpyDummy: ndarray = None + numpy = NumpyDummy() @@ -57,19 +59,25 @@ def register(C): class Options(object): def __str__(self): - output = '' + output = "" for varname in dir(self): var = getattr(self, varname) - if isinstance(var, Options) or varname.startswith('__') or callable(var): + if isinstance(var, Options) or varname.startswith("__") or callable(var): continue - output += varname + ': ' + str(var) + '\n' + output += varname + ": " + str(var) + "\n" return output def check_deprecated(self): - old_options = {'PROFILING', 'PICKLE_PROTOCOL', 'LOCAL_MSG_OPTIM', - 'LOCAL_MSG_BUF_SIZE', 'AUTO_FLUSH_WAIT_QUEUES', 'QUIET'} + old_options = { + "PROFILING", + "PICKLE_PROTOCOL", + "LOCAL_MSG_OPTIM", + "LOCAL_MSG_BUF_SIZE", + "AUTO_FLUSH_WAIT_QUEUES", + "QUIET", + } if len(old_options.intersection(set(dir(self.__class__)))) != 0: - raise Charm4PyError('Options API has changed. Use charm.options instead') + raise Charm4PyError("Options API has changed. Use charm.options instead") class Charm4PyError(Exception): @@ -82,10 +90,12 @@ def __init__(self, msg): # per process) class Charm(object): - if os.name == 'nt': + if os.name == "nt": + class PrintStream(object): def write(self, msg): charm.lib.CkPrintf(msg.encode()) + def flush(self): pass @@ -93,15 +103,27 @@ def __init__(self): self.started = False self._myPe = -1 self._numPes = -1 - self.registered = {} # class -> set of Charm types (Mainchare, Group, Array) for which this class is registered - self.register_order = [] # list of classes in registration order (all processes must use same order) + self.registered = ( + {} + ) # class -> set of Charm types (Mainchare, Group, Array) for which this class is registered + self.register_order = ( + [] + ) # list of classes in registration order (all processes must use same order) self.chares = {} - self.groups = {} # group ID -> group instance on this PE - self.arrays = defaultdict(dict) # aid -> idx -> array element instance with index idx on this PE - self.entryMethods = {} # ep_idx -> EntryMethod object - self.classEntryMethods = [{} for _ in CHARM_TYPES] # charm_type_id -> class -> list of EntryMethod objects - self.proxyClasses = [{} for _ in CHARM_TYPES] # charm_type_id -> class -> proxy class - self.groupMsgBuf = defaultdict(list) # gid -> list of msgs received for constrained groups that haven't been created yet + self.groups = {} # group ID -> group instance on this PE + self.arrays = defaultdict( + dict + ) # aid -> idx -> array element instance with index idx on this PE + self.entryMethods = {} # ep_idx -> EntryMethod object + self.classEntryMethods = [ + {} for _ in CHARM_TYPES + ] # charm_type_id -> class -> list of EntryMethod objects + self.proxyClasses = [ + {} for _ in CHARM_TYPES + ] # charm_type_id -> class -> proxy class + self.groupMsgBuf = defaultdict( + list + ) # gid -> list of msgs received for constrained groups that haven't been created yet self.section_counter = 0 self.rebuildFuncs = (rebuildByteArray, rebuildArray, rebuildNumpyArray) self.sched_tagpool = set(range(1, 128)) # pool of tags for scheduling callables @@ -133,7 +155,7 @@ def __init__(self): self.mainchareRegistered = False # entry point to Charm program. can be used in place of defining a Mainchare self.entry_func = None - if self.lib.name == 'cython': + if self.lib.name == "cython": # replace these methods with the fast Cython versions self.packMsg = self.lib.packMsg self.unpackMsg = self.lib.unpackMsg @@ -141,7 +163,7 @@ def __init__(self): self.last_exception_timestamp = time.time() # store chare types defined after program start and other objects created # in interactive mode - self.dynamic_register = sys.modules['__main__'].__dict__ + self.dynamic_register = sys.modules["__main__"].__dict__ self.lb_requested = False self.threadMgr = threads.EntryMethodThreadManager(self) self.createFuture = self.Future = self.threadMgr.createFuture @@ -154,7 +176,7 @@ def __init__(self): # TODO: maybe implement this buffer in c++ self.future_get_buffer = {} - #registered methods for ccs + # registered methods for ccs self.ccs_methods = {} def __init_profiling__(self): @@ -170,13 +192,12 @@ def __init_profiling__(self): # chares created on this PE self.activeChares = set() - def print_dbg(self, *args, **kwargs): print("PE", self.myPe(), ":", *args, **kwargs) - + @entry_method.coro def get_future_value(self, fut): - #self.print_dbg("Getting data for object", fut.id) + # self.print_dbg("Getting data for object", fut.id) obj = fut.lookup_object() if obj is None: local_f = LocalFuture() @@ -186,7 +207,7 @@ def get_future_value(self, fut): return fut.lookup_object() else: return obj - + @entry_method.coro def getany_future_value(self, futs, num_returns): ready_count = 0 @@ -209,7 +230,7 @@ def getany_future_value(self, futs, num_returns): for f in not_local: self.future_get_buffer.pop(f.store_id, None) return ready_list + result - + def check_futures_buffer(self, obj_id): if obj_id in self.future_get_buffer: local_f, fut = self.future_get_buffer.pop(obj_id) @@ -219,7 +240,7 @@ def check_send_buffer(self, obj_id): completed = self.send_buffer.check(obj_id) def check_receive_buffer(self, obj_id): - #print("Received result for", obj_id, "on pe", self._myPe) + # print("Received result for", obj_id, "on pe", self._myPe) completed = self.receive_buffer.check(obj_id) for args in completed: args = list(args) @@ -232,39 +253,49 @@ def check_receive_buffer(self, obj_id): def handleGeneralError(self): errorType, error, stacktrace = sys.exc_info() if not self.interactive: - if hasattr(error, 'remote_stacktrace'): + if hasattr(error, "remote_stacktrace"): origin, stacktrace = error.remote_stacktrace - print('----------------- Python Stack Traceback PE ' + str(origin) + ' -----------------') + print( + "----------------- Python Stack Traceback PE " + + str(origin) + + " -----------------" + ) print(stacktrace) else: - print('----------------- Python Stack Traceback PE ' + str(self.myPe()) + ' -----------------') + print( + "----------------- Python Stack Traceback PE " + + str(self.myPe()) + + " -----------------" + ) traceback.print_tb(stacktrace, limit=None) - self.abort(errorType.__name__ + ': ' + str(error)) + self.abort(errorType.__name__ + ": " + str(error)) else: - self.thisProxy[self.myPe()].propagateException(self.prepareExceptionForSend(error)) + self.thisProxy[self.myPe()].propagateException( + self.prepareExceptionForSend(error) + ) def prepareExceptionForSend(self, e): - if not hasattr(e, 'remote_stacktrace'): + if not hasattr(e, "remote_stacktrace"): f = StringIO() traceback.print_tb(sys.exc_info()[2], limit=None, file=f) e.remote_stacktrace = (self.myPe(), f.getvalue()) return e def process_em_exc(self, e, obj, header): - if b'block' not in header: + if b"block" not in header: raise e # remote is expecting a response via a future, send exception to the future - blockFuture = header[b'block'] + blockFuture = header[b"block"] sid = None - if b'sid' in header: - sid = header[b'sid'] - if b'creation' in header: + if b"sid" in header: + sid = header[b"sid"] + if b"creation" in header: # don't send anything in this case (future is not guaranteed to be used) obj.contribute(None, None, blockFuture, sid) raise e self.prepareExceptionForSend(e) - if b'bcast' in header: - if b'bcastret' in header: + if b"bcast" in header: + if b"bcastret" in header: obj.contribute(e, self.reducers.gather, blockFuture, sid) else: # NOTE: it will work if some elements contribute with an exception (here) @@ -276,8 +307,9 @@ def process_em_exc(self, e, obj, header): def recvReadOnly(self, msg): roData = cPickle.loads(msg) for name, obj in roData.items(): - if name == 'charm_pool_proxy__h': + if name == "charm_pool_proxy__h": from .pool import Pool + self.pool = Pool(obj) else: setattr(readonlies, name, obj) @@ -286,9 +318,11 @@ def recvReadOnly(self, msg): def buildMainchare(self, onPe, objPtr, ep, args): cid = (onPe, objPtr) # chare ID (objPtr should be a Python int) assert onPe == self.myPe() - assert cid not in self.chares, 'Chare ' + str(cid) + ' already instantiated' + assert cid not in self.chares, "Chare " + str(cid) + " already instantiated" em = self.entryMethods[ep] - assert em.name == '__init__', 'Specified mainchare entry method is not constructor' + assert ( + em.name == "__init__" + ), "Specified mainchare entry method is not constructor" self._createInternalChares() obj = object.__new__(em.C) # create object but don't call __init__ Mainchare.initMember(obj, cid) @@ -304,11 +338,11 @@ def buildMainchare(self, onPe, objPtr, ep, args): if self.myPe() == 0: # broadcast readonlies roData = {} for attr in dir(readonlies): # attr is string - if not attr.startswith('_') and not attr.endswith('_'): + if not attr.startswith("_") and not attr.endswith("_"): roData[attr] = getattr(readonlies, attr) msg = cPickle.dumps(roData, self.options.pickle_protocol) # print("Registering readonly data of size " + str(len(msg))) - self.lib.CkRegisterReadonly(b'charm4py_ro', b'charm4py_ro', msg) + self.lib.CkRegisterReadonly(b"charm4py_ro", b"charm4py_ro", msg) gc.collect() def invokeEntryMethod(self, obj, ep, header, args, ret_fut=False): @@ -329,18 +363,18 @@ def recvGroupMsg(self, gid, ep, msg, dcopy_start): if gid in self.groups: obj = self.groups[gid] header, args = self.unpackMsg(msg, dcopy_start, obj) - self.invokeEntryMethod(obj, ep, header, args, ret_fut=False) + self.invokeEntryMethod(obj, ep, header, args, ret_fut=False) else: em = self.entryMethods[ep] header, args = self.unpackMsg(msg, dcopy_start, None) - if em.name != '__init__': + if em.name != "__init__": # this is not a constructor msg and the group hasn't been # created yet. this should only happen for constrained groups # (buffering of msgs for regular groups that haven't # been created yet is done inside Charm++) self.groupMsgBuf[gid].append((ep, header, args)) return - if b'constrained' in header: + if b"constrained" in header: # constrained group instances are created by SectionManager return assert gid not in self.groupMsgBuf @@ -361,7 +395,7 @@ def recvArrayMsg(self, aid, index, ep, msg, dcopy_start): obj = self.arrays[aid][index] header, args = self.unpackMsg(msg, dcopy_start, obj) dep_ids = [] - is_ray = 'is_ray' in header and header['is_ray'] + is_ray = "is_ray" in header and header["is_ray"] if is_ray: for i, arg in enumerate(args[:-1]): if isinstance(arg, Future): @@ -377,19 +411,23 @@ def recvArrayMsg(self, aid, index, ep, msg, dcopy_start): self.invokeEntryMethod(obj, ep, header, args, ret_fut=is_ray) else: em = self.entryMethods[ep] - assert em.name == '__init__', 'Specified array entry method not constructor' + assert em.name == "__init__", "Specified array entry method not constructor" header, args = self.unpackMsg(msg, dcopy_start, None) if self.options.profiling: self.activeChares.add((em.C, Array)) if isinstance(args, Chare): # obj migrating in - em = self.entryMethods[ep + 1] # get 'migrated' EntryMethod object instead of __init__ + em = self.entryMethods[ + ep + 1 + ] # get 'migrated' EntryMethod object instead of __init__ obj = args - obj._contributeInfo = self.lib.initContributeInfo(aid, index, CONTRIBUTOR_TYPE_ARRAY) + obj._contributeInfo = self.lib.initContributeInfo( + aid, index, CONTRIBUTOR_TYPE_ARRAY + ) self.arrays[aid][index] = obj em.run(obj, {}, ()) else: - obj = object.__new__(em.C) # create object but don't call __init__ - if b'single' in header: + obj = object.__new__(em.C) # create object but don't call __init__ + if b"single" in header: Array.initMember(obj, aid, index, single=True) else: Array.initMember(obj, aid, index) @@ -404,19 +442,19 @@ def recvArrayBcast(self, aid, indexes, ep, msg, dcopy_start): self.invokeEntryMethod(array[index], ep, header, args) def unpackMsg(self, msg, dcopy_start, dest_obj): - if msg[:7] == b'_local:': + if msg[:7] == b"_local:": header, args = dest_obj.__removeLocal__(int(msg[7:])) else: header, args = cPickle.loads(msg) - if b'dcopy' in header: + if b"dcopy" in header: rel_offset = dcopy_start buf = memoryview(msg) - for arg_pos, typeId, rebuildArgs, size in header[b'dcopy']: - arg_buf = buf[rel_offset:rel_offset + size] + for arg_pos, typeId, rebuildArgs, size in header[b"dcopy"]: + arg_buf = buf[rel_offset : rel_offset + size] args[arg_pos] = self.rebuildFuncs[typeId](arg_buf, *rebuildArgs) rel_offset += size - elif b'custom_reducer' in header: - reducer = getattr(self.reducers, header[b'custom_reducer']) + elif b"custom_reducer" in header: + reducer = getattr(self.reducers, header[b"custom_reducer"]) # reduction result won't always be in position 0, but will always be last # (e.g. if reduction target is a future, the reduction result will be 2nd argument) if reducer.hasPostprocess: @@ -427,31 +465,31 @@ def unpackMsg(self, msg, dcopy_start, dest_obj): def packMsg(self, destObj, msgArgs, header): """Prepares a message for sending, given arguments to an entry method invocation. - The message is the result of pickling `(header,args)` where header is a dict, - and args the list of arguments. If direct-copy is enabled, arguments supporting - the buffer interface will bypass pickling and their place in 'args' will be - made empty. Instead, metadata to reconstruct these args at the destination will be - put in the header, and this method will return a list of buffers for - direct-copying of these args into a CkMessage at Charm side. + The message is the result of pickling `(header,args)` where header is a dict, + and args the list of arguments. If direct-copy is enabled, arguments supporting + the buffer interface will bypass pickling and their place in 'args' will be + made empty. Instead, metadata to reconstruct these args at the destination will be + put in the header, and this method will return a list of buffers for + direct-copying of these args into a CkMessage at Charm side. - If destination object exists on same PE as source, the args will be stored in - '_local' buffer of destination obj (without copying), and the msg will be a - small integer tag to retrieve the args from '_local' when the msg is delivered. + If destination object exists on same PE as source, the args will be stored in + '_local' buffer of destination obj (without copying), and the msg will be a + small integer tag to retrieve the args from '_local' when the msg is delivered. - Args: - destObj: destination object if it exists on the same PE as source, otherwise None - msgArgs: arguments to entry method - header: msg header + Args: + destObj: destination object if it exists on the same PE as source, otherwise None + msgArgs: arguments to entry method + header: msg header - Returns: - 2-tuple containing msg and list of direct-copy buffers + Returns: + 2-tuple containing msg and list of direct-copy buffers """ direct_copy_buffers = [] dcopy_size = 0 if destObj is not None: # if dest obj is local localTag = destObj.__addLocal__((header, msgArgs)) - msg = ('_local:' + str(localTag)).encode() + msg = ("_local:" + str(localTag)).encode() else: direct_copy_hdr = [] # goes to msg header args = list(msgArgs) @@ -474,15 +512,20 @@ def packMsg(self, destObj, msgArgs, header): # C-contiguous", which seems to be a CPython error (not cffi related) nbytes = arg.nbytes if arg.dtype.isbuiltin: - direct_copy_hdr.append((i, 2, (arg.shape, arg.dtype.char), nbytes)) + direct_copy_hdr.append( + (i, 2, (arg.shape, arg.dtype.char), nbytes) + ) else: - direct_copy_hdr.append((i, 2, (arg.shape, arg.dtype.name), nbytes)) + direct_copy_hdr.append( + (i, 2, (arg.shape, arg.dtype.name), nbytes) + ) else: continue args[i] = None # will direct-copy this arg so remove from args list direct_copy_buffers.append(memoryview(arg)) dcopy_size += nbytes - if len(direct_copy_hdr) > 0: header[b'dcopy'] = direct_copy_hdr + if len(direct_copy_hdr) > 0: + header[b"dcopy"] = direct_copy_hdr msg = (header, args) msg = cPickle.dumps(msg, self.options.pickle_protocol) if self.options.profiling: @@ -494,7 +537,9 @@ def registerInCharmAs(self, C, charm_type, libRegisterFunc): charm_type_id = charm_type.type_id entryMethods = self.classEntryMethods[charm_type_id][C] entryNames = [method.name for method in entryMethods] - C.idx[charm_type_id], startEpIdx = libRegisterFunc(C.__name__ + str(charm_type_id), entryNames, len(entryMethods)) + C.idx[charm_type_id], startEpIdx = libRegisterFunc( + C.__name__ + str(charm_type_id), entryNames, len(entryMethods) + ) for i, em in enumerate(entryMethods): em.epIdx = startEpIdx + i self.entryMethods[em.epIdx] = em @@ -527,41 +572,54 @@ def registerInCharm(self, C): # first callback from Charm++ shared library # this method registers classes with the shared library def registerMainModule(self): - self._myPe = self.lib.CkMyPe() + self._myPe = self.lib.CkMyPe() self._numPes = self.lib.CkNumPes() # Charm++ library captures stdout/stderr. here we reset the streams with a buffering # policy that ensures that messages reach Charm++ in a timely fashion - if os.name == 'nt': + if os.name == "nt": sys.stdout = Charm.PrintStream() else: - sys.stdout = os.fdopen(1, 'wt', 1) - sys.stderr = os.fdopen(2, 'wt', 1) + sys.stdout = os.fdopen(1, "wt", 1) + sys.stderr = os.fdopen(2, "wt", 1) if self.myPe() != 0: - self.lib.CkRegisterReadonly(b'python_null', b'python_null', None) + self.lib.CkRegisterReadonly(b"python_null", b"python_null", None) if (self.myPe() == 0) and (not self.options.quiet): import platform from . import charm4py_version + py_impl = platform.python_implementation() - print("Charm4py> Running Charm4py version " + charm4py_version + - " on Python " + str(platform.python_version()) + " (" + - py_impl + "). Using '" + - self.lib.name + "' interface to access Charm++") - if py_impl != 'CPython': - raise Charm4PyError('PyPy is no longer supported. Use CPython instead') - if sys.version_info < (3,8,0): - raise Charm4PyError('Python 2 is no longer supported. Use Python 3.8 or above instead') + print( + "Charm4py> Running Charm4py version " + + charm4py_version + + " on Python " + + str(platform.python_version()) + + " (" + + py_impl + + "). Using '" + + self.lib.name + + "' interface to access Charm++" + ) + if py_impl != "CPython": + raise Charm4PyError("PyPy is no longer supported. Use CPython instead") + if sys.version_info < (3, 8, 0): + raise Charm4PyError( + "Python 2 is no longer supported. Use Python 3.8 or above instead" + ) if self.options.profiling: - print('Charm4py> Profiling is ON (this affects performance)') + print("Charm4py> Profiling is ON (this affects performance)") for C in self.register_order: self.registerInCharm(C) def registerAs(self, C, charm_type_id): from .sections import SectionManager + if charm_type_id == MAINCHARE: - assert not self.mainchareRegistered, 'More than one entry point has been specified' + assert ( + not self.mainchareRegistered + ), "More than one entry point has been specified" self.mainchareRegistered = True # make mainchare constructor always a coroutine if sys.version_info < (3, 0, 0): @@ -571,25 +629,30 @@ def registerAs(self, C, charm_type_id): charm_type = chare.charm_type_id_to_class[charm_type_id] # print("charm4py: Registering class " + C.__name__, "as", charm_type.__name__, "type_id=", charm_type_id, charm_type) profilingOn = self.options.profiling - ems = [entry_method.EntryMethod(C, m, profilingOn) for m in charm_type.__baseEntryMethods__()] + ems = [ + entry_method.EntryMethod(C, m, profilingOn) + for m in charm_type.__baseEntryMethods__() + ] members = dir(C) if C == SectionManager: - ems.append(entry_method.EntryMethod(C, 'sendToSection', profilingOn)) - members.remove('sendToSection') + ems.append(entry_method.EntryMethod(C, "sendToSection", profilingOn)) + members.remove("sendToSection") self.classEntryMethods[charm_type_id][C] = ems for m in members: m_obj = getattr(C, m) if not callable(m_obj) or inspect.isclass(m_obj): continue - if m in chare.method_restrictions['reserved'] and m_obj != getattr(Chare, m): - raise Charm4PyError(str(C) + " redefines reserved method '" + m + "'") - if m.startswith('__') and m.endswith('__'): + if m in chare.method_restrictions["reserved"] and m_obj != getattr( + Chare, m + ): + raise Charm4PyError(str(C) + " redefines reserved method '" + m + "'") + if m.startswith("__") and m.endswith("__"): continue # filter out non-user methods - if m in chare.method_restrictions['non_entry_method']: + if m in chare.method_restrictions["non_entry_method"]: continue - if charm_type_id != ARRAY and m in {'migrate', 'setMigratable'}: + if charm_type_id != ARRAY and m in {"migrate", "setMigratable"}: continue # print(m) em = entry_method.EntryMethod(C, m, profilingOn) @@ -601,8 +664,8 @@ def registerAs(self, C, charm_type_id): def register(self, C, collections=(GROUP, ARRAY)): if C in self.registered: return - if (not hasattr(C, 'mro')) or (Chare not in C.mro()): - raise Charm4PyError('Only subclasses of Chare can be registered') + if (not hasattr(C, "mro")) or (Chare not in C.mro()): + raise Charm4PyError("Only subclasses of Chare can be registered") # cache of template condition objects for `chare.wait(cond_str)` calls # maps cond_str to condition object. the condition object stores the lambda function associated with cond_str @@ -616,11 +679,13 @@ def register(self, C, collections=(GROUP, ARRAY)): def _registerInternalChares(self): global SectionManager from .sections import SectionManager + self.register(SectionManager, (GROUP,)) self.register(CharmRemote, (GROUP,)) from .pool import PoolScheduler, Worker + if self.interactive: if sys.version_info < (3, 0, 0): entry_method.coro(PoolScheduler.start.im_func) @@ -632,14 +697,16 @@ def _registerInternalChares(self): self.register(Worker, (GROUP,)) if self.options.profiling: - self.internalChareTypes.update({SectionManager, CharmRemote, - PoolScheduler, Worker}) + self.internalChareTypes.update( + {SectionManager, CharmRemote, PoolScheduler, Worker} + ) def _createInternalChares(self): Group(CharmRemote) Group(SectionManager) from .pool import Pool, PoolScheduler + pool_proxy = Chare(PoolScheduler, onPE=0) self.pool = Pool(pool_proxy) readonlies.charm_pool_proxy__h = pool_proxy @@ -667,40 +734,53 @@ def start(self, entry=None, classes=[], modules=[], interactive=False): if interactive: from .interactive import InteractiveConsole as entry from .channel import Channel + self.options.remote_exec = True self.origStdinFd = os.dup(0) self.origStoutFd = os.dup(1) self.interactive = True - self.dynamic_register.update({'charm': charm, 'Chare': Chare, 'Group': Group, - 'Array': Array, 'Reducer': self.reducers, - 'threaded': entry_method.coro, 'coro': entry_method.coro, - 'Channel': Channel}) + self.dynamic_register.update( + { + "charm": charm, + "Chare": Chare, + "Group": Group, + "Array": Array, + "Reducer": self.reducers, + "threaded": entry_method.coro, + "coro": entry_method.coro, + "Channel": Channel, + } + ) if self.started: - raise Charm4PyError('charm.start() can only be called once') + raise Charm4PyError("charm.start() can only be called once") self.started = True if self.options.profiling: self.__init_profiling__() self.contribute = profile_send_function(self.contribute) - self.triggerCallableEM = entry_method.EntryMethod(self.__class__, - 'triggerCallable', - True) - if self.options.quiet and '++quiet' not in sys.argv: - sys.argv += ['++quiet'] - elif '++quiet' in sys.argv: + self.triggerCallableEM = entry_method.EntryMethod( + self.__class__, "triggerCallable", True + ) + if self.options.quiet and "++quiet" not in sys.argv: + sys.argv += ["++quiet"] + elif "++quiet" in sys.argv: self.options.quiet = True self._registerInternalChares() - if hasattr(entry, 'mro') and Chare in entry.mro(): + if hasattr(entry, "mro") and Chare in entry.mro(): if entry.__init__.__code__.co_argcount != 2: - raise Charm4PyError('Mainchare constructor must take one (and only one) parameter') + raise Charm4PyError( + "Mainchare constructor must take one (and only one) parameter" + ) self.register(entry, (MAINCHARE,)) else: - assert callable(entry), 'Given entry point is not a function or Chare' + assert callable(entry), "Given entry point is not a function or Chare" if entry.__code__.co_argcount != 1: - raise Charm4PyError('Main function must have one (and only one) parameter') + raise Charm4PyError( + "Main function must have one (and only one) parameter" + ) self.entry_func = entry self.register(chare.DefaultMainchare, (MAINCHARE,)) @@ -713,37 +793,46 @@ def start(self, entry=None, classes=[], modules=[], interactive=False): raise Charm4PyError("Class", C, "is not a Chare (can't register)") import importlib + M = list(modules) - if '__main__' not in M: - M.append('__main__') + if "__main__" not in M: + M.append("__main__") for module_name in M: if module_name not in sys.modules: importlib.import_module(module_name) - for C_name, C in inspect.getmembers(sys.modules[module_name], inspect.isclass): - if C.__module__ != chare.__name__ and hasattr(C, 'mro'): + for C_name, C in inspect.getmembers( + sys.modules[module_name], inspect.isclass + ): + if C.__module__ != chare.__name__ and hasattr(C, "mro"): if ArrayMap in C.mro(): self.register(C, (GROUP,)) # register ArrayMap only as Group elif Chare in C.mro(): self.register(C) elif Group in C.mro() or Array in C.mro() or Mainchare in C.mro(): - raise Charm4PyError('Chares must not inherit from Group, Array or' - ' Mainchare. Refer to new API') + raise Charm4PyError( + "Chares must not inherit from Group, Array or" + " Mainchare. Refer to new API" + ) for module in (chare, entry_method, wait): module.charmStarting() self.threadMgr.start() - self.lb_requested = '+balancer' in sys.argv + self.lb_requested = "+balancer" in sys.argv self.lib.start() def arrayElemLeave(self, aid, index): obj = self.arrays[aid].pop(index) - if hasattr(obj, '_scookies'): - charm.abort('Cannot migrate elements that are part of a section ' - '(this will be supported in a future version)') + if hasattr(obj, "_scookies"): + charm.abort( + "Cannot migrate elements that are part of a section " + "(this will be supported in a future version)" + ) self.threadMgr.objMigrating(obj) - if hasattr(obj, '__channels__'): - assert len(obj.__pendingChannels__) == 0, 'Cannot migrate chares that did not complete channel establishment' + if hasattr(obj, "__channels__"): + assert ( + len(obj.__pendingChannels__) == 0 + ), "Cannot migrate chares that did not complete channel establishment" del obj._contributeInfo # don't want to pickle this pickled_chare = cPickle.dumps(({}, obj), self.options.pickle_protocol) # facilitate garbage collection (especially by removing cyclical references) @@ -763,7 +852,9 @@ def contribute(self, data, reducer, target, chare, section=None): if isinstance(target, Future): fid = target.fid target = target.getTargetProxyEntryMethod() - contributeInfo = self.lib.getContributeInfo(target.ep, fid, contribution, chare) + contributeInfo = self.lib.getContributeInfo( + target.ep, fid, contribution, chare + ) if self.options.profiling: self.recordSend(contributeInfo.getDataSize()) target.__self__.ckContribute(contributeInfo) @@ -783,7 +874,9 @@ def contribute(self, data, reducer, target, chare, section=None): try: redno = chare._scookies[sid] except: - raise Charm4PyError('Chare doing section reduction but is not part of a section') + raise Charm4PyError( + "Chare doing section reduction but is not part of a section" + ) self.sectionMgr.contrib(sid, redno, data, reducer, target) chare._scookies[sid] += 1 @@ -796,7 +889,9 @@ def combine(self, *proxies): secproxy = None if proxy.issec: secproxy = proxy - proxy._getSectionLocations_(sid, 1, SECTION_ALL, None, None, futures[i], secproxy) + proxy._getSectionLocations_( + sid, 1, SECTION_ALL, None, None, futures[i], secproxy + ) for f in futures: pes.update(f.get()[0]) assert len(pes) > 0 @@ -804,8 +899,12 @@ def combine(self, *proxies): self.sectionMgr.thisProxy[root].createSectionDown(sid, pes, None) return proxies[0].__getsecproxy__((root, sid)) - def split(self, proxy, numsections, section_func=None, elems=None, slicing=None, cons=None): - assert (hasattr(proxy, 'gid') and proxy.elemIdx == -1) or (hasattr(proxy, 'aid') and proxy.elemIdx == ()) + def split( + self, proxy, numsections, section_func=None, elems=None, slicing=None, cons=None + ): + assert (hasattr(proxy, "gid") and proxy.elemIdx == -1) or ( + hasattr(proxy, "aid") and proxy.elemIdx == () + ) sid0 = (self._myPe, self.section_counter) self.section_counter += numsections secproxy = None @@ -813,22 +912,30 @@ def split(self, proxy, numsections, section_func=None, elems=None, slicing=None, secproxy = proxy if elems is None: f = self.Future() - proxy._getSectionLocations_(sid0, numsections, section_func, slicing, None, f, secproxy) + proxy._getSectionLocations_( + sid0, numsections, section_func, slicing, None, f, secproxy + ) section_pes = f.get() else: - if numsections == 1 and not isinstance(elems[0], list) and not isinstance(elems[0], set): + if ( + numsections == 1 + and not isinstance(elems[0], list) + and not isinstance(elems[0], set) + ): elems = [elems] try: assert len(elems) == numsections except AssertionError: print(len(elems), numsections) - if hasattr(proxy, 'gid') and not proxy.issec: + if hasattr(proxy, "gid") and not proxy.issec: # in this case the elements are guaranteed to be PEs, so I don't # have to collect locations section_pes = elems else: f = self.Future() - proxy._getSectionLocations_(sid0, numsections, None, None, elems, f, secproxy) + proxy._getSectionLocations_( + sid0, numsections, None, None, elems, f, secproxy + ) section_pes = f.get() secProxies = [] # TODO if there are many many sections, should do a stateless multicast to the roots with the section info @@ -841,8 +948,10 @@ def split(self, proxy, numsections, section_func=None, elems=None, slicing=None, pes = set(pes) assert len(pes) > 0 root = min(pes) - if not proxy.issec and hasattr(proxy, 'gid'): - self.sectionMgr.thisProxy[root].createGroupSectionDown(sid, proxy.gid, pes, None, cons) + if not proxy.issec and hasattr(proxy, "gid"): + self.sectionMgr.thisProxy[root].createGroupSectionDown( + sid, proxy.gid, pes, None, cons + ) else: self.sectionMgr.thisProxy[root].createSectionDown(sid, pes, None) secProxies.append(proxy.__getsecproxy__((root, sid))) @@ -854,12 +963,18 @@ def startQD(self, callback): fid = callback.fid callback = callback.getTargetProxyEntryMethod() cb_proxy = callback.__self__ - if hasattr(cb_proxy, 'section'): - self.lib.CkStartQD_SectionCallback(cb_proxy.section[1], cb_proxy.section[0], callback.ep) - elif hasattr(cb_proxy, 'gid'): - self.lib.CkStartQD_GroupCallback(cb_proxy.gid, cb_proxy.elemIdx, callback.ep, fid) - elif hasattr(cb_proxy, 'aid'): - self.lib.CkStartQD_ArrayCallback(cb_proxy.aid, cb_proxy.elemIdx, callback.ep, fid) + if hasattr(cb_proxy, "section"): + self.lib.CkStartQD_SectionCallback( + cb_proxy.section[1], cb_proxy.section[0], callback.ep + ) + elif hasattr(cb_proxy, "gid"): + self.lib.CkStartQD_GroupCallback( + cb_proxy.gid, cb_proxy.elemIdx, callback.ep, fid + ) + elif hasattr(cb_proxy, "aid"): + self.lib.CkStartQD_ArrayCallback( + cb_proxy.aid, cb_proxy.elemIdx, callback.ep, fid + ) else: self.lib.CkStartQD_ChareCallback(cb_proxy.cid, callback.ep, fid) @@ -880,10 +995,14 @@ def sleep(self, secs): def awaitCreation(self, *proxies): for proxy in proxies: - if not hasattr(proxy, 'creation_future'): - if not proxy.__class__.__name__.endswith('Proxy'): - raise Charm4PyError('Did not pass a proxy to awaitCreation? ' + str(type(proxy))) - raise Charm4PyError('awaitCreation can only be used if creation triggered from a coroutine entry method') + if not hasattr(proxy, "creation_future"): + if not proxy.__class__.__name__.endswith("Proxy"): + raise Charm4PyError( + "Did not pass a proxy to awaitCreation? " + str(type(proxy)) + ) + raise Charm4PyError( + "awaitCreation can only be used if creation triggered from a coroutine entry method" + ) proxy.creation_future.get() del proxy.creation_future @@ -933,7 +1052,7 @@ def recordSendRecv(self, stats, size): stats[2] = max(size, stats[2]) stats[3] += size stats[4] = size - + # deposit value of one of the futures that was created on this PE def _future_deposit_result(self, fid, result=None): self.threadMgr.depositFuture(fid, result) @@ -941,13 +1060,20 @@ def _future_deposit_result(self, fid, result=None): def __printTable__(self, table, sep): col_width = [max(len(x) for x in col) for col in zip(*table)] for j, line in enumerate(table): - if j in sep: print(sep[j]) - print("| " + " | ".join("{:{}}".format(x, col_width[i]) for i, x in enumerate(line)) + " |") + if j in sep: + print(sep[j]) + print( + "| " + + " | ".join( + "{:{}}".format(x, col_width[i]) for i, x in enumerate(line) + ) + + " |" + ) def printStats(self): - assert self.started, 'charm was not started' + assert self.started, "charm was not started" if not self.options.profiling: - print('NOTE: called charm.printStats() but profiling is disabled') + print("NOTE: called charm.printStats() but profiling is disabled") return em = self.runningEntryMethod @@ -957,29 +1083,41 @@ def printStats(self): em.stopMeasuringTime() em.startMeasuringTime() - print('Timings for PE', self.myPe(), ':') - table = [['', 'em', 'send', 'recv', 'total']] + print("Timings for PE", self.myPe(), ":") + table = [["", "em", "send", "recv", "total"]] lineNb = 1 sep = {} row_totals = [0.0] * 4 - chares_sorted = sorted([(C.__module__, C.__name__, - charm_type.type_id, C, charm_type) - for C, charm_type in self.activeChares]) + chares_sorted = sorted( + [ + (C.__module__, C.__name__, charm_type.type_id, C, charm_type) + for C, charm_type in self.activeChares + ] + ) for _, _, _, C, charm_type in chares_sorted: if C in self.internalChareTypes: totaltime = 0.0 for em in self.classEntryMethods[charm_type.type_id][C]: - if em.name == '__init__': + if em.name == "__init__": continue totaltime += sum(em.times) if totaltime < 0.001: continue - sep[lineNb] = '------ ' + str(C) + ' as ' + charm_type.__name__ + ' ------' + sep[lineNb] = "------ " + str(C) + " as " + charm_type.__name__ + " ------" for em in self.classEntryMethods[charm_type.type_id][C]: - if not hasattr(em, 'times'): + if not hasattr(em, "times"): continue - if C == chare.DefaultMainchare and self.entry_func is not None and em.name == '__init__': - em_name = self.entry_func.__module__ + '.' + self.entry_func.__name__ + ' (main function)' + if ( + C == chare.DefaultMainchare + and self.entry_func is not None + and em.name == "__init__" + ): + em_name = ( + self.entry_func.__module__ + + "." + + self.entry_func.__name__ + + " (main function)" + ) else: em_name = em.name vals = em.times + [sum(em.times)] @@ -987,14 +1125,16 @@ def printStats(self): row_totals[i] += vals[i] table.append([em_name] + [str(round(v, 3)) for v in vals]) lineNb += 1 - sep[lineNb] = '-----------------------------------------------------------' - table.append([''] + [str(round(v, 3)) for v in row_totals]) + sep[lineNb] = "-----------------------------------------------------------" + table.append([""] + [str(round(v, 3)) for v in row_totals]) lineNb += 1 - sep[lineNb] = '-----------------------------------------------------------' + sep[lineNb] = "-----------------------------------------------------------" misc_overheads = [str(round(v, 3)) for v in self.lib.times] - table.append(['reductions', ' ', ' ', misc_overheads[0], misc_overheads[0]]) - table.append(['custom reductions', ' ', ' ', misc_overheads[1], misc_overheads[1]]) - table.append(['migrating out', ' ', ' ', misc_overheads[2], misc_overheads[2]]) + table.append(["reductions", " ", " ", misc_overheads[0], misc_overheads[0]]) + table.append( + ["custom reductions", " ", " ", misc_overheads[1], misc_overheads[1]] + ) + table.append(["migrating out", " ", " ", misc_overheads[2], misc_overheads[2]]) lineNb += 3 triggerCallableTotalTime = sum(self.triggerCallableEM.times) if triggerCallableTotalTime > 0: @@ -1002,12 +1142,12 @@ def printStats(self): for i, v in enumerate(vals): row_totals[i] += v times = [str(round(v, 3)) for v in vals] - table.append(['triggerCallable'] + times) + table.append(["triggerCallable"] + times) lineNb += 1 - sep[lineNb] = '-----------------------------------------------------------' + sep[lineNb] = "-----------------------------------------------------------" row_totals[2] += sum(self.lib.times) row_totals[3] += sum(self.lib.times) - table.append([''] + [str(round(v, 3)) for v in row_totals]) + table.append([""] + [str(round(v, 3)) for v in row_totals]) lineNb += 1 self.__printTable__(table, sep) @@ -1016,45 +1156,62 @@ def printStats(self): num_msgs = self.msg_send_stats[0] min_msgsize, max_msgsize, sum_msgsize = self.msg_send_stats[1:4] avg_msgsize = sum_msgsize / num_msgs - print('\nMessages sent: ' + str(num_msgs)) + print("\nMessages sent: " + str(num_msgs)) else: num_msgs = self.msg_recv_stats[0] min_msgsize, max_msgsize, sum_msgsize = self.msg_recv_stats[1:4] avg_msgsize = sum_msgsize / num_msgs - print('\nMessages received: ' + str(num_msgs)) + print("\nMessages received: " + str(num_msgs)) msgSizeStats = [min_msgsize, avg_msgsize, max_msgsize] msgSizeStats = [round(val, 3) for val in msgSizeStats] - print('Message size in bytes (min / mean / max): ' + ' / '.join([str(v) for v in msgSizeStats])) - print('Total bytes = ' + str(round(sum_msgsize / 1024.0 / 1024.0, 3)) + ' MB') - print('') + print( + "Message size in bytes (min / mean / max): " + + " / ".join([str(v) for v in msgSizeStats]) + ) + print( + "Total bytes = " + str(round(sum_msgsize / 1024.0 / 1024.0, 3)) + " MB" + ) + print("") def lib_version_check(self, commit_id_str): - req_version = tuple([int(n) for n in open(os.path.dirname(__file__) + '/libcharm_version', 'r').read().split('.')]) - version = [int(n) for n in commit_id_str.split('-')[0][1:].split('.')] + req_version = tuple( + [ + int(n) + for n in open(os.path.dirname(__file__) + "/libcharm_version", "r") + .read() + .split(".") + ] + ) + version = [int(n) for n in commit_id_str.split("-")[0][1:].split(".")] try: - version = tuple(version + [int(commit_id_str.split('-')[1])]) + version = tuple(version + [int(commit_id_str.split("-")[1])]) except: version = tuple(version + [0]) if version < req_version: - req_str = '.'.join([str(n) for n in req_version]) - cur_str = '.'.join([str(n) for n in version]) - raise Charm4PyError('Charm++ version >= ' + req_str + ' required. ' + - 'Existing version is ' + cur_str) + req_str = ".".join([str(n) for n in req_version]) + cur_str = ".".join([str(n) for n in version]) + raise Charm4PyError( + "Charm++ version >= " + + req_str + + " required. " + + "Existing version is " + + cur_str + ) def getTopoTreeEdges(self, pe, root_pe, pes=None, bfactor=4): - """ Returns (parent, children) of 'pe' in a tree spanning the given 'pes', - or all PEs if 'pes' is None - If 'pes' is specified, 'root_pe' must be in the first position of 'pes', - and 'pe' must be a member of 'pes' """ + """Returns (parent, children) of 'pe' in a tree spanning the given 'pes', + or all PEs if 'pes' is None + If 'pes' is specified, 'root_pe' must be in the first position of 'pes', + and 'pe' must be a member of 'pes'""" return self.lib.getTopoTreeEdges(pe, root_pe, pes, bfactor) def getTopoSubtrees(self, root_pe, pes, bfactor=4): - """ Returns a list of subtrees of root_pe in a spanning tree containing - all given pes. Subtrees are returned as lists of pes in the - subtree: the first PE in the list is the root of the subtree, but - otherwise the list doesn't specify the structure of the subtree - (the subtree structure can be extracted by recursively calling this - method). """ + """Returns a list of subtrees of root_pe in a spanning tree containing + all given pes. Subtrees are returned as lists of pes in the + subtree: the first PE in the list is the root of the subtree, but + otherwise the list doesn't specify the structure of the subtree + (the subtree structure can be extracted by recursively calling this + method).""" return self.lib.getTopoSubtrees(root_pe, pes, bfactor) def myPe(self): @@ -1099,20 +1256,20 @@ def LBTurnInstrumentOn(self): def LBTurnInstrumentOff(self): self.lib.LBTurnInstrumentOff() - #functions for ccs + # functions for ccs def CcsRegisterHandler(self, handlername, handler): self.ccs_methods[handlername] = handler self.lib.CcsRegisterHandler(handlername, handler) def CcsIsRemoteRequest(self): self.lib.isRemoteRequest() - + def CcsSendReply(self, message): self.lib.CcsSendReply(message) def CcsDelayReply(self): return self.lib.CcsDelayReply() - + def CcsSendDelayedReply(self, d, message): self.lib.CcsSendDelayedReply(d, message) @@ -1120,8 +1277,7 @@ def callHandler(self, handlername, data): if handlername in self.ccs_methods: self.ccs_methods[handlername](data) else: - raise Charm4PyError('Handler ' + handlername + ' not registered') - + raise Charm4PyError("Handler " + handlername + " not registered") class CharmRemote(Chare): @@ -1153,25 +1309,33 @@ def updateGlobals(self, *args): # TODO remove this warning and related code when the new lb framework is merged if charm.myPe() == 0 and charm.lb_requested: - print('WARNING> updateGlobals with load balancing enabled can lead to unexpected behavior ' - 'due to a bug in Charm++ load balancing. This will be fixed in an upcoming release.') + print( + "WARNING> updateGlobals with load balancing enabled can lead to unexpected behavior " + "due to a bug in Charm++ load balancing. This will be fixed in an upcoming release." + ) charm.lb_requested = False sys.modules[module_name].__dict__.update(global_dict) - def createArray(self, cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, cb=None): + def createArray( + self, cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, cb=None + ): proxy = Array(cls, dims, ndims, args, map, useAtSync) if cb is not None: cb(proxy) return proxy - def rexec(self, code, module_name='__main__'): + def rexec(self, code, module_name="__main__"): if charm.options.remote_exec is not True: - raise Charm4PyError('Remote code execution is disabled. Set charm.options.remote_exec to True') + raise Charm4PyError( + "Remote code execution is disabled. Set charm.options.remote_exec to True" + ) exec(code, sys.modules[module_name].__dict__) - def eval(self, expression, module_name='__main__'): + def eval(self, expression, module_name="__main__"): if charm.options.remote_exec is not True: - raise Charm4PyError('Remote code execution is disabled. Set charm.options.remote_exec to True') + raise Charm4PyError( + "Remote code execution is disabled. Set charm.options.remote_exec to True" + ) return eval(expression, sys.modules[module_name].__dict__) # deposit value of one of the futures that was created on this PE @@ -1185,9 +1349,11 @@ def notify_future_deletion(self, store_id, depth): # if yes, remove it fut = charm.threadMgr.borrowed_futures[(store_id, depth)] refcount = ctypes.c_long.from_address(id(fut)).value - #print(store_id, "on pe", charm.myPe(), "depth", depth, "ref count =", refcount) - if (fut.parent == None and refcount == 3) or (fut.parent != None and refcount == 2): - #print("Real deletion of", store_id, "from", charm.myPe()) + # print(store_id, "on pe", charm.myPe(), "depth", depth, "ref count =", refcount) + if (fut.parent == None and refcount == 3) or ( + fut.parent != None and refcount == 2 + ): + # print("Real deletion of", store_id, "from", charm.myPe()) if fut.parent == None: charm.threadMgr.futures.pop(fut.fid) charm.threadMgr.borrowed_futures.pop((store_id, depth)) @@ -1197,17 +1363,24 @@ def propagateException(self, error): charm.last_exception_timestamp = time.time() if charm.myPe() == 0: origin, remote_stacktrace = error.remote_stacktrace - print('----------------- Python Stack Traceback from PE', origin, '-----------------\n', remote_stacktrace) - print(type(error).__name__ + ':', error, '(PE ' + str(origin) + ')') + print( + "----------------- Python Stack Traceback from PE", + origin, + "-----------------\n", + remote_stacktrace, + ) + print(type(error).__name__ + ":", error, "(PE " + str(origin) + ")") else: - self.thisProxy[(charm.myPe()-1) // 2].propagateException(error) + self.thisProxy[(charm.myPe() - 1) // 2].propagateException(error) def printStats(self): charm.printStats() def registerNewChareType(self, name, source): if charm.options.remote_exec is not True: - raise Charm4PyError('Remote code execution is disabled. Set charm.options.remote_exec to True') + raise Charm4PyError( + "Remote code execution is disabled. Set charm.options.remote_exec to True" + ) exec(source, charm.dynamic_register) chare_type = charm.dynamic_register[name] charm.register(chare_type) @@ -1221,24 +1394,24 @@ def registerNewChareTypes(self, classes): def load_charm_library(charm): args = sys.argv - libcharm_path = os.path.join(os.path.dirname(__file__), '.libs') - if os.name == 'nt': - os.environ['PATH'] += ';' + libcharm_path - if '+libcharm_interface' in args: - arg_idx = args.index('+libcharm_interface') + libcharm_path = os.path.join(os.path.dirname(__file__), ".libs") + if os.name == "nt": + os.environ["PATH"] += ";" + libcharm_path + if "+libcharm_interface" in args: + arg_idx = args.index("+libcharm_interface") interface = args.pop(arg_idx + 1) args.pop(arg_idx) - if interface == 'cython': + if interface == "cython": from .charmlib.charmlib_cython import CharmLib else: - raise Charm4PyError('Unrecognized interface ' + interface) + raise Charm4PyError("Unrecognized interface " + interface) else: # pick best available interface import platform + py_impl = platform.python_implementation() from .charmlib.charmlib_cython import CharmLib - - + return CharmLib(charm, charm.options, libcharm_path) @@ -1252,7 +1425,8 @@ def func_with_profiling(*args, **kwargs): else: ret = func(*args, **kwargs) return ret - if hasattr(func, 'ep'): + + if hasattr(func, "ep"): func_with_profiling.ep = func.ep return func_with_profiling @@ -1266,7 +1440,7 @@ def rebuildByteArray(data): def rebuildArray(data, typecode): - #a = array.array('d', data.cast(typecode)) # this is slow + # a = array.array('d', data.cast(typecode)) # this is slow a = array.array(typecode) a.frombytes(data) return a diff --git a/charm4py/entry_method.py b/charm4py/entry_method.py index 7cf9a623..cac140d1 100644 --- a/charm4py/entry_method.py +++ b/charm4py/entry_method.py @@ -16,12 +16,14 @@ def __init__(self, C, name, profile=False): self.running = False method = getattr(C, name) - if hasattr(method, '_ck_coro'): + if hasattr(method, "_ck_coro"): if not profile: self.run = self._run_th else: self.run = self._run_th_prof - self.thread_notify = hasattr(method, '_ck_coro_notify') and method._ck_coro_notify + self.thread_notify = ( + hasattr(method, "_ck_coro_notify") and method._ck_coro_notify + ) else: if not profile: self.run = self._run @@ -29,25 +31,25 @@ def __init__(self, C, name, profile=False): self.run = self._run_prof self.when_cond = None - if hasattr(method, 'when_cond'): + if hasattr(method, "when_cond"): # template object specifying the 'when' condition clause # for this entry method - self.when_cond = getattr(method, 'when_cond') + self.when_cond = getattr(method, "when_cond") if isinstance(self.when_cond, wait.ChareStateMsgCond): self.when_cond_func = self.when_cond.cond_func def _run(self, obj, header, args, ret_fut=False): - """ run entry method of the given object in the current thread """ + """run entry method of the given object in the current thread""" # set last entry method executed (note that 'last_em_exec' won't # necessarily always coincide with the currently running entry method) charm.last_em_exec = self try: - #print(args) + # print(args) if ret_fut: fut = args[-1] args = args[:-1] ret = getattr(obj, self.name)(*args) - if ret_fut and not (ret is None): + if ret_fut and ret is not None: fut.create_object(ret) except SystemExit: exit_code = sys.exc_info()[1].code @@ -60,13 +62,13 @@ def _run(self, obj, header, args, ret_fut=False): except Exception as e: charm.process_em_exc(e, obj, header) return - if b'block' in header: - blockFuture = header[b'block'] - if b'bcast' in header: + if b"block" in header: + blockFuture = header[b"block"] + if b"bcast" in header: sid = None - if b'sid' in header: - sid = header[b'sid'] - if b'bcastret' in header: + if b"sid" in header: + sid = header[b"sid"] + if b"bcastret" in header: charm.contribute(ret, charm.reducers.gather, blockFuture, obj, sid) else: charm.contribute(None, None, blockFuture, obj, sid) @@ -172,9 +174,10 @@ def when(cond_str): def _when(func): method_args = {} for i in range(1, func.__code__.co_argcount): - method_args[func.__code__.co_varnames[i]] = i-1 + method_args[func.__code__.co_varnames[i]] = i - 1 func.when_cond = wait.parse_cond_str(cond_str, func.__module__, method_args) return func + return _when @@ -188,6 +191,7 @@ def _coro(func): func._ck_coro = True func._ck_coro_notify = event_notify return func + return _coro diff --git a/charm4py/interactive.py b/charm4py/interactive.py index 0ddade16..f5421be7 100644 --- a/charm4py/interactive.py +++ b/charm4py/interactive.py @@ -13,7 +13,7 @@ def future_(): f = Future() - charm.dynamic_register['_f'] = f + charm.dynamic_register["_f"] = f return f @@ -22,36 +22,37 @@ class InteractiveConsole(Chare, InteractiveInterpreter): def __init__(self, args): global Charm4PyError from .charm import Charm4PyError + # restore original tty stdin and stdout (else readline won't work correctly) os.dup2(charm.origStdinFd, 0) os.dup2(charm.origStoutFd, 1) - charm.dynamic_register['future'] = future_ - charm.dynamic_register['self'] = self + charm.dynamic_register["future"] = future_ + charm.dynamic_register["self"] = self InteractiveInterpreter.__init__(self, locals=charm.dynamic_register) - self.filename = '' + self.filename = "" self.resetbuffer() # regexp to detect when user defines a new chare type - self.regexpChareDefine = re.compile('class\s*(\S+)\s*\(.*Chare.*\)\s*:') + self.regexpChareDefine = re.compile("class\s*(\S+)\s*\(.*Chare.*\)\s*:") # regexps to detect import statements - self.regexpImport1 = re.compile('\s*from\s*(\S+) import') - self.regexpImport2 = re.compile('import\s*(\S+)') + self.regexpImport1 = re.compile("\s*from\s*(\S+) import") + self.regexpImport2 = re.compile("import\s*(\S+)") self.options = charm.options.interactive try: import readline - import rlcompleter - readline.parse_and_bind('tab: complete') + + readline.parse_and_bind("tab: complete") except: pass try: sys.ps1 except AttributeError: - sys.ps1 = '>>> ' + sys.ps1 = ">>> " try: sys.ps2 except AttributeError: - sys.ps2 = '... ' + sys.ps2 = "... " self.thisProxy.start() def resetbuffer(self): @@ -69,8 +70,10 @@ def write(self, data, sched=True): @coro def start(self): - self.write('\nCharm4py interactive shell (beta)\n') - self.write('charm.options.interactive.verbose = ' + str(self.options.verbose) + '\n') + self.write("\nCharm4py interactive shell (beta)\n") + self.write( + "charm.options.interactive.verbose = " + str(self.options.verbose) + "\n" + ) charm.scheduleCallableAfter(self.thisProxy.hang_check_phase1, HANG_CHECK_FREQ) self.monitorFutures = [] @@ -89,18 +92,18 @@ def start(self): line = self.raw_input(prompt) tick = time.time() except EOFError: - self.write('\n') + self.write("\n") break else: more = self.push(line) except KeyboardInterrupt: - self.write('\nKeyboardInterrupt\n') + self.write("\nKeyboardInterrupt\n") self.resetbuffer() more = 0 def push(self, line): self.buffer.append(line) - source = '\n'.join(self.buffer) + source = "\n".join(self.buffer) more = self.runsource(source, self.filename) if not more: self.resetbuffer() @@ -113,15 +116,17 @@ def runcode(self, code): m = self.regexpChareDefine.search(line) if m is not None: newChareTypeName = m.group(1) - source = '\n'.join(self.buffer) - charm.thisProxy.registerNewChareType(newChareTypeName, source, awaitable=True).get() + source = "\n".join(self.buffer) + charm.thisProxy.registerNewChareType( + newChareTypeName, source, awaitable=True + ).get() if self.options.verbose > 0: - self.write('Charm4py> Broadcasted Chare definition\n') + self.write("Charm4py> Broadcasted Chare definition\n") return line = self.buffer[0] module_name = None - if 'import' in line: + if "import" in line: m = self.regexpImport1.search(line) if m is not None: module_name = m.group(1) @@ -135,34 +140,50 @@ def runcode(self, code): if module_name not in sys.modules: # error importing the module return if self.options.broadcast_imports: - charm.thisProxy.rexec('\n'.join(self.buffer), awaitable=True).get() + charm.thisProxy.rexec("\n".join(self.buffer), awaitable=True).get() if self.options.verbose > 0: - self.write('Charm4py> Broadcasted import statement\n') + self.write("Charm4py> Broadcasted import statement\n") new_modules = set(sys.modules.keys()) - prev_modules chare_types = [] for module_name in new_modules: try: - members = inspect.getmembers(sys.modules[module_name], inspect.isclass) + members = inspect.getmembers( + sys.modules[module_name], inspect.isclass + ) except: # some modules can throw exceptions with inspect.getmembers, ignoring them for now continue for C_name, C in members: - if C.__module__ != chare.__name__ and hasattr(C, 'mro'): + if C.__module__ != chare.__name__ and hasattr(C, "mro"): if chare.ArrayMap in C.mro(): chare_types.append(C) elif Chare in C.mro(): chare_types.append(C) - elif chare.Group in C.mro() or chare.Array in C.mro() or chare.Mainchare in C.mro(): - raise Charm4PyError('Chares must not inherit from Group, Array or' - ' Mainchare. Refer to new API') + elif ( + chare.Group in C.mro() + or chare.Array in C.mro() + or chare.Mainchare in C.mro() + ): + raise Charm4PyError( + "Chares must not inherit from Group, Array or" + " Mainchare. Refer to new API" + ) if len(chare_types) > 0: if self.options.broadcast_imports: - charm.thisProxy.registerNewChareTypes(chare_types, awaitable=True).get() + charm.thisProxy.registerNewChareTypes( + chare_types, awaitable=True + ).get() if self.options.verbose > 0: - self.write('Broadcasted the following chare definitions: ' + str([str(C) for C in chare_types]) + '\n') + self.write( + "Broadcasted the following chare definitions: " + + str([str(C) for C in chare_types]) + + "\n" + ) else: - self.write('Charm4py> ERROR: import module(s) contain Chare definitions but the import was not broadcasted\n') + self.write( + "Charm4py> ERROR: import module(s) contain Chare definitions but the import was not broadcasted\n" + ) return except: self.showtraceback() @@ -171,14 +192,18 @@ def runcode(self, code): InteractiveInterpreter.runcode(self, code) self.interactive_running = False - def raw_input(self, prompt=''): + def raw_input(self, prompt=""): return input(prompt) def hang_check_phase1(self): self.monitorFutures = [f for f in self.monitorFutures if f.blocked] if self.interactive_running: for f in charm.threadMgr.futures.values(): - if f.blocked and not hasattr(f, 'ignorehang') and not hasattr(f, 'timestamp'): + if ( + f.blocked + and not hasattr(f, "ignorehang") + and not hasattr(f, "timestamp") + ): f.timestamp = time.time() self.monitorFutures.append(f) for f in self.monitorFutures: @@ -193,19 +218,27 @@ def hang_check_phase2(self): charm.scheduleCallableAfter(self.thisProxy.hang_check_phase1, HANG_CHECK_FREQ) for f in monitor_futures: if f.blocked: - self.write('\nError: system is idle, canceling block on future\n', sched=False) + self.write( + "\nError: system is idle, canceling block on future\n", sched=False + ) charm.threadMgr.cancelFuture(f) def showtraceback(self): error_type, error, tb = sys.exc_info() - if hasattr(error, 'remote_stacktrace'): + if hasattr(error, "remote_stacktrace"): origin, stacktrace = error.remote_stacktrace - self.write('----------------- Python Stack Traceback from PE ' + str(origin) + ' -----------------\n') - self.write(stacktrace + '\n') - self.write(error_type.__name__ + ': ' + str(error) + ' (PE ' + str(origin) + ')\n') + self.write( + "----------------- Python Stack Traceback from PE " + + str(origin) + + " -----------------\n" + ) + self.write(stacktrace + "\n") + self.write( + error_type.__name__ + ": " + str(error) + " (PE " + str(origin) + ")\n" + ) else: super(InteractiveConsole, self).showtraceback() -if __name__ == '__main__': +if __name__ == "__main__": charm.start(interactive=True) diff --git a/charm4py/liveviz.py b/charm4py/liveviz.py index 0ebe7c19..b675edc8 100644 --- a/charm4py/liveviz.py +++ b/charm4py/liveviz.py @@ -3,228 +3,272 @@ from collections import deque import struct from itertools import chain + Reducer = charm.reducers group = None + def viz_gather(contribs): return list(chain(*contribs)) + def viz_gather_preprocess(data, contributor): return [data] + Reducer.addReducer(viz_gather, pre=viz_gather_preprocess) + @dataclass class Config: - version: int = 1 - isColor: bool = True - isPush: bool = True - is3d: bool = False - min: tuple = field(default_factory=lambda: (0.0, 0.0, 0.0)) - max: tuple = field(default_factory=lambda: (1.0, 1.0, 1.0)) - - def to_binary(self): - # Format: int, int, int, int, [double, double, double, double, double, double] - binary_data = struct.pack(">iiii", - self.version, - 1 if self.isColor else 0, - 1 if self.isPush else 0, - 1 if self.is3d else 0) - if self.is3d: - binary_data += struct.pack(">dddddd", - self.min[0], self.min[1], self.min[2], - self.max[0], self.max[1], self.max[2]) - return binary_data - + version: int = 1 + isColor: bool = True + isPush: bool = True + is3d: bool = False + min: tuple = field(default_factory=lambda: (0.0, 0.0, 0.0)) + max: tuple = field(default_factory=lambda: (1.0, 1.0, 1.0)) + + def to_binary(self): + # Format: int, int, int, int, [double, double, double, double, double, double] + binary_data = struct.pack( + ">iiii", + self.version, + 1 if self.isColor else 0, + 1 if self.isPush else 0, + 1 if self.is3d else 0, + ) + if self.is3d: + binary_data += struct.pack( + ">dddddd", + self.min[0], + self.min[1], + self.min[2], + self.max[0], + self.max[1], + self.max[2], + ) + return binary_data + + class Vector3d: - def __init__(self, x=0.0, y=0.0, z=0.0): - self.x = x - self.y = y - self.z = z - - @classmethod - def from_bytes(cls, data, offset=0): - # Read 3 doubles from the data starting at offset - x, y, z = struct.unpack_from(">ddd", data, offset) - return cls(x, y, z), offset + 24 # 24 = 3 * 8 bytes (double) - + def __init__(self, x=0.0, y=0.0, z=0.0): + self.x = x + self.y = y + self.z = z + + @classmethod + def from_bytes(cls, data, offset=0): + # Read 3 doubles from the data starting at offset + x, y, z = struct.unpack_from(">ddd", data, offset) + return cls(x, y, z), offset + 24 # 24 = 3 * 8 bytes (double) + + class ImageRequest: - def __init__(self, version, request_type, width, height, - x=None, y=None, z=None, o=None, minZ=0.0, maxZ=0.0): - self.version = version - self.request_type = request_type - self.width = width - self.height = height - self.x = x - self.y = y - self.z = z - self.o = o - self.minZ = minZ - self.maxZ = maxZ - - @classmethod - def from_bytes(cls, data): - if len(data) < 16: # At least 4 ints - raise ValueError("Not enough data to decode ImageRequest") - - version, request_type, width, height = struct.unpack_from(">iiii", data, 0) - - # If there's more data, we have the optional fields - if len(data) > 16: - offset = 16 - x, offset = Vector3d.from_bytes(data, offset) - y, offset = Vector3d.from_bytes(data, offset) - z, offset = Vector3d.from_bytes(data, offset) - o, offset = Vector3d.from_bytes(data, offset) - minZ, maxZ = struct.unpack_from(">dd", data, offset) - - return cls(version, request_type, width, height, x, y, z, o, minZ, maxZ) - else: - return cls(version, request_type, width, height) - + def __init__( + self, + version, + request_type, + width, + height, + x=None, + y=None, + z=None, + o=None, + minZ=0.0, + maxZ=0.0, + ): + self.version = version + self.request_type = request_type + self.width = width + self.height = height + self.x = x + self.y = y + self.z = z + self.o = o + self.minZ = minZ + self.maxZ = maxZ + + @classmethod + def from_bytes(cls, data): + if len(data) < 16: # At least 4 ints + raise ValueError("Not enough data to decode ImageRequest") + + version, request_type, width, height = struct.unpack_from(">iiii", data, 0) + + # If there's more data, we have the optional fields + if len(data) > 16: + offset = 16 + x, offset = Vector3d.from_bytes(data, offset) + y, offset = Vector3d.from_bytes(data, offset) + z, offset = Vector3d.from_bytes(data, offset) + o, offset = Vector3d.from_bytes(data, offset) + minZ, maxZ = struct.unpack_from(">dd", data, offset) + + return cls(version, request_type, width, height, x, y, z, o, minZ, maxZ) + else: + return cls(version, request_type, width, height) + + @register class LiveVizGroup(Chare): - - def __init__(self, cb, poll): - self.callback = cb - self.poll = poll - charm.CcsRegisterHandler("lvImage", self.image_handler) - if poll: - self.requests = deque() - self.images = deque() - - def send(self, result): - image = ByteImage.from_contributions(result, LiveViz.cfg.isColor) - if self.poll: - if len(self.requests) > 0: - req, delayed = self.requests.popleft() - output = ByteImage.with_image_in_corner(image, req.width, req.height) - charm.CcsSendDelayedReply(delayed, output.to_binary()) - else: - print("sent") - self.images.append(image) - else: - output = ByteImage.with_image_in_corner(image, self.wid, self.ht) - charm.CcsSendDelayedReply(self.reply, output.to_binary()) - - def image_handler(self, msg): - request = ImageRequest.from_bytes(msg) - if self.poll: - if len(self.images) > 0: - output = ByteImage.with_image_in_corner(self.images.popleft(), request.width, request.height) - charm.CcsSendReply(output.to_binary()) - else: - self.requests.append((request, charm.CcsDelayReply())) - else: - self.ht = request.height - self.wid = request.width - self.callback(request) - self.reply = charm.CcsDelayReply() - + + def __init__(self, cb, poll): + self.callback = cb + self.poll = poll + charm.CcsRegisterHandler("lvImage", self.image_handler) + if poll: + self.requests = deque() + self.images = deque() + + def send(self, result): + image = ByteImage.from_contributions(result, LiveViz.cfg.isColor) + if self.poll: + if len(self.requests) > 0: + req, delayed = self.requests.popleft() + output = ByteImage.with_image_in_corner(image, req.width, req.height) + charm.CcsSendDelayedReply(delayed, output.to_binary()) + else: + print("sent") + self.images.append(image) + else: + output = ByteImage.with_image_in_corner(image, self.wid, self.ht) + charm.CcsSendDelayedReply(self.reply, output.to_binary()) + + def image_handler(self, msg): + request = ImageRequest.from_bytes(msg) + if self.poll: + if len(self.images) > 0: + output = ByteImage.with_image_in_corner( + self.images.popleft(), request.width, request.height + ) + charm.CcsSendReply(output.to_binary()) + else: + self.requests.append((request, charm.CcsDelayReply())) + else: + self.ht = request.height + self.wid = request.width + self.callback(request) + self.reply = charm.CcsDelayReply() + + class ByteImage: - def __init__(self, data=None, width=0, height=0, is_color=True): - """ - Initialize a byte image - - Args: - data (bytes, optional): Raw image data as bytes, or None to create empty image - width (int): Image width in pixels - height (int): Image height in pixels - is_color (bool): Whether the image is in color (True) or grayscale (False) - """ - self.width = width - self.height = height - self.is_color = is_color - self.bytes_per_pixel = 3 if is_color else 1 - - if data is not None: - self.data = data - else: - self.data = bytes(width * height * self.bytes_per_pixel) - - @classmethod - def from_contributions(cls, contribs, is_color=True): - """ - Create a ByteImage from multiple contributions, positioning each - contribution at the right location. - - Args: - contribs (list): List of tuples with format - (bytes_data, startx, starty, local_height, local_width, total_height, total_width) - is_color (bool): Whether the image is in color - - Returns: - ByteImage: A composite image with all contributions in the right positions - """ - _, _, _, _, _, total_height, total_width = contribs[0] - bytes_per_pixel = 3 if is_color else 1 - - buffer = bytearray(total_width * total_height * bytes_per_pixel) - - for data, startx, starty, local_height, local_width, _, _ in contribs: - for y in range(local_height): - for x in range(local_width): - src_pos = (y * local_width + x) * bytes_per_pixel - dst_pos = ((starty + y) * total_width + (startx + x)) * bytes_per_pixel - - if src_pos + bytes_per_pixel <= len(data): - buffer[dst_pos:dst_pos + bytes_per_pixel] = (buffer[dst_pos:dst_pos + bytes_per_pixel] + data[src_pos:src_pos + bytes_per_pixel]) % 256 - - return cls(bytes(buffer), total_width, total_height, is_color) - - def to_binary(self): - return self.data - - @classmethod - def with_image_in_corner(cls, src_image, new_width, new_height): - """ - Create a new image with specified dimensions and place the source image - in the top left corner. - - Args: - src_image (ByteImage): Source image to place in the corner - new_width (int): Width of the new image - new_height (int): Height of the new image - - Returns: - ByteImage: A new image with the source image in the top left corner - """ - dest_image = cls(None, new_width, new_height, src_image.is_color) - bytes_per_pixel = dest_image.bytes_per_pixel - - buffer = bytearray(new_width * new_height * bytes_per_pixel) - - # Calculate dimensions to copy - copy_width = min(new_width, src_image.width) - copy_height = min(new_height, src_image.height) - - for y in range(copy_height): - for x in range(copy_width): - src_pos = (y * src_image.width + x) * bytes_per_pixel - - dst_pos = (y * new_width + x) * bytes_per_pixel - - if src_pos + bytes_per_pixel <= len(src_image.data): - buffer[dst_pos:dst_pos + bytes_per_pixel] = src_image.data[src_pos:src_pos + bytes_per_pixel] - - return cls(bytes(buffer), new_width, new_height, src_image.is_color) + def __init__(self, data=None, width=0, height=0, is_color=True): + """ + Initialize a byte image + + Args: + data (bytes, optional): Raw image data as bytes, or None to create empty image + width (int): Image width in pixels + height (int): Image height in pixels + is_color (bool): Whether the image is in color (True) or grayscale (False) + """ + self.width = width + self.height = height + self.is_color = is_color + self.bytes_per_pixel = 3 if is_color else 1 + + if data is not None: + self.data = data + else: + self.data = bytes(width * height * self.bytes_per_pixel) + + @classmethod + def from_contributions(cls, contribs, is_color=True): + """ + Create a ByteImage from multiple contributions, positioning each + contribution at the right location. + + Args: + contribs (list): List of tuples with format + (bytes_data, startx, starty, local_height, local_width, total_height, total_width) + is_color (bool): Whether the image is in color + + Returns: + ByteImage: A composite image with all contributions in the right positions + """ + _, _, _, _, _, total_height, total_width = contribs[0] + bytes_per_pixel = 3 if is_color else 1 + + buffer = bytearray(total_width * total_height * bytes_per_pixel) + + for data, startx, starty, local_height, local_width, _, _ in contribs: + for y in range(local_height): + for x in range(local_width): + src_pos = (y * local_width + x) * bytes_per_pixel + dst_pos = ( + (starty + y) * total_width + (startx + x) + ) * bytes_per_pixel + + if src_pos + bytes_per_pixel <= len(data): + buffer[dst_pos : dst_pos + bytes_per_pixel] = ( + buffer[dst_pos : dst_pos + bytes_per_pixel] + + data[src_pos : src_pos + bytes_per_pixel] + ) % 256 + + return cls(bytes(buffer), total_width, total_height, is_color) + + def to_binary(self): + return self.data + + @classmethod + def with_image_in_corner(cls, src_image, new_width, new_height): + """ + Create a new image with specified dimensions and place the source image + in the top left corner. + + Args: + src_image (ByteImage): Source image to place in the corner + new_width (int): Width of the new image + new_height (int): Height of the new image + + Returns: + ByteImage: A new image with the source image in the top left corner + """ + dest_image = cls(None, new_width, new_height, src_image.is_color) + bytes_per_pixel = dest_image.bytes_per_pixel + + buffer = bytearray(new_width * new_height * bytes_per_pixel) + + # Calculate dimensions to copy + copy_width = min(new_width, src_image.width) + copy_height = min(new_height, src_image.height) + + for y in range(copy_height): + for x in range(copy_width): + src_pos = (y * src_image.width + x) * bytes_per_pixel + + dst_pos = (y * new_width + x) * bytes_per_pixel + + if src_pos + bytes_per_pixel <= len(src_image.data): + buffer[dst_pos : dst_pos + bytes_per_pixel] = src_image.data[ + src_pos : src_pos + bytes_per_pixel + ] + + return cls(bytes(buffer), new_width, new_height, src_image.is_color) + class LiveViz: - cfg = None - - @classmethod - def config_handler(cls, msg): - charm.CcsSendReply(cls.cfg.to_binary()) - - @classmethod - def deposit(cls, buffer, elem, x, y, ht, wid, g_ht, g_wid): - elem.reduce(group.send, data=(buffer,x,y,ht,wid,g_ht,g_wid), reducer=Reducer.viz_gather) - - @classmethod - def init(cls, cfg, cb, poll=False): - global group - cls.cfg = cfg - grp = Chare(LiveVizGroup, args=[cb, poll], onPE=0) - charm.thisProxy.updateGlobals({'group': grp}, awaitable=True, module_name='charm4py.liveviz').get() - charm.CcsRegisterHandler("lvConfig", cls.config_handler) + cfg = None + + @classmethod + def config_handler(cls, msg): + charm.CcsSendReply(cls.cfg.to_binary()) + + @classmethod + def deposit(cls, buffer, elem, x, y, ht, wid, g_ht, g_wid): + elem.reduce( + group.send, + data=(buffer, x, y, ht, wid, g_ht, g_wid), + reducer=Reducer.viz_gather, + ) + + @classmethod + def init(cls, cfg, cb, poll=False): + global group + cls.cfg = cfg + grp = Chare(LiveVizGroup, args=[cb, poll], onPE=0) + charm.thisProxy.updateGlobals( + {"group": grp}, awaitable=True, module_name="charm4py.liveviz" + ).get() + charm.CcsRegisterHandler("lvConfig", cls.config_handler) diff --git a/charm4py/object_store.py b/charm4py/object_store.py index 4462e7ef..78fc33bc 100644 --- a/charm4py/object_store.py +++ b/charm4py/object_store.py @@ -1,6 +1,11 @@ -from charm4py import charm, Chare, Group, Array, Future, coro, Channel, Reducer, register +from charm4py import ( + charm, + Chare, + register, +) from charm4py.c_object_store import CObjectStore + @register class ObjectStore(Chare): def __init__(self): @@ -13,27 +18,25 @@ def delete_remote_objects(self, obj_id): self._object_store.delete_remote_objects(obj_id) def delete_object(self, obj_id): - """Delete this object from the local object store - """ + """Delete this object from the local object store""" self._object_store.delete_object(obj_id) def lookup_object(self, obj_id): - """ Lookup object in local object map - """ + """Lookup object in local object map""" return self._object_store.lookup_object(obj_id) - + def lookup_location(self, obj_id): - """ Lookup location in local location map + """Lookup location in local location map If not found in local map, send a message to home PE to get the location back on this PE """ return self._object_store.lookup_location(obj_id) - + def update_location(self, obj_id, pe): - """ Update location in local map + """Update location in local map Check buffers for location requests and object requests Also check send buffer to see if any message is buffered to send. This is - currently not implemented, currently messages are only buffered at the + currently not implemented, currently messages are only buffered at the receiving PE """ self._object_store.update_location(obj_id, pe) @@ -41,9 +44,9 @@ def update_location(self, obj_id, pe): def insert_object_small(self, obj_id, obj): self._object_store.insert_object_small(obj_id, obj) - + def receive_remote_object(self, obj_id, obj): - """ Add object to local object map + """Add object to local object map Then check receive buffer to see if any messages are buffered on the receiving end on this PE """ @@ -52,19 +55,19 @@ def receive_remote_object(self, obj_id, obj): charm.check_futures_buffer(obj_id) def request_object(self, obj_id, requesting_pe): - """ If obj_id is found in the local object map, then send it back to the + """If obj_id is found in the local object map, then send it back to the requesting PE. Else buffer the request """ self._object_store.request_object(obj_id, requesting_pe) def request_location(self, obj_id, requesting_pe): - """ If location for obj_id is in the local map, then send the location back to the + """If location for obj_id is in the local map, then send the location back to the requesting PE. Else buffer the request """ self._object_store.request_location(obj_id, requesting_pe) def request_location_object(self, obj_id, requesting_pe): - """ If location for obj_id is in the local map, send a request_location call to + """If location for obj_id is in the local map, send a request_location call to the location of obj_id and add the requesting PE to the local location map. Else buffer the request """ @@ -77,7 +80,7 @@ def bulk_send_location(self, obj_id, requesting_pes): self._object_store.bulk_send_location(obj_id, requesting_pes) def create_object(self, obj_id, obj): - """ Add the object to the local object map and send an update_location + """Add the object to the local object map and send an update_location call to the home PE of obj_id """ self._object_store.create_object(obj_id, obj) diff --git a/charm4py/pool.py b/charm4py/pool.py index 3ec0e025..0eb404eb 100644 --- a/charm4py/pool.py +++ b/charm4py/pool.py @@ -1,4 +1,4 @@ -from . import charm, Chare, Group, Array, coro_ext, threads, Future, register, ray +from . import charm, Chare, Array, coro_ext, threads, Future, register from .charm import Charm4PyError from .threads import NotThreadedError from collections import defaultdict @@ -34,7 +34,9 @@ def __init__(self, id, func, tasks, result, ncores, chunksize, is_ray=False): self.id = id self.max_cores = ncores self.n_avail = ncores - self.func = func # if func is not None, function is the same for all tasks in the job + self.func = ( + func # if func is not None, function is the same for all tasks in the job + ) self.workers = [] # ID of workers who have executed tasks from this job self.chunked = chunksize > 1 self.threaded = False @@ -43,22 +45,28 @@ def __init__(self, id, func, tasks, result, ncores, chunksize, is_ray=False): self.is_ray = is_ray assert chunksize > 0 if func is not None: - self.threaded = hasattr(func, '_ck_coro') + self.threaded = hasattr(func, "_ck_coro") else: # this is not efficient, especially considering that we iterate over # the tasks again below. This case is only needed for submit(). Might # just want to consider removing submit() to simplify code? for func_, args in tasks: - if hasattr(func_, '_ck_coro'): + if hasattr(func_, "_ck_coro"): self.threaded = True break if self.chunked: if result is None or isinstance(result, threads.Future): self.results = [None] * len(tasks) self.future = result - self.tasks = [Chunk(tasks[i:i+chunksize], i) for i in range(0, len(tasks), chunksize)] + self.tasks = [ + Chunk(tasks[i : i + chunksize], i) + for i in range(0, len(tasks), chunksize) + ] else: - self.tasks = [Chunk(tasks[i:i+chunksize], result[i:i+chunksize]) for i in range(0, len(tasks), chunksize)] + self.tasks = [ + Chunk(tasks[i : i + chunksize], result[i : i + chunksize]) + for i in range(0, len(tasks), chunksize) + ] else: if result is None or isinstance(result, threads.Future): self.results = [None] * len(tasks) @@ -66,12 +74,17 @@ def __init__(self, id, func, tasks, result, ncores, chunksize, is_ray=False): if func is not None: self.tasks = [Task(args, i) for i, args in enumerate(tasks)] else: - self.tasks = [Task(args, i, func) for i, (func, args) in enumerate(tasks)] + self.tasks = [ + Task(args, i, func) for i, (func, args) in enumerate(tasks) + ] else: if func is not None: self.tasks = [Task(args, result[i]) for i, args in enumerate(tasks)] else: - self.tasks = [Task(args, result[i], func) for i, (func, args) in enumerate(tasks)] + self.tasks = [ + Task(args, result[i], func) + for i, (func, args) in enumerate(tasks) + ] # print('Created job with', len(self.tasks), 'tasks') self.tasks_pending = len(self.tasks) @@ -102,11 +115,15 @@ def __init__(self): def __start__(self, func, tasks, result): if self.workers is None: - assert self.num_workers > 0, 'Run with more than 1 PE to use charm.pool' + assert self.num_workers > 0, "Run with more than 1 PE to use charm.pool" # first time running a job, create Group of workers - print('Initializing charm.pool with', self.num_workers, 'worker PEs. ' - 'Warning: charm.pool is experimental (API and performance ' - 'is subject to change)') + print( + "Initializing charm.pool with", + self.num_workers, + "worker PEs. " + "Warning: charm.pool is experimental (API and performance " + "is subject to change)", + ) self.workers = Array(Worker, charm.numPes(), args=[self.thisProxy]) if len(self.job_id_pool) == 0: @@ -118,10 +135,14 @@ def __start__(self, func, tasks, result): if charm.interactive: try: if func is not None: - self.workers.check(func.__module__, func.__name__, awaitable=True).get() + self.workers.check( + func.__module__, func.__name__, awaitable=True + ).get() else: for func_, args in tasks: - self.workers.check(func_.__module__, func_.__name__, awaitable=True).get() + self.workers.check( + func_.__module__, func_.__name__, awaitable=True + ).get() except Exception as e: if result is None: raise e @@ -153,13 +174,23 @@ def start(self, func, tasks, result, ncores, chunksize, is_ray=False): if ncores < 0: ncores = self.num_workers elif ncores > self.num_workers: - print('charm.pool Warning: requested more cores than are ' - 'available. Using max available cores') + print( + "charm.pool Warning: requested more cores than are " + "available. Using max available cores" + ) ncores = self.num_workers self.__start__(func, tasks, result) - job = Job(self.job_id_pool.pop(), func, tasks, result, ncores, chunksize, is_ray=is_ray) + job = Job( + self.job_id_pool.pop(), + func, + tasks, + result, + ncores, + chunksize, + is_ray=is_ray, + ) self.__addJob__(job) if job.chunked: @@ -215,11 +246,23 @@ def schedule(self): self.workers.elemIdx = (worker_id,) else: self.workers.elemIdx = worker_id - + if isinstance(task.data, tuple): - job.remote(func, [task.result_dest], job.id, *task.data, is_ray=job.is_ray) + job.remote( + func, + [task.result_dest], + job.id, + *task.data, + is_ray=job.is_ray, + ) else: - job.remote(func, [task.result_dest], job.id, task.data, is_ray=job.is_ray) + job.remote( + func, + [task.result_dest], + job.id, + task.data, + is_ray=job.is_ray, + ) if len(job.tasks) == 0: prev.job_next = job.job_next @@ -238,7 +281,7 @@ def schedule(self): job = prev.job_next def taskFinished(self, worker_id, job_id, result=None): - #print('Job finished') + # print('Job finished') job = self.jobs[job_id] if job.failed: return self.taskError(worker_id, job_id, job.exception) @@ -246,7 +289,7 @@ def taskFinished(self, worker_id, job_id, result=None): if job.chunked: i, results = result n = len(results) - job.results[i:i+n] = results + job.results[i : i + n] = results else: i, _result = result job.results[i] = _result @@ -272,7 +315,7 @@ def threadResumed(self, worker_id): self.idle_workers.discard(worker_id) def migrated(self): - charm.abort('Someone migrated PoolScheduler which is non-migratable') + charm.abort("Someone migrated PoolScheduler which is non-migratable") def taskError(self, worker_id, job_id, exception): job = self.jobs[job_id] @@ -281,7 +324,7 @@ def taskError(self, worker_id, job_id, exception): # marking as failed will allow the scheduler to delete it from the linked list # NOTE that we will only delete from the 'jobs' list once all the pending tasks are done job.failed = True - if not hasattr(job, 'future'): + if not hasattr(job, "future"): if job.chunked: for chunk in job.tasks: for f in chunk.result_dest: @@ -296,7 +339,7 @@ def taskError(self, worker_id, job_id, exception): self.job_id_pool.add(job_id) for worker_id in job.workers: self.worker_knows[worker_id].remove(job.id) - if hasattr(job, 'future'): + if hasattr(job, "future"): if job.future is not None: job.future.send(job.exception) else: @@ -309,7 +352,9 @@ class Worker(Chare): def __init__(self, scheduler): self.scheduler = scheduler - assert len(self.scheduler.elemIdx) > 0 # make sure points to the element, not collection + assert ( + len(self.scheduler.elemIdx) > 0 + ) # make sure points to the element, not collection self.__addThreadEventSubscriber__(scheduler, self.thisIndex) # TODO: when to purge entries from this dict? self.funcs = {} # job ID -> function used by this job ID @@ -334,14 +379,20 @@ def runTask(self, func, result_destination, job_id, *args): try: result = func(*args) if isinstance(result_destination, int): - self.scheduler.taskFinished(self.thisIndex, job_id, (result_destination, result)) + self.scheduler.taskFinished( + self.thisIndex, job_id, (result_destination, result) + ) else: # assume result_destination is a future result_destination.send(result) self.scheduler.taskFinished(self.thisIndex, job_id) except Exception as e: if isinstance(e, NotThreadedError): - e = Charm4PyError('Function ' + str(func) + ' must be decorated with @coro to be able to suspend') + e = Charm4PyError( + "Function " + + str(func) + + " must be decorated with @coro to be able to suspend" + ) charm.prepareExceptionForSend(e) self.scheduler.taskError(self.thisIndex, job_id, e) if not isinstance(result_destination, int): @@ -356,14 +407,20 @@ def runTask_star(self, func, result_destination, job_id, *args): try: result = func(*args) if isinstance(result_destination, int): - self.scheduler.taskFinished(self.thisIndex, job_id, (result_destination, result)) + self.scheduler.taskFinished( + self.thisIndex, job_id, (result_destination, result) + ) else: # assume result_destination is a future result_destination.send(result) self.scheduler.taskFinished(self.thisIndex, job_id) except Exception as e: if isinstance(e, NotThreadedError): - e = Charm4PyError('Function ' + str(func) + ' must be decorated with @coro to be able to suspend') + e = Charm4PyError( + "Function " + + str(func) + + " must be decorated with @coro to be able to suspend" + ) charm.prepareExceptionForSend(e) self.scheduler.taskError(self.thisIndex, job_id, e) if not isinstance(result_destination, int): @@ -404,7 +461,9 @@ def runChunk(self, func, result_destination, job_id, *chunk): def send_chunk_results(self, results, result_destination, job_id): if isinstance(result_destination, int): - self.scheduler.taskFinished(self.thisIndex, job_id, (result_destination, results)) + self.scheduler.taskFinished( + self.thisIndex, job_id, (result_destination, results) + ) else: # assume result_destination is a list of futures # TODO: should send all results together to PE where future was created, @@ -415,7 +474,7 @@ def send_chunk_results(self, results, result_destination, job_id): def send_chunk_exc(self, e, result_destination, job_id): if isinstance(e, NotThreadedError): - e = Charm4PyError('Function not decorated with @coro tried to suspend') + e = Charm4PyError("Function not decorated with @coro tried to suspend") charm.prepareExceptionForSend(e) self.scheduler.taskError(self.thisIndex, job_id, e) if not isinstance(result_destination, int): @@ -424,7 +483,9 @@ def send_chunk_exc(self, e, result_destination, job_id): def check(self, func_module, func_name): if charm.options.remote_exec is not True: - raise Charm4PyError('Remote code execution is disabled. Set charm.options.remote_exec to True') + raise Charm4PyError( + "Remote code execution is disabled. Set charm.options.remote_exec to True" + ) eval(func_name, sys.modules[func_module].__dict__) @@ -457,10 +518,14 @@ def Task(self, func, args, ret=False, awaitable=False): def map(self, func, iterable, chunksize=1, ncores=-1, is_ray=False): result = Future(store=is_ray) # TODO shouldn't send task objects to a central place. what if they are large? - self.pool_scheduler.start(func, iterable, result, ncores, chunksize, is_ray=is_ray) + self.pool_scheduler.start( + func, iterable, result, ncores, chunksize, is_ray=is_ray + ) return result.get() - def map_async(self, func, iterable, chunksize=1, ncores=-1, multi_future=False, is_ray=False): + def map_async( + self, func, iterable, chunksize=1, ncores=-1, multi_future=False, is_ray=False + ): if self.mype == 0: # see deepcopy comment above (only need this for async case since # the sync case won't return until all the tasks have finished) @@ -469,7 +534,9 @@ def map_async(self, func, iterable, chunksize=1, ncores=-1, multi_future=False, result = [Future(store=is_ray) for _ in range(len(iterable))] else: result = Future(store=is_ray) - self.pool_scheduler.start(func, iterable, result, ncores, chunksize, is_ray=is_ray) + self.pool_scheduler.start( + func, iterable, result, ncores, chunksize, is_ray=is_ray + ) return result # iterable is a sequence of (function, args) tuples diff --git a/charm4py/ray/api.py b/charm4py/ray/api.py index c49f6250..6f39840b 100644 --- a/charm4py/ray/api.py +++ b/charm4py/ray/api.py @@ -1,20 +1,27 @@ import types -from copy import deepcopy counter = 0 + def init(): from charm4py import charm, Group, ObjectStore + global object_store object_store = Group(ObjectStore) - charm.thisProxy.updateGlobals({'object_store' : object_store,}, - awaitable=True, module_name='charm4py.ray.api').get() + charm.thisProxy.updateGlobals( + { + "object_store": object_store, + }, + awaitable=True, + module_name="charm4py.ray.api", + ).get() def get_object_store(): global object_store return object_store + class RayProxyFunction(object): def __init__(self, func): self.func = func @@ -28,21 +35,25 @@ def remote(self, *args, **kwargs): class RayProxy(object): def __init__(self, subclass, args, pe): - from charm4py import Chare, register, charm + from charm4py import Chare + self.proxy = Chare(subclass, args=args, onPE=pe) for f in dir(self.proxy): - if not f.startswith('__'): + if not f.startswith("__"): setattr(self, f, RayProxyFunction(self.remote_function(f))) def remote_function(self, f): proxy_func = getattr(self.proxy, f) + def call_remote(*args, **kwargs): return proxy_func(*args, **kwargs, is_ray=True) + return call_remote def get_ray_class(subclass): from charm4py import Chare, register, charm + @register class RayChare(Chare): @staticmethod @@ -51,37 +62,47 @@ def remote(*a): ray_proxy = RayProxy(subclass, a, counter % charm.numPes()) counter += 1 return ray_proxy + return RayChare + def get_ray_task(func): from charm4py import charm + def task(*args): func._ck_coro = True - return charm.pool.map_async(func, [args], chunksize=1, multi_future=True, is_ray=True)[0] + return charm.pool.map_async( + func, [args], chunksize=1, multi_future=True, is_ray=True + )[0] + return task + def remote(*args, **kwargs): - from charm4py import charm, Chare, register - + from charm4py import Chare, register + num_returns = kwargs.pop("num_returns", 1) if len(args) == 1 and len(kwargs) == 0: if isinstance(args[0], types.FunctionType): args[0].remote = get_ray_task(args[0]) return args[0] - else: + else: # decorating without any arguments - subclass = type(args[0].__name__, (Chare, args[0]), {"__init__": args[0].__init__}) + subclass = type( + args[0].__name__, (Chare, args[0]), {"__init__": args[0].__init__} + ) register(subclass) rayclass = get_ray_class(subclass) rayclass.__name__ = args[0].__name__ return rayclass else: raise NotImplementedError("Arguments not implemented yet") - + def get(arg): from charm4py import charm from ..threads import Future + if isinstance(arg, Future): return charm.get_future_value(arg) elif isinstance(arg, list): @@ -94,13 +115,15 @@ def wait(futs, num_returns=1, timeout=None, fetch_local=True): if timeout != None or not fetch_local: raise NotImplementedError("timeout and fetch_local not implemented yet") from charm4py import charm + ready = charm.getany_future_value(futs, num_returns) not_ready = list(set(futs) - set(ready)) return ready, not_ready + def put(obj): - from ..threads import Future from ..charm import charm + fut = charm.threadMgr.createFuture(store=True) fut.create_object(obj) - return fut \ No newline at end of file + return fut diff --git a/charm4py/reduction.py b/charm4py/reduction.py index 03992500..c8e99671 100644 --- a/charm4py/reduction.py +++ b/charm4py/reduction.py @@ -3,21 +3,26 @@ import operator as op from itertools import chain import sys + if sys.version_info[0] < 3: import cPickle else: import pickle as cPickle try: import numpy as np + haveNumpy = True except ImportError: # this is to avoid numpy dependency haveNumpy = False + class NumpyDummyModule: class ndarray: pass + class number: pass + np = NumpyDummyModule() @@ -26,14 +31,38 @@ class number: NUM_C_TYPES = 13 # Set of integer identifiers for C types used with internal reducers -(C_BOOL, C_CHAR, C_SHORT, C_INT, C_LONG, C_LONG_LONG, - C_UCHAR, C_USHORT, C_UINT, C_ULONG, C_ULONG_LONG, - C_FLOAT, C_DOUBLE) = range(NUM_C_TYPES) +( + C_BOOL, + C_CHAR, + C_SHORT, + C_INT, + C_LONG, + C_LONG_LONG, + C_UCHAR, + C_USHORT, + C_UINT, + C_ULONG, + C_ULONG_LONG, + C_FLOAT, + C_DOUBLE, +) = range(NUM_C_TYPES) # map names of C types (as they appear in CkReductionTypesExt) to their identifiers -c_typename_to_id = {'char': C_CHAR, 'short': C_SHORT, 'int': C_INT, 'long': C_LONG, 'long_long': C_LONG_LONG, - 'uchar': C_UCHAR, 'ushort': C_USHORT, 'uint': C_UINT, 'ulong': C_ULONG, 'ulong_long': C_ULONG_LONG, - 'float': C_FLOAT, 'double': C_DOUBLE, 'bool': C_BOOL} +c_typename_to_id = { + "char": C_CHAR, + "short": C_SHORT, + "int": C_INT, + "long": C_LONG, + "long_long": C_LONG_LONG, + "uchar": C_UCHAR, + "ushort": C_USHORT, + "uint": C_UINT, + "ulong": C_ULONG, + "ulong_long": C_ULONG_LONG, + "float": C_FLOAT, + "double": C_DOUBLE, + "bool": C_BOOL, +} def _useNumpyForReduction(contribs): @@ -41,6 +70,8 @@ def _useNumpyForReduction(contribs): # always prefer numpy when we can use it to take advantage of speed # also, the non-section version will return numpy arrays when possible return haveNumpy or isNumpyType + + # ------------------- Reducers ------------------- @@ -165,20 +196,23 @@ def __init__(self, charm): self.addReducer(_bcast_exc_reducer) self.addReducer(gather, pre=gather_preprocess, post=gather_postprocess) - self.nop = charm.ReducerType.nop - self.sum = (SUM, self._sum) # (internal op code, python reducer) + self.nop = charm.ReducerType.nop + self.sum = (SUM, self._sum) # (internal op code, python reducer) self.product = (PRODUCT, self._product) - self.max = (MAX, self._max) - self.min = (MIN, self._min) + self.max = (MAX, self._max) + self.min = (MIN, self._min) self.logical_and = (AND, self._and) - self.logical_or = (OR, self._or) + self.logical_or = (OR, self._or) self.logical_xor = (XOR, self._xor) def addReducer(self, func, pre=None, post=None): if hasattr(self, func.__name__): from .charm import Charm4PyError - raise Charm4PyError("Reducer with name " + func.__name__ + " already registered") - func.hasPreprocess = False + + raise Charm4PyError( + "Reducer with name " + func.__name__ + " already registered" + ) + func.hasPreprocess = False func.hasPostprocess = False if pre is not None: func.hasPreprocess = True @@ -191,6 +225,7 @@ def addReducer(self, func, pre=None, post=None): # ------------------- Reduction Manager ------------------- + class ReductionManager(object): def __init__(self, charm, reducers): @@ -204,50 +239,72 @@ def populateConversionTables(self): # - c_type is identifier for C type (C_CHAR, C_SHORT, etc) # - charm_reducer_type is value for internal reducer type as they appear in CkReductionTypesExt self.red_table = [[]] * 7 - self.red_table[SUM] = [0] * NUM_C_TYPES + self.red_table[SUM] = [0] * NUM_C_TYPES self.red_table[PRODUCT] = [0] * NUM_C_TYPES - self.red_table[MAX] = [0] * NUM_C_TYPES - self.red_table[MIN] = [0] * NUM_C_TYPES - self.red_table[AND] = [0] * NUM_C_TYPES - self.red_table[OR] = [0] * NUM_C_TYPES - self.red_table[XOR] = [0] * NUM_C_TYPES - - fields = self.charm.lib.getReductionTypesFields() # get names of fields in CkReductionTypesExt + self.red_table[MAX] = [0] * NUM_C_TYPES + self.red_table[MIN] = [0] * NUM_C_TYPES + self.red_table[AND] = [0] * NUM_C_TYPES + self.red_table[OR] = [0] * NUM_C_TYPES + self.red_table[XOR] = [0] * NUM_C_TYPES + + fields = ( + self.charm.lib.getReductionTypesFields() + ) # get names of fields in CkReductionTypesExt maxFieldVal = max([getattr(self.charm.ReducerType, f) for f in fields]) # charm_reducer_to_ctype maps the values in CkReductionTypesExt to C type identifier self.charm_reducer_to_ctype = [None] * (maxFieldVal + 1) for f in fields: - if f == 'nop': + if f == "nop": continue - elif f == 'external_py': - op, c_type_str = None, 'char' - elif f.startswith('logical'): - op, c_type_str = f.split('_')[1:] + elif f == "external_py": + op, c_type_str = None, "char" + elif f.startswith("logical"): + op, c_type_str = f.split("_")[1:] else: - op, c_type_str = f.split('_', 1) # e.g. from 'sum_long' extracts 'sum' and 'long' - ctype_code = c_typename_to_id[c_type_str] # e.g. map 'long' to C_LONG - f_val = getattr(self.charm.ReducerType, f) # value of the field in CkReductionTypesExt + op, c_type_str = f.split( + "_", 1 + ) # e.g. from 'sum_long' extracts 'sum' and 'long' + ctype_code = c_typename_to_id[c_type_str] # e.g. map 'long' to C_LONG + f_val = getattr( + self.charm.ReducerType, f + ) # value of the field in CkReductionTypesExt # print(f, "ctype_code", ctype_code, "f_val=", f_val) self.charm_reducer_to_ctype[f_val] = ctype_code - if op == 'sum': self.red_table[SUM][ctype_code] = f_val - elif op == 'product': self.red_table[PRODUCT][ctype_code] = f_val - elif op == 'max': self.red_table[MAX][ctype_code] = f_val - elif op == 'min': self.red_table[MIN][ctype_code] = f_val - elif op == 'and': self.red_table[AND][ctype_code] = f_val - elif op == 'or': self.red_table[OR][ctype_code] = f_val - elif op == 'xor': self.red_table[XOR][ctype_code] = f_val + if op == "sum": + self.red_table[SUM][ctype_code] = f_val + elif op == "product": + self.red_table[PRODUCT][ctype_code] = f_val + elif op == "max": + self.red_table[MAX][ctype_code] = f_val + elif op == "min": + self.red_table[MIN][ctype_code] = f_val + elif op == "and": + self.red_table[AND][ctype_code] = f_val + elif op == "or": + self.red_table[OR][ctype_code] = f_val + elif op == "xor": + self.red_table[XOR][ctype_code] = f_val # ------ numpy data types ------ if haveNumpy: # map numpy data types to internal reduction C code identifier - self.numpy_type_map = {'bool': C_BOOL, 'int8': C_CHAR, 'int16': C_SHORT, - 'int32': C_INT, 'int64': C_LONG, 'uint8': C_UCHAR, - 'uint16': C_USHORT, 'uint32': C_UINT, 'uint64': C_ULONG, - #'float16': ? - 'float32': C_FLOAT, 'float64': C_DOUBLE} - if np.dtype('int64').itemsize > self.charm.lib.sizeof(C_LONG): - self.numpy_type_map['int64'] = C_LONG_LONG - self.numpy_type_map['uint64'] = C_ULONG_LONG + self.numpy_type_map = { + "bool": C_BOOL, + "int8": C_CHAR, + "int16": C_SHORT, + "int32": C_INT, + "int64": C_LONG, + "uint8": C_UCHAR, + "uint16": C_USHORT, + "uint32": C_UINT, + "uint64": C_ULONG, + #'float16': ? + "float32": C_FLOAT, + "float64": C_DOUBLE, + } + if np.dtype("int64").itemsize > self.charm.lib.sizeof(C_LONG): + self.numpy_type_map["int64"] = C_LONG_LONG + self.numpy_type_map["uint64"] = C_ULONG_LONG # verify that mapping is correct for dt, c_type in self.numpy_type_map.items(): @@ -261,29 +318,56 @@ def populateConversionTables(self): if self.rev_np_array_type_map[C_LONG] is None: self.rev_np_array_type_map[C_LONG] = np.int_().dtype.name self.rev_np_array_type_map[C_ULONG] = np.uint().dtype.name - assert np.dtype('int_').itemsize == self.charm.lib.sizeof(C_LONG) - assert np.dtype('uint').itemsize == self.charm.lib.sizeof(C_ULONG) + assert np.dtype("int_").itemsize == self.charm.lib.sizeof(C_LONG) + assert np.dtype("uint").itemsize == self.charm.lib.sizeof(C_ULONG) if self.rev_np_array_type_map[C_LONG_LONG] is None: self.rev_np_array_type_map[C_LONG_LONG] = np.longlong().dtype.name self.rev_np_array_type_map[C_ULONG_LONG] = np.ulonglong().dtype.name - assert np.dtype('longlong').itemsize == self.charm.lib.sizeof(C_LONG_LONG) - assert np.dtype('ulonglong').itemsize == self.charm.lib.sizeof(C_ULONG_LONG) + assert np.dtype("longlong").itemsize == self.charm.lib.sizeof( + C_LONG_LONG + ) + assert np.dtype("ulonglong").itemsize == self.charm.lib.sizeof( + C_ULONG_LONG + ) # ------ array.array data types ------ # map array.array data types to internal reduction C code identifier - self.array_type_map = {'b': C_CHAR, 'B': C_UCHAR, 'h': C_SHORT, 'H': C_USHORT, - 'i': C_INT, 'I': C_UINT, 'l': C_LONG, 'L': C_ULONG, - 'f': C_FLOAT, 'd': C_DOUBLE} + self.array_type_map = { + "b": C_CHAR, + "B": C_UCHAR, + "h": C_SHORT, + "H": C_USHORT, + "i": C_INT, + "I": C_UINT, + "l": C_LONG, + "L": C_ULONG, + "f": C_FLOAT, + "d": C_DOUBLE, + } if sys.version_info >= (3, 3, 0): - self.array_type_map['q'] = C_LONG_LONG - self.array_type_map['Q'] = C_ULONG_LONG + self.array_type_map["q"] = C_LONG_LONG + self.array_type_map["Q"] = C_ULONG_LONG # verify that mapping is correct for dt, c_type in self.array_type_map.items(): assert array.array(dt).itemsize == self.charm.lib.sizeof(c_type) - self.rev_array_type_map = ['b', 'b', 'h', 'i', 'l', 'q', 'B', 'H', 'I', 'L', 'Q', 'f', 'd'] + self.rev_array_type_map = [ + "b", + "b", + "h", + "i", + "l", + "q", + "B", + "H", + "I", + "L", + "Q", + "f", + "d", + ] assert len(self.rev_array_type_map) == NUM_C_TYPES # ------ python data types ------ @@ -337,12 +421,19 @@ def prepare(self, data, reducer, contributor): pyReducer = reducer if pyReducer is None: - assert charm_reducer_type > 0, 'Could not find a valid reducer. Check that datatype matches the operator' + assert ( + charm_reducer_type > 0 + ), "Could not find a valid reducer. Check that datatype matches the operator" return (charm_reducer_type, data, c_type) else: - if not hasattr(pyReducer, 'hasPreprocess'): + if not hasattr(pyReducer, "hasPreprocess"): from .charm import Charm4PyError - raise Charm4PyError('Invalid reducer ' + str(reducer) + '. Reducers must be functions registered with addReducer') + + raise Charm4PyError( + "Invalid reducer " + + str(reducer) + + ". Reducers must be functions registered with addReducer" + ) if pyReducer.hasPreprocess: data = pyReducer.preprocess(data, contributor) rednMsg = ({b"custom_reducer": pyReducer.__name__}, [data]) diff --git a/charm4py/sections.py b/charm4py/sections.py index 27ae633b..b0d1b4c0 100644 --- a/charm4py/sections.py +++ b/charm4py/sections.py @@ -5,7 +5,9 @@ # Reduction Info object: holds state for an in-progress reduction class RedInfo(object): def __init__(self): - self.ready = False # got all messages, can reduce and send contribution to the parent + self.ready = ( + False # got all messages, can reduce and send contribution to the parent + ) self.msgs = [] # list of reduction msgs received on this PE self.reducer = None # reducer function self.cb = None # reduction callback @@ -20,16 +22,19 @@ def __init__(self): self.parent = None self.children = [] # these are PE numbers self.local_elems = [] # list of local chares that are part of the section - self.buffered_msgs = [] # stores msgs received for this section before creation has completed + self.buffered_msgs = ( + [] + ) # stores msgs received for this section before creation has completed self.redno = 0 # current reduction number for this section self.reds = [] # list of RedInfo objects for pending reductions - def __init__(self): - assert not hasattr(charm, 'sectionMgr') + assert not hasattr(charm, "sectionMgr") charm.sectionMgr = self self.profiling = charm.options.profiling - self.sections = defaultdict(SectionManager.SectionEntry) # stores section entries for this PE + self.sections = defaultdict( + SectionManager.SectionEntry + ) # stores section entries for this PE self.send_ep = self.thisProxy.sendToSection.ep def createSectionDown(self, sid, pes, parent=None): @@ -54,7 +59,7 @@ def createSectionDown(self, sid, pes, parent=None): entry.buffered_msgs = [] self.releaseRed(sid, entry, entry.reds) - @when('cons is not None or gid in charm.groups') + @when("cons is not None or gid in charm.groups") def createGroupSectionDown(self, sid, gid, pes, parent=None, cons=None): entry = self.sections[sid] entry.final = True @@ -111,8 +116,9 @@ def sendToSectionLocal(self, sid, ep, header, *args): em = charm.runningEntryMethod em.startMeasuringSendTime() msg = charm.packMsg(None, [sid, ep, header] + list(args), {}) - charm.lib.CkGroupSendMulti(self.thisProxy.gid, entry.children, - self.send_ep, msg) + charm.lib.CkGroupSendMulti( + self.thisProxy.gid, entry.children, self.send_ep, msg + ) del msg if profiling: em.stopMeasuringSendTime() @@ -136,7 +142,9 @@ def sendToSection(self, sid, ep, header, *args): # (thus avoiding any copies) charm.lib.sendToSection(self.thisProxy.gid, entry.children) if profiling: - charm.recordSend(charm.msg_recv_stats[4]) # send size is same as last received msg size + charm.recordSend( + charm.msg_recv_stats[4] + ) # send size is same as last received msg size em.stopMeasuringSendTime() for obj in entry.local_elems: @@ -182,9 +190,13 @@ def releaseRed(self, sid, entry, reds): redinfo.cb(reduced_data) else: if reducer == Reducer._bcast_exc_reducer: - entry.parent.contrib(sid, entry.redno - 1, reduced_data, reducer, None) + entry.parent.contrib( + sid, entry.redno - 1, reduced_data, reducer, None + ) else: - entry.parent.contrib(sid, entry.redno - 1, reduced_data, None, None) + entry.parent.contrib( + sid, entry.redno - 1, reduced_data, None, None + ) else: return diff --git a/charm4py/threads.py b/charm4py/threads.py index 3c6f0547..5cd84b57 100644 --- a/charm4py/threads.py +++ b/charm4py/threads.py @@ -4,9 +4,9 @@ # Future IDs (fids) are sometimes carried as reference numbers inside # Charm++ CkCallback objects. The data type most commonly used for # this is unsigned short, hence this limit -# FIXME: This could fail according to the above warning, +# FIXME: This could fail according to the above warning, # but we need large number of futures for the ray -# programming model. +# programming model. FIDMAXVAL = 4294967295 @@ -26,16 +26,21 @@ def __init__(self, msg): # See commit 25e2935 if need to resurrect code where proxies were included when # futures were pickled. + class Future(object): def __init__(self, fid, gr, src, num_vals, store=False): self.fid = fid # unique future ID within the process that created it self.gr = gr # greenlet that created the future - self.src = src # PE where the future was created (not used for collective futures) + self.src = ( + src # PE where the future was created (not used for collective futures) + ) self.nvals = num_vals # number of values that the future expects to receive self.values = [] # values of the future self.blocked = False # flag to check if creator thread is blocked on the future - self.gotvalues = False # flag to check if expected number of values have been received + self.gotvalues = ( + False # flag to check if expected number of values have been received + ) self.error = None # if the future receives an Exception, it is set here if store: self.store_id = (self.src << 32) + self.fid @@ -48,10 +53,11 @@ def __init__(self, fid, gr, src, num_vals, store=False): self.borrow_depth = 0 def get(self): - """ Blocking call on current entry method's thread to obtain the values of the - future. If the values are already available then they are returned immediately. + """Blocking call on current entry method's thread to obtain the values of the + future. If the values are already available then they are returned immediately. """ from .charm import charm + if self.store: return charm.get_future_value(self) else: @@ -74,7 +80,7 @@ def waitReady(self, f): self.blocked = 2 def send(self, result=None): - """ Send a value to this future. """ + """Send a value to this future.""" if self.store: self.create_object(result) else: @@ -87,7 +93,7 @@ def getTargetProxyEntryMethod(self): return charm.thisProxy[self.src]._future_deposit_result def deposit(self, result): - """ Deposit a value for this future. """ + """Deposit a value for this future.""" self.values.append(result) if isinstance(result, Exception): self.error = result @@ -108,55 +114,67 @@ def resume(self, threadMgr): def lookup_location(self): from .charm import charm + if not self.store: - raise ValueError("Operation not supported for future not" - " stored in the object store") + raise ValueError( + "Operation not supported for future not" " stored in the object store" + ) obj_store = get_object_store() local_obj_store = obj_store[charm.myPe()].ckLocalBranch() return local_obj_store.lookup_location(self.store_id) - + def lookup_object(self): from .charm import charm + if not self.store: - raise ValueError("Operation not supported for future not" - " stored in the object store") + raise ValueError( + "Operation not supported for future not" " stored in the object store" + ) obj_store = get_object_store() local_obj_store = obj_store[charm.myPe()].ckLocalBranch() return local_obj_store.lookup_object(self.store_id) - + def delete_object(self): from .charm import charm + if not self.store: - raise ValueError("Operation not supported for future not" - " stored in the object store") + raise ValueError( + "Operation not supported for future not" " stored in the object store" + ) obj_store = get_object_store() obj_store[self.store_id % charm.numPes()].delete_remote_objects(self.store_id) - + def is_local(self): if not self.store: - raise ValueError("Operation not supported for future not" - " stored in the object store") - return not (self.lookup_object() is None) - + raise ValueError( + "Operation not supported for future not" " stored in the object store" + ) + return self.lookup_object() is not None + def create_object(self, obj): from .charm import charm + if not self.store: - raise ValueError("Operation not supported for future not" - " stored in the object store") + raise ValueError( + "Operation not supported for future not" " stored in the object store" + ) obj_store = get_object_store() local_obj_store = obj_store[charm.myPe()].ckLocalBranch() local_obj_store.create_object(self.store_id, obj) def request_object(self): if not self.store: - raise ValueError("Operation not supported for future not" - " stored in the object store") + raise ValueError( + "Operation not supported for future not" " stored in the object store" + ) if self._requested: return from .charm import charm + obj_store = get_object_store() obj_store[self.store_id % charm.numPes()].request_location_object( - self.store_id, charm.myPe()) + self.store_id, charm.myPe() + ) self._requested = True def __getstate__(self): @@ -180,12 +198,14 @@ def __del__(self): if self.store: if self.parent == None and self.num_borrowers == 0: # This is the owner, delete the object from the object store - #print("Deleting owner", self.store_id) + # print("Deleting owner", self.store_id) self.delete_object() else: # this is a borrower, notify its parent of the deletion - #print("Deleting", self.store_id, "from", charm.myPe(), "sending notify to", self.parent) - charm.thisProxy[self.parent].notify_future_deletion(self.store_id, self.borrow_depth - 1) + # print("Deleting", self.store_id, "from", charm.myPe(), "sending notify to", self.parent) + charm.thisProxy[self.parent].notify_future_deletion( + self.store_id, self.borrow_depth - 1 + ) class CollectiveFuture(Future): @@ -235,6 +255,7 @@ class EntryMethodThreadManager(object): def __init__(self, _charm): global charm, Charm4PyError, threadMgr from .charm import Charm4PyError + charm = _charm threadMgr = self self.options = charm.options @@ -256,17 +277,23 @@ def isMainThread(self): def objMigrating(self, obj): if obj._numthreads > 0: - raise Charm4PyError('Migration of chares with active threads is not currently supported') + raise Charm4PyError( + "Migration of chares with active threads is not currently supported" + ) def throwNotThreadedError(self): - raise NotThreadedError("Method '" + charm.last_em_exec.C.__name__ + "." + - charm.last_em_exec.name + - "' must be a couroutine to be able to suspend (decorate it with @coro)") + raise NotThreadedError( + "Method '" + + charm.last_em_exec.C.__name__ + + "." + + charm.last_em_exec.name + + "' must be a couroutine to be able to suspend (decorate it with @coro)" + ) def pauseThread(self): - """ Called by an entry method thread to wait for something. - Returns data that the thread was waiting for, or None if it was - waiting for an event + """Called by an entry method thread to wait for something. + Returns data that the thread was waiting for, or None if it was + waiting for an event """ gr = getcurrent() main_gr = self.main_gr @@ -287,10 +314,10 @@ def pauseThread(self): return main_gr.switch() def _resumeThread(self, gr, arg): - """ Deposit a result or signal that a local entry method thread is waiting on, - and resume it. This executes on the main thread. + """Deposit a result or signal that a local entry method thread is waiting on, + and resume it. This executes on the main thread. """ - #assert getcurrent() == self.main_gr + # assert getcurrent() == self.main_gr if gr.notify: obj = gr.obj obj._thread_notify_target.threadResumed(obj._thread_notify_data) @@ -309,14 +336,14 @@ def resumeThread_prof(self, gr, arg): ems[-1].startMeasuringTime() def createFuture(self, num_vals=1, store=False): - """ Creates a new Future object by obtaining a unique (local) future ID. """ + """Creates a new Future object by obtaining a unique (local) future ID.""" gr = getcurrent() if not store and gr == self.main_gr: self.throwNotThreadedError() # get a unique local Future ID global FIDMAXVAL futures = self.futures - assert len(futures) < FIDMAXVAL, 'Too many pending futures, cannot create more' + assert len(futures) < FIDMAXVAL, "Too many pending futures, cannot create more" fid = (self.lastfid % FIDMAXVAL) + 1 while fid in futures: fid = (fid % FIDMAXVAL) + 1 @@ -326,7 +353,7 @@ def createFuture(self, num_vals=1, store=False): return f def createCollectiveFuture(self, fid, obj, proxy): - """ fid is supplied in this case and has to be the same for all distributed chares """ + """fid is supplied in this case and has to be the same for all distributed chares""" gr = getcurrent() if gr == self.main_gr: self.throwNotThreadedError() @@ -335,13 +362,15 @@ def createCollectiveFuture(self, fid, obj, proxy): return f def depositFuture(self, fid, result): - """ Set a value of a future that is being managed by this ThreadManager. """ + """Set a value of a future that is being managed by this ThreadManager.""" futures = self.futures try: f = futures[fid] except KeyError: - raise Charm4PyError('No pending future with fid=' + str(fid) + '. A common reason is ' - 'sending to a future that already received its value(s)') + raise Charm4PyError( + "No pending future with fid=" + str(fid) + ". A common reason is " + "sending to a future that already received its value(s)" + ) if f.deposit(result): del futures[fid] # resume if a thread is blocked on the future diff --git a/charm4py/wait.py b/charm4py/wait.py index 355f5a4b..158ae91c 100644 --- a/charm4py/wait.py +++ b/charm4py/wait.py @@ -14,19 +14,19 @@ class MsgTagCond(object): group = True def __init__(self, cond_str, attrib_name, arg_idx): - self.cond_str = cond_str + self.cond_str = cond_str self.attrib_name = attrib_name - self.arg_idx = arg_idx + self.arg_idx = arg_idx def evaluateWhen(self, obj, args): return args[self.arg_idx] == getattr(obj, self.attrib_name) def createWaitCondition(self): c = object.__new__(MsgTagCond) - c.cond_str = self.cond_str + c.cond_str = self.cond_str c.attrib_name = self.attrib_name - c.arg_idx = self.arg_idx - c.wait_queue = defaultdict(list) + c.arg_idx = self.arg_idx + c.wait_queue = defaultdict(list) return c def enqueue(self, elem): @@ -62,17 +62,17 @@ class ChareStateMsgCond(object): group = False def __init__(self, cond_str, cond_func): - self.cond_str = cond_str + self.cond_str = cond_str self.cond_func = cond_func def createWaitCondition(self): c = object.__new__(ChareStateMsgCond) - c.cond_str = self.cond_str + c.cond_str = self.cond_str c.cond_func = self.cond_func return c def evaluateWhen(self, obj, args): - #return eval(cond_str) # eval is very slow + # return eval(cond_str) # eval is very slow return self.cond_func(obj, args) def enqueue(self, elem): @@ -80,7 +80,7 @@ def enqueue(self, elem): def check(self, obj): t, em, header, args = self.elem - #if eval(me.cond_str): # eval is very slow + # if eval(me.cond_str): # eval is very slow if self.cond_func(obj, args): em.run(obj, header, args) return True, True @@ -112,20 +112,21 @@ class ChareStateCond(object): group = True def __init__(self, cond_str, module_name): - self.cond_str = cond_str + self.cond_str = cond_str self.globals_module_name = module_name - self.cond_func = eval('lambda self: ' + cond_str, - import_module(module_name).__dict__) + self.cond_func = eval( + "lambda self: " + cond_str, import_module(module_name).__dict__ + ) def createWaitCondition(self): c = object.__new__(ChareStateCond) - c.cond_str = self.cond_str - c.cond_func = self.cond_func + c.cond_str = self.cond_str + c.cond_func = self.cond_func c.wait_queue = [] return c def evaluateWhen(self, obj, args): - #return eval(me.cond_str) # eval is very slow + # return eval(me.cond_str) # eval is very slow return self.cond_func(obj) def enqueue(self, elem): @@ -133,7 +134,7 @@ def enqueue(self, elem): def check(self, obj): dequeued = False - #while eval(me.cond_str): # eval is very slow + # while eval(me.cond_str): # eval is very slow while self.cond_func(obj): elem = self.wait_queue.pop() if elem[0] == 0: @@ -153,16 +154,20 @@ def __getstate__(self): return self.cond_str, self.wait_queue, self._cond_next, self.globals_module_name def __setstate__(self, state): - self.cond_str, self.wait_queue, self._cond_next, self.globals_module_name = state - self.cond_func = eval('lambda self: ' + self.cond_str, - import_module(self.globals_module_name).__dict__) + self.cond_str, self.wait_queue, self._cond_next, self.globals_module_name = ( + state + ) + self.cond_func = eval( + "lambda self: " + self.cond_str, + import_module(self.globals_module_name).__dict__, + ) def is_tag_cond(root_ast): - """ Determine if the AST corresponds to a 'when' condition of the form - `self.xyz == args[x]` where xyz is the name of an attribute, x is an - integer. if True, returns the condition string, the name of the attribute - (e.g. xyz) and the integer index (e.g. x). Otherwise returns None """ + """Determine if the AST corresponds to a 'when' condition of the form + `self.xyz == args[x]` where xyz is the name of an attribute, x is an + integer. if True, returns the condition string, the name of the attribute + (e.g. xyz) and the integer index (e.g. x). Otherwise returns None""" try: if not isinstance(root_ast.body, ast.Compare): return None @@ -181,10 +186,10 @@ def is_tag_cond(root_ast): elif isinstance(right, ast.Attribute) and (isinstance(left, ast.Subscript)): attrib, args = right, left - if (attrib is None) or (attrib.value.id != 'self'): + if (attrib is None) or (attrib.value.id != "self"): return None - if args.value.id != 'args': + if args.value.id != "args": return None idx = args.slice.value @@ -192,11 +197,11 @@ def is_tag_cond(root_ast): idx = idx.n elif isinstance(idx, ast.Constant): idx = idx.value - + if not isinstance(idx, int): return None - return ('self.' + attrib.attr + ' == args[' + str(idx) + ']', attrib.attr, idx) + return ("self." + attrib.attr + " == args[" + str(idx) + "]", attrib.attr, idx) except: return None @@ -208,18 +213,25 @@ def __init__(self, method_arguments): self.num_msg_args = 0 def visit_Attribute(self, node): - if isinstance(node.value, ast.Name) and node.value.id in self.method_arguments and node.value.id != 'self': + if ( + isinstance(node.value, ast.Name) + and node.value.id in self.method_arguments + and node.value.id != "self" + ): idx = self.method_arguments[node.value.id] self.num_msg_args += 1 - return ast.copy_location(ast.Attribute( - value=ast.Subscript( - value=ast.Name(id='args', ctx=ast.Load()), - slice=ast.Index(value=ast.Num(n=idx)), - ctx=node.ctx + return ast.copy_location( + ast.Attribute( + value=ast.Subscript( + value=ast.Name(id="args", ctx=ast.Load()), + slice=ast.Index(value=ast.Num(n=idx)), + ctx=node.ctx, + ), + attr=node.attr, + ctx=node.ctx, ), - attr=node.attr, - ctx=node.ctx - ), node) + node, + ) else: return self.generic_visit(node) @@ -227,27 +239,31 @@ def visit_Name(self, node): if node.id in self.method_arguments: idx = self.method_arguments[node.id] self.num_msg_args += 1 - return ast.copy_location(ast.Subscript( - value=ast.Name(id='args', ctx=ast.Load()), - slice=ast.Index(value=ast.Num(n=idx)), - ctx=node.ctx - ), node) + return ast.copy_location( + ast.Subscript( + value=ast.Name(id="args", ctx=ast.Load()), + slice=ast.Index(value=ast.Num(n=idx)), + ctx=node.ctx, + ), + node, + ) else: return node -#import astunparse +# import astunparse + def parse_cond_str(cond_str, module_name, method_arguments={}): - #print("Original condition string is", cond_str) - t = ast.parse(cond_str, filename='', mode='eval') + # print("Original condition string is", cond_str) + t = ast.parse(cond_str, filename="", mode="eval") if len(method_arguments) > 0: # in the AST, convert names of method arguments to `args[x]`, where x is the # position of the argument in the function definition transformer = MsgArgsTransformer(method_arguments) transformer.visit(t) - #print("Transformed to", astunparse.unparse(t), "num args detected=", transformer.num_msg_args) + # print("Transformed to", astunparse.unparse(t), "num args detected=", transformer.num_msg_args) if transformer.num_msg_args == 0: return ChareStateCond(cond_str, module_name) else: @@ -258,11 +274,12 @@ def parse_cond_str(cond_str, module_name, method_arguments={}): return MsgTagCond(*tag_cond) # compile AST to code, then eval to a lambda function - new_tree = ast.parse("lambda self, args: x", filename='', mode='eval') + new_tree = ast.parse("lambda self, args: x", filename="", mode="eval") new_tree.body.body = t.body new_tree = ast.fix_missing_locations(new_tree) - lambda_func = eval(compile(new_tree, '', 'eval'), - import_module(module_name).__dict__) + lambda_func = eval( + compile(new_tree, "", "eval"), import_module(module_name).__dict__ + ) return ChareStateMsgCond(cond_str, lambda_func) diff --git a/charmrun/start.py b/charmrun/start.py index ea176165..8d354d0d 100644 --- a/charmrun/start.py +++ b/charmrun/start.py @@ -12,11 +12,13 @@ def executable_is_python(args): Note: Returns true if no executable was found or if an executable was found and that executable is a Python file. """ + def is_exe(fpath): return os.path.isfile(fpath) and os.access(fpath, os.X_OK) def is_pyfile(fpath): return os.path.isfile(fpath) and fpath.endswith(".py") + for each in args: if is_pyfile(each): return True @@ -31,33 +33,34 @@ def nodelist_islocal(filename, regexp): # it is an error if filename doesn't exist, but I'll let charmrun print # the error. don't add ++local so that charmrun detects it return False - with open(filename, 'r') as f: + with open(filename, "r") as f: for line in f: m = regexp.search(line) - if m is not None and m.group(1) not in {'localhost', '127.0.0.1'}: + if m is not None and m.group(1) not in {"localhost", "127.0.0.1"}: return False return True def checkNodeListLocal(args): import re + regexp = re.compile("^\s*host\s+(\S+)\s*$") try: - i = args.index('++nodelist') + i = args.index("++nodelist") except ValueError: i = -1 if i != -1: - return nodelist_islocal(args[i+1], regexp) + return nodelist_islocal(args[i + 1], regexp) - if 'NODELIST' in os.environ: - return nodelist_islocal(os.environ['NODELIST'], regexp) + if "NODELIST" in os.environ: + return nodelist_islocal(os.environ["NODELIST"], regexp) - nodelist_cur_dir = os.path.join(os.getcwd(), 'nodelist') + nodelist_cur_dir = os.path.join(os.getcwd(), "nodelist") if os.path.exists(nodelist_cur_dir): return nodelist_islocal(nodelist_cur_dir, regexp) - nodelist_home_dir = os.path.join(os.path.expanduser('~'), '.nodelist') + nodelist_home_dir = os.path.join(os.path.expanduser("~"), ".nodelist") if os.path.exists(nodelist_home_dir): return nodelist_islocal(nodelist_home_dir, regexp) @@ -69,13 +72,13 @@ def start(args=[]): if len(args) == 0: args = sys.argv[1:] - if '++local' not in args and '++mpiexec' not in args and checkNodeListLocal(args): - args.append('++local') + if "++local" not in args and "++mpiexec" not in args and checkNodeListLocal(args): + args.append("++local") - if '++interactive' in args and 'charm4py.interactive' not in args: - args += ['-m', 'charm4py.interactive'] + if "++interactive" in args and "charm4py.interactive" not in args: + args += ["-m", "charm4py.interactive"] - cmd = [os.path.join(os.path.dirname(__file__), 'charmrun')] + cmd = [os.path.join(os.path.dirname(__file__), "charmrun")] if executable_is_python(args): # Note: sys.executable is the absolute path to the Python interpreter # We only want to invoke the interpreter if the execution target is a @@ -85,10 +88,10 @@ def start(args=[]): try: return subprocess.call(cmd) except FileNotFoundError: - print('charmrun executable not found. You are running \"' + __file__ + '\"') - print('Make sure this is a built or installed version of charmrun') + print('charmrun executable not found. You are running "' + __file__ + '"') + print("Make sure this is a built or installed version of charmrun") return 1 -if __name__ == '__main__': +if __name__ == "__main__": sys.exit(start()) diff --git a/docs/conf.py b/docs/conf.py index a9fe20af..d9f8388d 100644 --- a/docs/conf.py +++ b/docs/conf.py @@ -13,53 +13,51 @@ # All configuration values have a default; values that are commented out # serve to show the default. -import sys -import os # If extensions (or modules to document with autodoc) are in another directory, # add these directories to sys.path here. If the directory is relative to the # documentation root, use os.path.abspath to make it absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) +# sys.path.insert(0, os.path.abspath('.')) # -- General configuration ------------------------------------------------ # If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' +# needs_sphinx = '1.0' # Add any Sphinx extension module names here, as strings. They can be # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom # ones. extensions = [ - 'sphinx.ext.todo', + "sphinx.ext.todo", ] # Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] +templates_path = ["_templates"] # The suffix(es) of source filenames. # You can specify multiple suffix as a list of string: # source_suffix = ['.rst', '.md'] -source_suffix = '.rst' +source_suffix = ".rst" # The encoding of source files. -#source_encoding = 'utf-8-sig' +# source_encoding = 'utf-8-sig' # The master toctree document. -master_doc = 'index' +master_doc = "index" # General information about the project. -project = 'Charm4py' -copyright = '2019, University of Illinois' -author = 'Juan Galvez' +project = "Charm4py" +copyright = "2019, University of Illinois" +author = "Juan Galvez" # The version info for the project you're documenting, acts as replacement for # |version| and |release|, also used in various other places throughout the # built documents. # The short X.Y version. -version = '1.1' +version = "1.1" # The full version, including alpha/beta/rc tags. -release = '1.1' +release = "1.1" # The language for content autogenerated by Sphinx. Refer to documentation # for a list of supported languages. @@ -70,37 +68,37 @@ # There are two options for replacing |today|: either, you set today to some # non-false value, then it is used: -#today = '' +# today = '' # Else, today_fmt is used as the format for a strftime call. -#today_fmt = '%B %d, %Y' +# today_fmt = '%B %d, %Y' # List of patterns, relative to source directory, that match files and # directories to ignore when looking for source files. -exclude_patterns = ['_build'] +exclude_patterns = ["_build"] # The reST default role (used for this markup: `text`) to use for all # documents. -#default_role = None +# default_role = None # If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True +# add_function_parentheses = True # If true, the current module name will be prepended to all description # unit titles (such as .. function::). -#add_module_names = True +# add_module_names = True # If true, sectionauthor and moduleauthor directives will be shown in the # output. They are ignored by default. -#show_authors = False +# show_authors = False # The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' +pygments_style = "sphinx" # A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] +# modindex_common_prefix = [] # If true, keep warnings as "system message" paragraphs in the built documents. -#keep_warnings = False +# keep_warnings = False # If true, `todo` and `todoList` produce output, else they produce nothing. todo_include_todos = True @@ -115,151 +113,144 @@ # Theme options are theme-specific and customize the look and feel of a theme # further. For a list of options available for each theme, see the # documentation. -#html_theme_options = {} +# html_theme_options = {} # Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] +# html_theme_path = [] # The name for this set of Sphinx documents. If None, it defaults to # " v documentation". -#html_title = None +# html_title = None # A shorter title for the navigation bar. Default is the same as html_title. -#html_short_title = None +# html_short_title = None # The name of an image file (relative to this directory) to place at the top # of the sidebar. -#html_logo = None +# html_logo = None # The name of an image file (relative to this directory) to use as a favicon of # the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 # pixels large. -#html_favicon = None +# html_favicon = None # Add any paths that contain custom static files (such as style sheets) here, # relative to this directory. They are copied after the builtin static files, # so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] +html_static_path = ["_static"] # Add any extra paths that contain custom files (such as robots.txt or # .htaccess) here, relative to this directory. These files are copied # directly to the root of the documentation. -#html_extra_path = [] +# html_extra_path = [] # If not '', a 'Last updated on:' timestamp is inserted at every page bottom, # using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' +# html_last_updated_fmt = '%b %d, %Y' # If true, SmartyPants will be used to convert quotes and dashes to # typographically correct entities. -#html_use_smartypants = True +# html_use_smartypants = True # Custom sidebar templates, maps document names to template names. -#html_sidebars = {} +# html_sidebars = {} # Additional templates that should be rendered to pages, maps page names to # template names. -#html_additional_pages = {} +# html_additional_pages = {} # If false, no module index is generated. -#html_domain_indices = True +# html_domain_indices = True # If false, no index is generated. -#html_use_index = True +# html_use_index = True # If true, the index is split into individual pages for each letter. -#html_split_index = False +# html_split_index = False # If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True +# html_show_sourcelink = True # If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -#html_show_sphinx = True +# html_show_sphinx = True # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -#html_show_copyright = True +# html_show_copyright = True # If true, an OpenSearch description file will be output, and all pages will # contain a tag referring to it. The value of this option must be the # base URL from which the finished HTML is served. -#html_use_opensearch = '' +# html_use_opensearch = '' # This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None +# html_file_suffix = None # Language to be used for generating the HTML full-text search index. # Sphinx supports the following languages: # 'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja' # 'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr' -#html_search_language = 'en' +# html_search_language = 'en' # A dictionary with options for the search language support, empty by default. # Now only 'ja' uses this config value -#html_search_options = {'type': 'default'} +# html_search_options = {'type': 'default'} # The name of a javascript file (relative to the configuration directory) that # implements a search results scorer. If empty, the default will be used. -#html_search_scorer = 'scorer.js' +# html_search_scorer = 'scorer.js' # Output file base name for HTML help builder. -htmlhelp_basename = 'charm4pydoc' +htmlhelp_basename = "charm4pydoc" # -- Options for LaTeX output --------------------------------------------- latex_elements = { -# The paper size ('letterpaper' or 'a4paper'). -#'papersize': 'letterpaper', - -# The font size ('10pt', '11pt' or '12pt'). -#'pointsize': '10pt', - -# Additional stuff for the LaTeX preamble. -#'preamble': '', - -# Latex figure (float) alignment -#'figure_align': 'htbp', + # The paper size ('letterpaper' or 'a4paper'). + #'papersize': 'letterpaper', + # The font size ('10pt', '11pt' or '12pt'). + #'pointsize': '10pt', + # Additional stuff for the LaTeX preamble. + #'preamble': '', + # Latex figure (float) alignment + #'figure_align': 'htbp', } # Grouping the document tree into LaTeX files. List of tuples # (source start file, target name, title, # author, documentclass [howto, manual, or own class]). latex_documents = [ - (master_doc, 'charm4py.tex', 'Charm4py Documentation', - 'Juan Galvez', 'manual'), + (master_doc, "charm4py.tex", "Charm4py Documentation", "Juan Galvez", "manual"), ] # The name of an image file (relative to this directory) to place at the top of # the title page. -#latex_logo = None +# latex_logo = None # For "manual" documents, if this is true, then toplevel headings are parts, # not chapters. -#latex_use_parts = False +# latex_use_parts = False # If true, show page references after internal links. -#latex_show_pagerefs = False +# latex_show_pagerefs = False # If true, show URL addresses after external links. -#latex_show_urls = False +# latex_show_urls = False # Documents to append as an appendix to all manuals. -#latex_appendices = [] +# latex_appendices = [] # If false, no module index is generated. -#latex_domain_indices = True +# latex_domain_indices = True # -- Options for manual page output --------------------------------------- # One entry per manual page. List of tuples # (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'charm4py', 'Charm4py Documentation', - [author], 1) -] +man_pages = [(master_doc, "charm4py", "Charm4py Documentation", [author], 1)] # If true, show URL addresses after external links. -#man_show_urls = False +# man_show_urls = False # -- Options for Texinfo output ------------------------------------------- @@ -268,19 +259,25 @@ # (source start file, target name, title, author, # dir menu entry, description, category) texinfo_documents = [ - (master_doc, 'charm4py', 'Charm4py Documentation', - author, 'charm4py', 'One line description of project.', - 'Miscellaneous'), + ( + master_doc, + "charm4py", + "Charm4py Documentation", + author, + "charm4py", + "One line description of project.", + "Miscellaneous", + ), ] # Documents to append as an appendix to all manuals. -#texinfo_appendices = [] +# texinfo_appendices = [] # If false, no module index is generated. -#texinfo_domain_indices = True +# texinfo_domain_indices = True # How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' +# texinfo_show_urls = 'footnote' # If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False +# texinfo_no_detailmenu = False diff --git a/examples/cannon/cannon.py b/examples/cannon/cannon.py index e2516236..0a068c35 100644 --- a/examples/cannon/cannon.py +++ b/examples/cannon/cannon.py @@ -9,10 +9,12 @@ def njit(func): return func + @njit def matmul(C, A, B): C += A @ B + class SubMatrix(Chare): def __init__(self, subdim_size, charedim, init_done): super().__init__() @@ -22,12 +24,12 @@ def __init__(self, subdim_size, charedim, init_done): self.neighbor_cache = {} self.sub_a = np.ones((subdim_size, subdim_size), dtype=np.float64) - self.sub_a[:,:] = (charedim*self.thisIndex[1]) + self.thisIndex[0] + self.sub_a[:, :] = (charedim * self.thisIndex[1]) + self.thisIndex[0] self.sub_b = np.ones((subdim_size, subdim_size), dtype=np.float64) - self.sub_b[:,:] = (charedim*self.thisIndex[0]) + self.thisIndex[1] + self.sub_b[:, :] = (charedim * self.thisIndex[0]) + self.thisIndex[1] - self.recv_a = np.ndarray((subdim_size,subdim_size), dtype=np.float64) - self.recv_b = np.ndarray((subdim_size,subdim_size), dtype=np.float64) + self.recv_a = np.ndarray((subdim_size, subdim_size), dtype=np.float64) + self.recv_b = np.ndarray((subdim_size, subdim_size), dtype=np.float64) self.sub_c = np.zeros((subdim_size, subdim_size), dtype=np.float64) @@ -40,9 +42,7 @@ def __init__(self, subdim_size, charedim, init_done): def get_neighbor_channel(self, target_idx): if target_idx not in self.neighbor_cache: - self.neighbor_cache[target_idx] = Channel(self, - self.thisProxy[target_idx] - ) + self.neighbor_cache[target_idx] = Channel(self, self.thisProxy[target_idx]) return self.neighbor_cache[target_idx] @coro @@ -74,12 +74,14 @@ def cannons_multiplication(self, mult_done_future): # the communication routines should be optimized so both sends/receives can complete in parallel def shift(self, up_shift, left_shift): - send_target_idx = ((self.thisIndex[0] - up_shift) % self.charedim, - (self.thisIndex[1] - left_shift) % self.charedim - ) - recv_target_idx = ((self.thisIndex[0] + up_shift) % self.charedim, - (self.thisIndex[1] + left_shift) % self.charedim - ) + send_target_idx = ( + (self.thisIndex[0] - up_shift) % self.charedim, + (self.thisIndex[1] - left_shift) % self.charedim, + ) + recv_target_idx = ( + (self.thisIndex[0] + up_shift) % self.charedim, + (self.thisIndex[1] + left_shift) % self.charedim, + ) send_ch = self.get_neighbor_channel(send_target_idx) recv_ch = self.get_neighbor_channel(recv_target_idx) @@ -95,9 +97,10 @@ def shift(self, up_shift, left_shift): def main(args): if len(args) < 3: print(f"USAGE: {args[0]} matrix_dim chare_dim") - print("matrix_dim and chare_dim must be perfect squares " - "where matrix_dim is divisible by chare_dim" - ) + print( + "matrix_dim and chare_dim must be perfect squares " + "where matrix_dim is divisible by chare_dim" + ) charm.exit(1) matrix_dim = int(args[1]) chare_dim = int(args[2]) @@ -111,9 +114,9 @@ def main(args): print(f"Size of each chare's sub-array: {8*(subdim_size**2)/(1024**2)}MiB") init_done = Future() - chares = Array(SubMatrix, (chare_dim, chare_dim), - args=[subdim_size, chare_dim, init_done] - ) + chares = Array( + SubMatrix, (chare_dim, chare_dim), args=[subdim_size, chare_dim, init_done] + ) init_done.get() mult_done_future = Future() @@ -125,4 +128,5 @@ def main(args): print(f"Elapsed time: {tend-tstart}") charm.exit() + charm.start(main) diff --git a/examples/ccs/ccs_server.py b/examples/ccs/ccs_server.py index 5739949c..cde1de09 100644 --- a/examples/ccs/ccs_server.py +++ b/examples/ccs/ccs_server.py @@ -1,13 +1,15 @@ -from charm4py import charm, Chare, Array, Future, Reducer, Group +from charm4py import charm, Chare, Future, Reducer, Group + def handler(msg): print("CCS Ping handler called on " + str(charm.myPe())) - msg = msg.decode('utf-8') - msg = msg.rstrip('\x00') + msg = msg.decode("utf-8") + msg = msg.rstrip("\x00") answer = "Hello to sender " + str(msg) + " from PE " + str(charm.myPe()) + ".\n" - answer_bytes = answer.encode('utf-8') + answer_bytes = answer.encode("utf-8") charm.CcsSendReply(answer_bytes) + class RegisterPerChare(Chare): def register(self, return_future, handler): @@ -15,6 +17,7 @@ def register(self, return_future, handler): charm.CcsRegisterHandler("ping", handler) self.reduce(return_future, Reducer.nop) + def main(args): # No need to initialize converse, because charm.start does this # just register the handler @@ -25,4 +28,4 @@ def main(args): print("CCS Handlers registered . Waiting for net requests...") -charm.start(main) \ No newline at end of file +charm.start(main) diff --git a/examples/cuda/hapi/hapi-cuda-callback.py b/examples/cuda/hapi/hapi-cuda-callback.py index a7887e52..12690f0d 100644 --- a/examples/cuda/hapi/hapi-cuda-callback.py +++ b/examples/cuda/hapi/hapi-cuda-callback.py @@ -3,12 +3,14 @@ import numba.cuda as cuda import numpy as np + @cuda.jit def elementwise_sum_kernel(x_in, x_out): idx = cuda.grid(1) if idx < x_in.shape[0]: x_out[idx] = x_in[idx] + x_in[idx] + def main(args): N = 1_000_000 array_size = (N,) @@ -33,7 +35,9 @@ def main(args): charm.hapiAddCudaCallback(stream_handle, return_fut) return_fut.get() kernel_done_time = time.perf_counter() - print(f"Callback received, kernel finished in {kernel_done_time - start_time:.6f} seconds.") + print( + f"Callback received, kernel finished in {kernel_done_time - start_time:.6f} seconds." + ) B_host = B_gpu.copy_to_host(stream=s) @@ -44,4 +48,5 @@ def main(args): charm.exit() + charm.start(main) diff --git a/examples/cuda/hapi/multi_gpu_callback.py b/examples/cuda/hapi/multi_gpu_callback.py index bc4f4926..fe0e311f 100644 --- a/examples/cuda/hapi/multi_gpu_callback.py +++ b/examples/cuda/hapi/multi_gpu_callback.py @@ -1,43 +1,48 @@ -''' +""" Use one process to launch two torch matmul kernels, each on a separate device A HAPI callback is registered for each kernel which triggers two different methods Must run this program with 2 different gpus -''' +""" from charm4py import charm import torch + def main(args): - N=10000 + N = 10000 if not torch.cuda.is_available(): print("Error: No GPU detected") charm.exit() if torch.cuda.device_count() < 2: - print("Error: fewer than 2 GPUs, only " + str(torch.cuda.device_count()) + " gpus found") + print( + "Error: fewer than 2 GPUs, only " + + str(torch.cuda.device_count()) + + " gpus found" + ) charm.exit() - - cuda0 = torch.device('cuda:0') #first device - cuda1 = torch.device('cuda:1') #second device + + cuda0 = torch.device("cuda:0") # first device + cuda1 = torch.device("cuda:1") # second device stream0 = torch.cuda.Stream(device=cuda0) stream1 = torch.cuda.Stream(device=cuda1) - #allocate tensors on device 0 + # allocate tensors on device 0 with cuda0: - a0 = torch.randn(N,N) - b0 = torch.randn(N,N) + a0 = torch.randn(N, N) + b0 = torch.randn(N, N) c0 = torch.mm(a0, b0) - - #allocate tensors on device 1 + + # allocate tensors on device 1 with cuda1: - a1 = torch.randn(N,N) - b1 = torch.randn(N,N) + a1 = torch.randn(N, N) + b1 = torch.randn(N, N) c1 = torch.mm(a1, b1) - - #create callbacks (should we implement callbacks to entry methods?) + + # create callbacks (should we implement callbacks to entry methods?) future0 = charm.Future() future1 = charm.Future() print("Future 0 id: ", future0.fid) @@ -47,8 +52,9 @@ def main(args): charm.hapiAddCudaCallback(stream1.cuda_stream, future1) for fut_object in charm.iwait(futures): - print('One device kernel complete, id: ', fut_object.fid) + print("One device kernel complete, id: ", fut_object.fid) charm.exit() + charm.start(main) diff --git a/examples/dist-task-scheduler/scheduler.py b/examples/dist-task-scheduler/scheduler.py index 227db11a..45cb679b 100644 --- a/examples/dist-task-scheduler/scheduler.py +++ b/examples/dist-task-scheduler/scheduler.py @@ -4,7 +4,7 @@ class Job(object): - """ This class is mainly for book-keeping (store and manage job state) """ + """This class is mainly for book-keeping (store and manage job state)""" def __init__(self, job_id, func, tasks, callback): self.id = job_id @@ -31,7 +31,7 @@ def nextTask(self): class Scheduler(Chare): - """ The scheduler sends tasks to distributed workers """ + """The scheduler sends tasks to distributed workers""" def __init__(self): # create a Worker on every process, pass them a reference (proxy) to myself @@ -42,8 +42,8 @@ def __init__(self): self.jobs = {} def map_async(self, func, iterable, callback): - """ Start a new parallel map job (apply func to elements in iterable). - The result will be sent back via the provided callback """ + """Start a new parallel map job (apply func to elements in iterable). + The result will be sent back via the provided callback""" self.addJob(func, list(iterable), callback) self.schedule() @@ -64,7 +64,7 @@ def schedule(self): self.workers[free_worker].apply(job.func, task, task_id, job.id) def taskDone(self, worker_id, task_id, job_id, result): - """ Called by workers to tell the scheduler that they are done with a task """ + """Called by workers to tell the scheduler that they are done with a task""" self.free_workers.add(worker_id) job = self.jobs[job_id] job.addResult(task_id, result) @@ -81,7 +81,7 @@ def __init__(self, scheduler): self.scheduler = scheduler def apply(self, func, arg, task_id, job_id): - """ Apply function to argument and send the result to the scheduler """ + """Apply function to argument and send the result to the scheduler""" result = func(arg) self.scheduler.taskDone(self.thisIndex, task_id, job_id, result) @@ -101,7 +101,7 @@ def main(args): scheduler.map_async(square, [1, 2, 3, 4, 5], callback=future1) scheduler.map_async(square, [1, 3, 5, 7, 9], callback=future2) # wait for the two jobs to complete and print the results - print('Final results are:') + print("Final results are:") print(future1.get()) print(future2.get()) exit() diff --git a/examples/fibonacci/fib-numba.py b/examples/fibonacci/fib-numba.py index 602317b2..f1e44f8e 100644 --- a/examples/fibonacci/fib-numba.py +++ b/examples/fibonacci/fib-numba.py @@ -25,7 +25,7 @@ def fib(n): # this will create two tasks which will be sent to distributed workers # (tasks can execute on any PE). map will block here for the result of # fib(n-1) and fib(n-2), which is why we mark fib as a coroutine - return sum(charm.pool.map(fib, [n-1, n-2])) + return sum(charm.pool.map(fib, [n - 1, n - 2])) @numba.jit(nopython=True, cache=False) # numba really speeds up the computation @@ -33,7 +33,7 @@ def fib_seq(n): if n < 2: return n else: - return fib_seq(n-1) + fib_seq(n-2) + return fib_seq(n - 1) + fib_seq(n - 2) class Util(Chare): @@ -43,7 +43,7 @@ def compile(self): def main(args): global GRAINSIZE - print('\nUsage: fib-numba.py [n] [grainsize]') + print("\nUsage: fib-numba.py [n] [grainsize]") n = 40 if len(args) > 1: n = int(args[1]) @@ -52,14 +52,14 @@ def main(args): GRAINSIZE = int(args[2]) GRAINSIZE = max(2, GRAINSIZE) # set GRAINSIZE as a global variable on all processes before starting - charm.thisProxy.updateGlobals({'GRAINSIZE': GRAINSIZE}, awaitable=True).get() + charm.thisProxy.updateGlobals({"GRAINSIZE": GRAINSIZE}, awaitable=True).get() # precompile fib_seq on every process before the actual computation starts, # by calling the function. this helps get consistent benchmark results Group(Util).compile(awaitable=True).get() - print('Calculating fibonacci of N=' + str(n) + ', grainsize=', GRAINSIZE) + print("Calculating fibonacci of N=" + str(n) + ", grainsize=", GRAINSIZE) t0 = time.time() result = fib(n) - print('Result is', result, 'elapsed=', round(time.time() - t0, 3)) + print("Result is", result, "elapsed=", round(time.time() - t0, 3)) exit() diff --git a/examples/fibonacci/fib.py b/examples/fibonacci/fib.py index 9f44a196..db537e24 100644 --- a/examples/fibonacci/fib.py +++ b/examples/fibonacci/fib.py @@ -19,18 +19,18 @@ def fib(n): # this will create two tasks which will be sent to distributed workers # (tasks can execute on any PE). map will block here for the result of # fib(n-1) and fib(n-2), which is why we mark fib as a coroutine - return sum(charm.pool.map(fib, [n-1, n-2])) + return sum(charm.pool.map(fib, [n - 1, n - 2])) def main(args): - print('\nUsage: fib.py [n]') + print("\nUsage: fib.py [n]") n = 12 if len(args) > 1: n = int(args[1]) - print('Calculating fibonacci of N=' + str(n)) + print("Calculating fibonacci of N=" + str(n)) t0 = time.time() result = fib(n) - print('Result is', result, 'elapsed=', round(time.time() - t0, 3)) + print("Result is", result, "elapsed=", round(time.time() - t0, 3)) exit() diff --git a/examples/fibonacci/fibonacci_with_futures.py b/examples/fibonacci/fibonacci_with_futures.py index 519aa7d0..d29453d3 100644 --- a/examples/fibonacci/fibonacci_with_futures.py +++ b/examples/fibonacci/fibonacci_with_futures.py @@ -1,8 +1,10 @@ from charm4py import charm, Chare, Future, coro -#modeled after the charm with futures example in the charm++ textbook + +# modeled after the charm with futures example in the charm++ textbook THRESHOLD = 20 + class Fib(Chare): @coro @@ -33,6 +35,7 @@ def seqFib(self, n): else: return self.seqFib(n - 1) + self.seqFib(n - 2) + @coro def main(args): if len(args) < 2: @@ -54,4 +57,5 @@ def main(args): print("The requested Fibonacci number is:", res) charm.exit() + charm.start(main) diff --git a/examples/hello/array_hello.py b/examples/hello/array_hello.py index 577f1e60..fdcf8b08 100644 --- a/examples/hello/array_hello.py +++ b/examples/hello/array_hello.py @@ -11,17 +11,22 @@ def __init__(self, array_dims): self.array_dims = array_dims def sayHi(self, hello_num): - print('Hi[' + str(hello_num) + '] from element', self.thisIndex, 'on PE', charm.myPe()) - lastIdx = tuple([size-1 for size in self.array_dims]) + print( + "Hi[" + str(hello_num) + "] from element", + self.thisIndex, + "on PE", + charm.myPe(), + ) + lastIdx = tuple([size - 1 for size in self.array_dims]) if self.thisIndex == lastIdx: # this is the last index, we are done - print('All done') + print("All done") exit() else: # send a hello message to the next element (in row-major order) nextIndex = list(self.thisIndex) num_dims = len(self.array_dims) - for i in range(num_dims-1, -1, -1): + for i in range(num_dims - 1, -1, -1): nextIndex[i] = (nextIndex[i] + 1) % self.array_dims[i] if nextIndex[i] != 0: break @@ -29,7 +34,7 @@ def sayHi(self, hello_num): def main(args): - print('\nUsage: array_hello.py [dim1_size dim2_size ...]') + print("\nUsage: array_hello.py [dim1_size dim2_size ...]") array_dims = (2, 2, 2) # default: create a 2 x 2 x 2 chare array if len(args) > 1: array_dims = tuple([int(x) for x in args[1:]]) @@ -37,8 +42,14 @@ def main(args): num_elems = 1 for size in array_dims: num_elems *= size - print('Running Hello on', charm.numPes(), 'processors for', num_elems, - 'elements, array dimensions are', array_dims) + print( + "Running Hello on", + charm.numPes(), + "processors for", + num_elems, + "elements, array dimensions are", + array_dims, + ) # create a chare array of Hello chares, passing the array dimensions to # each element's constructor diff --git a/examples/hello/group_hello.py b/examples/hello/group_hello.py index 8346079c..a609ede4 100644 --- a/examples/hello/group_hello.py +++ b/examples/hello/group_hello.py @@ -8,10 +8,10 @@ class Hello(Chare): def sayHi(self, hello_num): - print('Hi[' + str(hello_num) + '] from element', self.thisIndex) + print("Hi[" + str(hello_num) + "] from element", self.thisIndex) if self.thisIndex == charm.numPes() - 1: # we reached the last element - print('All done') + print("All done") exit() else: # pass the hello message to the next element @@ -19,11 +19,11 @@ def sayHi(self, hello_num): def main(args): - print('\nRunning Hello on', charm.numPes(), 'processors') + print("\nRunning Hello on", charm.numPes(), "processors") # create a Group of Hello chares (there will be one chare per PE) group_proxy = Group(Hello) # send hello message to the first element group_proxy[0].sayHi(17) -charm.start(main) \ No newline at end of file +charm.start(main) diff --git a/examples/hwmon/hwmon.py b/examples/hwmon/hwmon.py index 9d36b7d9..009a5768 100644 --- a/examples/hwmon/hwmon.py +++ b/examples/hwmon/hwmon.py @@ -15,15 +15,15 @@ class Controller(Chare): @coro def start(self, monitors, logfilename=None): - print('\nStarting hardware monitor...') + print("\nStarting hardware monitor...") if logfilename is not None: - self.log = open(logfilename, 'a') + self.log = open(logfilename, "a") else: self.log = sys.stdout self.hosts = monitors.getHostName(ret=True).get() for i, host in enumerate(self.hosts): - print('Monitor', i, 'running on host', host) - print('Going to run for', EXIT_AFTER_SECS, 'secs') + print("Monitor", i, "running on host", host) + print("Going to run for", EXIT_AFTER_SECS, "secs") monitors.start(self.thisProxy) charm.scheduleCallableAfter(self.thisProxy.close, EXIT_AFTER_SECS) @@ -32,7 +32,13 @@ def close(self): exit() def reportAboveThreshold(self, values, from_id): - self.log.write('Host ' + str(self.hosts[from_id]) + ' is running hot: ' + str(values) + '\n') + self.log.write( + "Host " + + str(self.hosts[from_id]) + + " is running hot: " + + str(values) + + "\n" + ) self.log.flush() @@ -55,11 +61,11 @@ def getHostName(self): def read_sensor(self): # note that this depends on specific output format of the sensors # command, which could change in the future. Adapt as needed - lines = subprocess.check_output('sensors').decode().split('\n') + lines = subprocess.check_output("sensors").decode().split("\n") temps = [] for l in lines: fields = l.split() - if len(fields) > 0 and fields[0] == 'Core': + if len(fields) > 0 and fields[0] == "Core": temps.append(float(fields[2][1:-2])) return temps diff --git a/examples/jacobi/jacobi2d.py b/examples/jacobi/jacobi2d.py index 3cdefa07..d3946f67 100644 --- a/examples/jacobi/jacobi2d.py +++ b/examples/jacobi/jacobi2d.py @@ -1,15 +1,19 @@ from charm4py import charm, Chare, Group, Array, Future, coro, Channel, Reducer import time import numpy as np + try: from numba import jit + numbaFound = True except ImportError: numbaFound = False + # create a dummy numba.jit decorator def jit(*args, **kwargs): def deco(func): return func + return deco @@ -26,8 +30,10 @@ def __init__(self, sim_done_future): # store future to notify main function when computation is done self.sim_done_future = sim_done_future # each chare has a 2D block of the global array (the block is a 2D NumPy array) - self.temperature = np.zeros((blockDimX+2, blockDimY+2), dtype=np.float64) - self.new_temperature = np.zeros((blockDimX+2, blockDimY+2), dtype=np.float64) + self.temperature = np.zeros((blockDimX + 2, blockDimY + 2), dtype=np.float64) + self.new_temperature = np.zeros( + (blockDimX + 2, blockDimY + 2), dtype=np.float64 + ) # determine border conditions, who my neighbors are and establish Channels with them self.leftBound = self.rightBound = self.topBound = self.bottomBound = False @@ -41,67 +47,78 @@ def __init__(self, sim_done_future): self.leftBound = True self.istart += 1 else: - self.left_nb = Channel(self, remote=self.thisProxy[(x-1, y)]) + self.left_nb = Channel(self, remote=self.thisProxy[(x - 1, y)]) self.nbs.append(self.left_nb) if x == num_chare_x - 1: self.rightBound = True self.ifinish -= 1 else: - self.right_nb = Channel(self, remote=self.thisProxy[(x+1, y)]) + self.right_nb = Channel(self, remote=self.thisProxy[(x + 1, y)]) self.nbs.append(self.right_nb) if y == 0: self.topBound = True self.jstart += 1 else: - self.top_nb = Channel(self, remote=self.thisProxy[(x, y-1)]) + self.top_nb = Channel(self, remote=self.thisProxy[(x, y - 1)]) self.nbs.append(self.top_nb) if y == num_chare_y - 1: self.bottomBound = True self.jfinish -= 1 else: - self.bottom_nb = Channel(self, remote=self.thisProxy[(x, y+1)]) + self.bottom_nb = Channel(self, remote=self.thisProxy[(x, y + 1)]) self.nbs.append(self.bottom_nb) self.constrainBC() @coro def run(self): - """ this is the main computation loop """ + """this is the main computation loop""" iteration = 0 converged = False while not converged and iteration < MAX_ITER: # send ghost faces to my neighbors. sends are asynchronous if not self.leftBound: - self.left_nb.send(RIGHT, self.temperature[1, 1:blockDimY+1]) + self.left_nb.send(RIGHT, self.temperature[1, 1 : blockDimY + 1]) if not self.rightBound: - self.right_nb.send(LEFT, self.temperature[blockDimX, 1:blockDimY+1]) + self.right_nb.send(LEFT, self.temperature[blockDimX, 1 : blockDimY + 1]) if not self.topBound: - self.top_nb.send(BOTTOM, self.temperature[1:blockDimX+1, 1]) + self.top_nb.send(BOTTOM, self.temperature[1 : blockDimX + 1, 1]) if not self.bottomBound: - self.bottom_nb.send(TOP, self.temperature[1:blockDimX+1, blockDimY]) + self.bottom_nb.send(TOP, self.temperature[1 : blockDimX + 1, blockDimY]) # receive ghost data from neighbors. iawait iteratively yields # channels as they become ready (have data to receive) for nb in charm.iwait(self.nbs): direction, ghosts = nb.recv() if direction == LEFT: - self.temperature[0, 1:len(ghosts)+1] = ghosts + self.temperature[0, 1 : len(ghosts) + 1] = ghosts elif direction == RIGHT: - self.temperature[blockDimX+1, 1:len(ghosts)+1] = ghosts + self.temperature[blockDimX + 1, 1 : len(ghosts) + 1] = ghosts elif direction == TOP: - self.temperature[1:len(ghosts)+1, 0] = ghosts + self.temperature[1 : len(ghosts) + 1, 0] = ghosts elif direction == BOTTOM: - self.temperature[1:len(ghosts)+1, blockDimY+1] = ghosts + self.temperature[1 : len(ghosts) + 1, blockDimY + 1] = ghosts else: - charm.abort('Invalid direction') - - max_error = check_and_compute(self.temperature, self.new_temperature, - self.istart, self.ifinish, self.jstart, self.jfinish) - self.temperature, self.new_temperature = self.new_temperature, self.temperature - converged = self.allreduce(max_error <= THRESHOLD, Reducer.logical_and).get() + charm.abort("Invalid direction") + + max_error = check_and_compute( + self.temperature, + self.new_temperature, + self.istart, + self.ifinish, + self.jstart, + self.jfinish, + ) + self.temperature, self.new_temperature = ( + self.new_temperature, + self.temperature, + ) + converged = self.allreduce( + max_error <= THRESHOLD, Reducer.logical_and + ).get() iteration += 1 if self.thisIndex == (0, 0): @@ -111,17 +128,17 @@ def run(self): def constrainBC(self): # enforce some boundary conditions if self.topBound: - self.temperature[0:blockDimX+2, 1] = 1.0 - self.new_temperature[0:blockDimX+2, 1] = 1.0 + self.temperature[0 : blockDimX + 2, 1] = 1.0 + self.new_temperature[0 : blockDimX + 2, 1] = 1.0 if self.leftBound: - self.temperature[1, 0:blockDimY+2] = 1.0 - self.new_temperature[1, 0:blockDimY+2] = 1.0 + self.temperature[1, 0 : blockDimY + 2] = 1.0 + self.new_temperature[1, 0 : blockDimY + 2] = 1.0 if self.bottomBound: - self.temperature[0:blockDimX+2, blockDimY] = 1.0 - self.new_temperature[0:blockDimX+2, blockDimY] = 1.0 + self.temperature[0 : blockDimX + 2, blockDimY] = 1.0 + self.new_temperature[0 : blockDimX + 2, blockDimY] = 1.0 if self.rightBound: - self.temperature[blockDimX, 0:blockDimY+2] = 1.0 - self.new_temperature[blockDimX, 0:blockDimY+2] = 1.0 + self.temperature[blockDimX, 0 : blockDimY + 2] = 1.0 + self.new_temperature[blockDimX, 0 : blockDimY + 2] = 1.0 @jit(nopython=True, cache=False) @@ -131,31 +148,35 @@ def check_and_compute(temperature, new_temperature, istart, ifinish, jstart, jfi # when all neighbor values have been received, we update our values and proceed for i in range(istart, ifinish): for j in range(jstart, jfinish): - temperature_ith = (temperature[i,j] - + temperature[i-1,j] + temperature[i+1,j] - + temperature[i,j-1] + temperature[i,j+1]) * 0.2 + temperature_ith = ( + temperature[i, j] + + temperature[i - 1, j] + + temperature[i + 1, j] + + temperature[i, j - 1] + + temperature[i, j + 1] + ) * 0.2 # update relative error - difference = temperature_ith - temperature[i,j] + difference = temperature_ith - temperature[i, j] if difference < 0: difference *= -1.0 if max_error <= difference: max_error = difference - new_temperature[i,j] = temperature_ith + new_temperature[i, j] = temperature_ith return max_error class Util(Chare): def compile(self): - T = np.zeros((blockDimX+2, blockDimY+2), dtype=np.float64) - NT = np.zeros((blockDimX+2, blockDimY+2), dtype=np.float64) - check_and_compute(T, NT, 1, blockDimX+1, 1, blockDimY+1) + T = np.zeros((blockDimX + 2, blockDimY + 2), dtype=np.float64) + NT = np.zeros((blockDimX + 2, blockDimY + 2), dtype=np.float64) + check_and_compute(T, NT, 1, blockDimX + 1, 1, blockDimY + 1) def main(args): global blockDimX, blockDimY, num_chare_x, num_chare_y if len(args) != 3 and len(args) != 5: - print('\nUsage:\t', args[0], 'array_size block_size') - print('\t', args[0], 'array_size_X array_size_Y block_size_X block_size_Y') + print("\nUsage:\t", args[0], "array_size block_size") + print("\t", args[0], "array_size_X array_size_Y block_size_X block_size_Y") exit() if len(args) == 3: @@ -171,39 +192,65 @@ def main(args): num_chare_y = arrayDimY // blockDimY # set the following global variables on every PE, wait for the call to complete - charm.thisProxy.updateGlobals({'blockDimX': blockDimX, - 'blockDimY': blockDimY, - 'num_chare_x': num_chare_x, - 'num_chare_y': num_chare_y}, - awaitable=True).get() - - print('\nRunning Jacobi on', charm.numPes(), 'processors with', num_chare_x, 'x', num_chare_y, 'chares') - print('Array Dimensions:', arrayDimX, 'x', arrayDimY) - print('Block Dimensions:', blockDimX, 'x', blockDimY) - print('Max iterations:', MAX_ITER) - print('Threshold:', THRESHOLD) + charm.thisProxy.updateGlobals( + { + "blockDimX": blockDimX, + "blockDimY": blockDimY, + "num_chare_x": num_chare_x, + "num_chare_y": num_chare_y, + }, + awaitable=True, + ).get() + + print( + "\nRunning Jacobi on", + charm.numPes(), + "processors with", + num_chare_x, + "x", + num_chare_y, + "chares", + ) + print("Array Dimensions:", arrayDimX, "x", arrayDimY) + print("Block Dimensions:", blockDimX, "x", blockDimY) + print("Max iterations:", MAX_ITER) + print("Threshold:", THRESHOLD) if numbaFound: # wait until Numba functions are compiled on every PE, so we can get consistent benchmark results Group(Util).compile(awaitable=True).get() - print('Numba compilation complete') + print("Numba compilation complete") else: - print('!!WARNING!! Numba not found. Will run without Numba but it will be very slow') + print( + "!!WARNING!! Numba not found. Will run without Numba but it will be very slow" + ) sim_done = Future() # create 2D chare array of Jacobi objects (each chare will hold one block) array = Array(Jacobi, (num_chare_x, num_chare_y), args=[sim_done]) charm.awaitCreation(array) - print('Starting computation') + print("Starting computation") initTime = time.time() array.run() # this is a broadcast total_iterations = sim_done.get() # wait until the computation completes totalTime = time.time() - initTime if total_iterations >= MAX_ITER: - print('Finished due to max iterations', total_iterations, 'total time', round(totalTime, 3), 'seconds') + print( + "Finished due to max iterations", + total_iterations, + "total time", + round(totalTime, 3), + "seconds", + ) else: - print('Finished due to convergence, iterations', total_iterations, 'total time', round(totalTime, 3), 'seconds') + print( + "Finished due to convergence, iterations", + total_iterations, + "total time", + round(totalTime, 3), + "seconds", + ) exit() diff --git a/examples/liveviz/liveviz.py b/examples/liveviz/liveviz.py index c1538f02..730adc88 100644 --- a/examples/liveviz/liveviz.py +++ b/examples/liveviz/liveviz.py @@ -1,36 +1,41 @@ -from charm4py import charm, Chare, Array, Future, Reducer, Group, liveviz, coro +from charm4py import charm, Chare, Array, liveviz import random + class Unit(Chare): - - def __init__(self): - self.colors = [(200, 0, 0), (0, 200, 0), (0, 0, 200)] - - def reqImg(self, request): - self.particles = [] - - for _ in range(300): - x = random.randint(0, 49) - y = random.randint(0, 49) - - color = random.choice(self.colors) - - self.particles.append((x, y, color)) - - data = bytearray(50 * 50 * 3) - - for x, y, (r, g, b) in self.particles: - pixel_index = (y * 50 + x) * 3 - data[pixel_index] = r - data[pixel_index + 1] = g - data[pixel_index + 2] = b - - liveviz.LiveViz.deposit(data, self, self.thisIndex[0]*50, self.thisIndex[1]*50, 50, 50, 800, 800) + + def __init__(self): + self.colors = [(200, 0, 0), (0, 200, 0), (0, 0, 200)] + + def reqImg(self, request): + self.particles = [] + + for _ in range(300): + x = random.randint(0, 49) + y = random.randint(0, 49) + + color = random.choice(self.colors) + + self.particles.append((x, y, color)) + + data = bytearray(50 * 50 * 3) + + for x, y, (r, g, b) in self.particles: + pixel_index = (y * 50 + x) * 3 + data[pixel_index] = r + data[pixel_index + 1] = g + data[pixel_index + 2] = b + + liveviz.LiveViz.deposit( + data, self, self.thisIndex[0] * 50, self.thisIndex[1] * 50, 50, 50, 800, 800 + ) + def main(args): - units = Array(Unit, dims=(16,16)) + units = Array(Unit, dims=(16, 16)) config = liveviz.Config() liveviz.LiveViz.init(config, units.reqImg) print("CCS Handlers registered . Waiting for net requests...") + charm.start(main) diff --git a/examples/liveviz/liveviz_poll.py b/examples/liveviz/liveviz_poll.py index ed1ea873..5b9f4c15 100644 --- a/examples/liveviz/liveviz_poll.py +++ b/examples/liveviz/liveviz_poll.py @@ -1,39 +1,50 @@ -from charm4py import charm, Chare, Array, Future, Reducer, Group, liveviz, coro -import time +from charm4py import charm, Chare, Array, liveviz import random + class Unit(Chare): - - def __init__(self): - self.colors = [(200, 0, 0), (0, 200, 0), (0, 0, 200)] - - def reqImg(self): - for i in range(50): - self.particles = [] - - for _ in range(300): - x = random.randint(0, 49) - y = random.randint(0, 49) - - color = random.choice(self.colors) - - self.particles.append((x, y, color)) - - data = bytearray(50 * 50 * 3) - - for x, y, (r, g, b) in self.particles: - pixel_index = (y * 50 + x) * 3 - data[pixel_index] = r - data[pixel_index + 1] = g - data[pixel_index + 2] = b - - liveviz.LiveViz.deposit(data, self, self.thisIndex[0]*50, self.thisIndex[1]*50, 50, 50, 800, 800) + + def __init__(self): + self.colors = [(200, 0, 0), (0, 200, 0), (0, 0, 200)] + + def reqImg(self): + for i in range(50): + self.particles = [] + + for _ in range(300): + x = random.randint(0, 49) + y = random.randint(0, 49) + + color = random.choice(self.colors) + + self.particles.append((x, y, color)) + + data = bytearray(50 * 50 * 3) + + for x, y, (r, g, b) in self.particles: + pixel_index = (y * 50 + x) * 3 + data[pixel_index] = r + data[pixel_index + 1] = g + data[pixel_index + 2] = b + + liveviz.LiveViz.deposit( + data, + self, + self.thisIndex[0] * 50, + self.thisIndex[1] * 50, + 50, + 50, + 800, + 800, + ) + def main(args): - units = Array(Unit, dims=(16,16)) + units = Array(Unit, dims=(16, 16)) config = liveviz.Config() liveviz.LiveViz.init(config, units.reqImg, poll=True) units.reqImg() print("CCS Handlers registered . Waiting for net requests...") + charm.start(main) diff --git a/examples/miniapps/LeanMD/main.py b/examples/miniapps/LeanMD/main.py index 1b7ed5b6..321c70c9 100644 --- a/examples/miniapps/LeanMD/main.py +++ b/examples/miniapps/LeanMD/main.py @@ -1,4 +1,3 @@ -import array import random import numba import math @@ -6,15 +5,16 @@ import time from charm4py import * + class GlobalDefs: # These need to be member variables because it simplifies broadcasting - def __init__( self ): + def __init__(self): self.BLOCK_SIZE = 512 - self.HYDROGEN_MASS = ( 1.67 * 1e-24 ) # in g - self.VDW_A = ( 1.1328 * 1e-133 ) # in (g m^2/s^2) m^12 - self.VDW_B = ( 2.23224 * 1e-76 ) # (g m^2/s^2) m^6 + self.HYDROGEN_MASS = 1.67 * 1e-24 # in g + self.VDW_A = 1.1328 * 1e-133 # in (g m^2/s^2) m^12 + self.VDW_B = 2.23224 * 1e-76 # (g m^2/s^2) m^6 - self.ENERGY_VAR = (1.0 * 1e-5 ) + self.ENERGY_VAR = 1.0 * 1e-5 # average of next two should be what you want as your atom density # this should comply with the PERDIM parameter; for KAWAY 1 1 1, the maximum number @@ -26,7 +26,7 @@ def __init__( self ): self.PARTICLES_PER_CELL_START = 100 self.PARTICLES_PER_CELL_END = 250 - self.DEFAULT_DELTA = 1 # in femtoseconds + self.DEFAULT_DELTA = 1 # in femtoseconds self.DEFAULT_FIRST_LDB = 20 self.DEFAULT_LDB_PERIOD = 20 @@ -35,25 +35,24 @@ def __init__( self ): self.KAWAY_X = 2 self.KAWAY_Y = 2 self.KAWAY_Z = 1 - self.NBRS_X = (2*self.KAWAY_X+1) - self.NBRS_Y = (2*self.KAWAY_Y+1) - self.NBRS_Z = (2*self.KAWAY_Z+1) - self.NUM_NEIGHBORS = (self.NBRS_X * self.NBRS_Y * self.NBRS_Z) + self.NBRS_X = 2 * self.KAWAY_X + 1 + self.NBRS_Y = 2 * self.KAWAY_Y + 1 + self.NBRS_Z = 2 * self.KAWAY_Z + 1 + self.NUM_NEIGHBORS = self.NBRS_X * self.NBRS_Y * self.NBRS_Z self.CELLARRAY_DIM_X = 3 self.CELLARRAY_DIM_Y = 3 self.CELLARRAY_DIM_Z = 3 - self.PTP_CUT_OFF = 26 # cut off for atom to atom interactions + self.PTP_CUT_OFF = 26 # cut off for atom to atom interactions self.CELL_MARGIN = 4 # constant diff between cutoff and cell size - self.CELL_SIZE_X = (self.PTP_CUT_OFF + self.CELL_MARGIN)//self.KAWAY_X - self.CELL_SIZE_Y = (self.PTP_CUT_OFF + self.CELL_MARGIN)//self.KAWAY_Y - self.CELL_SIZE_Z = (self.PTP_CUT_OFF + self.CELL_MARGIN)//self.KAWAY_Z + self.CELL_SIZE_X = (self.PTP_CUT_OFF + self.CELL_MARGIN) // self.KAWAY_X + self.CELL_SIZE_Y = (self.PTP_CUT_OFF + self.CELL_MARGIN) // self.KAWAY_Y + self.CELL_SIZE_Z = (self.PTP_CUT_OFF + self.CELL_MARGIN) // self.KAWAY_Z self.cellArrayDimX = self.CELLARRAY_DIM_X self.cellArrayDimY = self.CELLARRAY_DIM_Y self.cellArrayDimZ = self.CELLARRAY_DIM_Z - # variables to control initial uniform placement of atoms; # atoms should not be too close at startup for a stable system # PERDIM * GAP should be less than (PTPCUTOFF+CELL_MARGIN) @@ -67,158 +66,197 @@ def __init__( self ): self.MIGRATE_STEPCOUNT = 20 self.DEFAULT_FINALSTEPCOUNT = 1001 - self.MAX_VELOCITY = .1 # in A/fs + self.MAX_VELOCITY = 0.1 # in A/fs self.finalStepCount = self.DEFAULT_FINALSTEPCOUNT self.firstLdbStep = self.DEFAULT_FIRST_LDB self.ldbPeriod = self.DEFAULT_LDB_PERIOD - # Proxies for the different arrays self.cellArray = None self.computeArray = None + def WRAP_X(a): return (a + cellArrayDimX) % cellArrayDimX + + def WRAP_Y(a): return (a + cellArrayDimY) % cellArrayDimY + + def WRAP_Z(a): return (a + cellArrayDimZ) % cellArrayDimZ -@numba.njit( cache = True ) -def velocityCheck( inVelocity: float ) -> float: - if abs( inVelocity ) > MAX_VELOCITY: +@numba.njit(cache=True) +def velocityCheck(inVelocity: float) -> float: + if abs(inVelocity) > MAX_VELOCITY: if inVelocity < 0.0: return -1 * MAX_VELOCITY return MAX_VELOCITY return inVelocity -@numba.njit( cache = True ) -def updateProperties( forces, particle_mass, particle_vel, particle_pos, - energy, stepCount, finalStepCount ): - powTen = 10.0 ** 10 - powTwenty = 10.0 ** -20 +@numba.njit(cache=True) +def updateProperties( + forces, particle_mass, particle_vel, particle_pos, energy, stepCount, finalStepCount +): + powTen = 10.0**10 + powTwenty = 10.0**-20 realTimeDeltaVel = DEFAULT_DELTA * powTwenty for i in range(particle_mass.size): mass = particle_mass[i] # calculate energy only at beginning and end - if (stepCount == 1): - dot = particle_vel[i,0]**2 + particle_vel[i,1]**2 + particle_vel[i,2]**2 - energy[0] += (0.5 * mass * dot * powTen) # in milliJoules - elif (stepCount == finalStepCount): - dot = particle_vel[i,0]**2 + particle_vel[i,1]**2 + particle_vel[i,2]**2 - energy[1] += (0.5 * mass * dot * powTen) + if stepCount == 1: + dot = ( + particle_vel[i, 0] ** 2 + + particle_vel[i, 1] ** 2 + + particle_vel[i, 2] ** 2 + ) + energy[0] += 0.5 * mass * dot * powTen # in milliJoules + elif stepCount == finalStepCount: + dot = ( + particle_vel[i, 0] ** 2 + + particle_vel[i, 1] ** 2 + + particle_vel[i, 2] ** 2 + ) + energy[1] += 0.5 * mass * dot * powTen # apply kinetic equations invMassParticle = 1.0 / mass - #self.particles[i].acc = forces[i] * invMassParticle # in m/sec^2 - #self.particles[i].vel += self.particles[i].acc * realTimeDeltaVel # in A/fs - # in m/sec^2 - particle_vel[i,0] += forces[i,0] * invMassParticle * realTimeDeltaVel # in A/fs - particle_vel[i,1] += forces[i,1] * invMassParticle * realTimeDeltaVel # in A/fs - particle_vel[i,2] += forces[i,2] * invMassParticle * realTimeDeltaVel # in A/fs - - particle_vel[i,0] = velocityCheck(particle_vel[i,0]) - particle_vel[i,1] = velocityCheck(particle_vel[i,1]) - particle_vel[i,2] = velocityCheck(particle_vel[i,2]) - - particle_pos[i,0] += particle_vel[i,0] * DEFAULT_DELTA # in A - particle_pos[i,1] += particle_vel[i,1] * DEFAULT_DELTA # in A - particle_pos[i,2] += particle_vel[i,2] * DEFAULT_DELTA # in A - -class CellMap( ArrayMap ): + # self.particles[i].acc = forces[i] * invMassParticle # in m/sec^2 + # self.particles[i].vel += self.particles[i].acc * realTimeDeltaVel # in A/fs + # in m/sec^2 + particle_vel[i, 0] += ( + forces[i, 0] * invMassParticle * realTimeDeltaVel + ) # in A/fs + particle_vel[i, 1] += ( + forces[i, 1] * invMassParticle * realTimeDeltaVel + ) # in A/fs + particle_vel[i, 2] += ( + forces[i, 2] * invMassParticle * realTimeDeltaVel + ) # in A/fs + + particle_vel[i, 0] = velocityCheck(particle_vel[i, 0]) + particle_vel[i, 1] = velocityCheck(particle_vel[i, 1]) + particle_vel[i, 2] = velocityCheck(particle_vel[i, 2]) + + particle_pos[i, 0] += particle_vel[i, 0] * DEFAULT_DELTA # in A + particle_pos[i, 1] += particle_vel[i, 1] * DEFAULT_DELTA # in A + particle_pos[i, 2] += particle_vel[i, 2] * DEFAULT_DELTA # in A + + +class CellMap(ArrayMap): # group - def __init__( self, cellX, cellY, cellZ ): + def __init__(self, cellX, cellY, cellZ): self.num_x = cellX self.num_y = cellY self.num_z = cellZ self.num_yz = self.num_y * self.num_z - self.ratio = charm.numPes() / ( self.num_x * self.num_yz ) + self.ratio = charm.numPes() / (self.num_x * self.num_yz) + + def procNum(self, index): + patchID = index[2] + index[1] * self.num_z + index[0] * self.num_yz + return int(patchID * self.ratio) - def procNum( self, index ): - patchID = index[ 2 ] + index[ 1 ] * self.num_z + index[ 0 ] * self.num_yz - return int( patchID * self.ratio ) class Particle: - def __init__( self ): + def __init__(self): self.mass = 0.0 - self.position = np.zeros( 3 ) - self.acceleration = np.zeros( 3 ) - self.velocity = np.zeros( 3 ) + self.position = np.zeros(3) + self.acceleration = np.zeros(3) + self.velocity = np.zeros(3) + -class Cell( Chare ): +class Cell(Chare): - def __init__( self, energyFuture ): - self.stepCount :int = 0 - self.mynumParts :int = 0 - self.inbrs :int = NUM_NEIGHBORS + def __init__(self, energyFuture): + self.stepCount: int = 0 + self.mynumParts: int = 0 + self.inbrs: int = NUM_NEIGHBORS self.stepTime: float = 0 self.computesList = [0] * self.inbrs self.neighborChannels = list() - self.updateCount :int = 0 + self.updateCount: int = 0 self.duplicateComputes = None - self.energy = np.zeros(2, dtype = np.float64) + self.energy = np.zeros(2, dtype=np.float64) self.mCastSecProxy = None self.energyFuture = energyFuture - self.myid: int = self.thisIndex[ 2 ] + cellArrayDimZ * \ - ( self.thisIndex[1] + self.thisIndex[0] * cellArrayDimY) + self.myid: int = self.thisIndex[2] + cellArrayDimZ * ( + self.thisIndex[1] + self.thisIndex[0] * cellArrayDimY + ) - num = self.myid * (PARTICLES_PER_CELL_END-PARTICLES_PER_CELL_START) - denom = cellArrayDimX*cellArrayDimY*cellArrayDimZ - self.myNumParts = PARTICLES_PER_CELL_START + ( num // denom ) + num = self.myid * (PARTICLES_PER_CELL_END - PARTICLES_PER_CELL_START) + denom = cellArrayDimX * cellArrayDimY * cellArrayDimZ + self.myNumParts = PARTICLES_PER_CELL_START + (num // denom) - self.particle_mass = np.zeros( self.myNumParts, dtype = np.float64 ) - self.particle_pos = np.zeros( ( self.myNumParts, 3 ), dtype = np.float64 ) - self.particle_vel = np.zeros( ( self.myNumParts, 3 ), dtype = np.float64 ) + self.particle_mass = np.zeros(self.myNumParts, dtype=np.float64) + self.particle_pos = np.zeros((self.myNumParts, 3), dtype=np.float64) + self.particle_vel = np.zeros((self.myNumParts, 3), dtype=np.float64) self.neighborChannels = self.createNeighborChannels() - random.seed( self.myid ) + random.seed(self.myid) - for i in range( self.myNumParts ): - self.particle_mass[ i ] = HYDROGEN_MASS + for i in range(self.myNumParts): + self.particle_mass[i] = HYDROGEN_MASS # uniformly place particles, avoid close distance among them - x = (GAP/2.0) + self.thisIndex[0] * CELL_SIZE_X + ((i*KAWAY_Y*KAWAY_Z)//(PERDIM*PERDIM))*GAP - y = (GAP/2.0) + self.thisIndex[1] * CELL_SIZE_Y + (((i*KAWAY_Z)//PERDIM)%(PERDIM//KAWAY_Y))*GAP - z = (GAP/2.0) + self.thisIndex[2] * CELL_SIZE_Z + (i%(PERDIM//KAWAY_Z))*GAP - self.particle_pos[ i ] = x, y, z - - self.particle_vel[i] = np.array( ( (random.random() - 0.5) * .2 * MAX_VELOCITY, - (random.random() - 0.5) * .2 * MAX_VELOCITY, - (random.random() - 0.5) * .2 * MAX_VELOCITY), - dtype = np.float64 + x = ( + (GAP / 2.0) + + self.thisIndex[0] * CELL_SIZE_X + + ((i * KAWAY_Y * KAWAY_Z) // (PERDIM * PERDIM)) * GAP + ) + y = ( + (GAP / 2.0) + + self.thisIndex[1] * CELL_SIZE_Y + + (((i * KAWAY_Z) // PERDIM) % (PERDIM // KAWAY_Y)) * GAP + ) + z = ( + (GAP / 2.0) + + self.thisIndex[2] * CELL_SIZE_Z + + (i % (PERDIM // KAWAY_Z)) * GAP + ) + self.particle_pos[i] = x, y, z + + self.particle_vel[i] = np.array( + ( + (random.random() - 0.5) * 0.2 * MAX_VELOCITY, + (random.random() - 0.5) * 0.2 * MAX_VELOCITY, + (random.random() - 0.5) * 0.2 * MAX_VELOCITY, + ), + dtype=np.float64, ) - self.energy[ 0 ] = 0 - self.energy[ 1 ] = 0 + self.energy[0] = 0 + self.energy[1] = 0 - def reportDuplicates( self ): + def reportDuplicates(self): for d in self.duplicateComputes: - computeArray[ d ].setDuplicate() + computeArray[d].setDuplicate() - def nbrNumtoNbrIdx( self, num ): + def nbrNumtoNbrIdx(self, num): x1 = num // (NBRS_Y * NBRS_Z) - NBRS_X // 2 y1 = (num % (NBRS_Y * NBRS_Z)) // NBRS_Z - NBRS_Y // 2 z1 = num % NBRS_Z - NBRS_Z // 2 - return ( WRAP_X( self.thisIndex[ 0 ] + x1 ), - WRAP_Y( self.thisIndex[ 1 ] + y1 ), - WRAP_Z( self.thisIndex[ 2 ] + z1 ) + return ( + WRAP_X(self.thisIndex[0] + x1), + WRAP_Y(self.thisIndex[1] + y1), + WRAP_Z(self.thisIndex[2] + z1), ) @coro - def createNeighborChannels( self ): + def createNeighborChannels(self): output = list() - for num in range( self.inbrs ): - nbrIdx = self.nbrNumtoNbrIdx( num ) - output.append( Channel( self, remote = self.thisProxy[ nbrIdx ] ) ) + for num in range(self.inbrs): + nbrIdx = self.nbrNumtoNbrIdx(num) + output.append(Channel(self, remote=self.thisProxy[nbrIdx])) return output - def createComputes( self ): + def createComputes(self): x, y, z = self.thisIndex currPe = charm.myPe() + 1 @@ -226,9 +264,9 @@ def createComputes( self ): dupes = list() seen = set() - for num in range( self.inbrs ): - dx = num // ( NBRS_Y * NBRS_Z ) - NBRS_X // 2 - dy = ( num % ( NBRS_Y * NBRS_Z ) ) // NBRS_Z - NBRS_Y // 2 + for num in range(self.inbrs): + dx = num // (NBRS_Y * NBRS_Z) - NBRS_X // 2 + dy = (num % (NBRS_Y * NBRS_Z)) // NBRS_Z - NBRS_Y // 2 dz = num % NBRS_Z - NBRS_Z // 2 if num >= self.inbrs // 2: @@ -243,9 +281,12 @@ def createComputes( self ): currPe += 1 # CkArrayIndex6D index(px1, py1, pz1, px2, py2, pz2); - index = ( px1, py1, pz1, px2, py2, pz2 ) - computeArray.ckInsert( index, onPE = ( currPe ) % charm.numPes(), - args = [ self.energyFuture ], useAtSync = True + index = (px1, py1, pz1, px2, py2, pz2) + computeArray.ckInsert( + index, + onPE=(currPe) % charm.numPes(), + args=[self.energyFuture], + useAtSync=True, ) self.computesList[num] = index else: @@ -260,179 +301,190 @@ def createComputes( self ): for c in self.computesList: if c in seen: - dupes.append( c ) + dupes.append(c) seen.add(c) self.computesList = list(seen) self.duplicateComputes = dupes - def migrateToCell( self, particlePos ): - x = self.thisIndex[ 0 ] * CELL_SIZE_X + CELL_ORIGIN_X - y = self.thisIndex[ 1 ] * CELL_SIZE_Y + CELL_ORIGIN_Y - z = self.thisIndex[ 2 ] * CELL_SIZE_Z + CELL_ORIGIN_Z + def migrateToCell(self, particlePos): + x = self.thisIndex[0] * CELL_SIZE_X + CELL_ORIGIN_X + y = self.thisIndex[1] * CELL_SIZE_Y + CELL_ORIGIN_Y + z = self.thisIndex[2] * CELL_SIZE_Z + CELL_ORIGIN_Z px = py = pz = 0 - particleXpos = particlePos[ 0 ] - particleYpos = particlePos[ 1 ] - particleZpos = particlePos[ 2 ] + particleXpos = particlePos[0] + particleYpos = particlePos[1] + particleZpos = particlePos[2] - if particleXpos < (x-CELL_SIZE_X): + if particleXpos < (x - CELL_SIZE_X): px = -2 elif particleXpos < x: px = -1 - elif particleXpos > (x+2*CELL_SIZE_X): + elif particleXpos > (x + 2 * CELL_SIZE_X): px = 2 - elif particleXpos > (x+CELL_SIZE_X): + elif particleXpos > (x + CELL_SIZE_X): px = 1 - if particleYpos < (y-CELL_SIZE_Y): + if particleYpos < (y - CELL_SIZE_Y): py = -2 elif particleYpos < y: py = -1 - elif particleYpos > (y+2*CELL_SIZE_Y): + elif particleYpos > (y + 2 * CELL_SIZE_Y): py = 2 - elif particleYpos > (y+CELL_SIZE_Y): + elif particleYpos > (y + CELL_SIZE_Y): py = 1 - if particleZpos < (z-CELL_SIZE_Z): + if particleZpos < (z - CELL_SIZE_Z): pz = -2 elif particleZpos < z: pz = -1 - elif particleZpos > (z+2*CELL_SIZE_Z): + elif particleZpos > (z + 2 * CELL_SIZE_Z): pz = 2 - elif particleZpos > (z+CELL_SIZE_Z): + elif particleZpos > (z + CELL_SIZE_Z): pz = 1 - return ( px, py, pz ) # setting px, py, pz to zero - - def wrapAround( self, particlePos ): - if particlePos[ 0 ] < CELL_ORIGIN_X: - particlePos[ 0 ] += CELL_SIZE_X*cellArrayDimX - if particlePos[ 1 ] < CELL_ORIGIN_Y: - particlePos[ 1 ] += CELL_SIZE_Y*cellArrayDimY - if particlePos[ 2 ] < CELL_ORIGIN_Z: - particlePos[ 2 ] += CELL_SIZE_Z*cellArrayDimZ - - if particlePos[ 0 ] > CELL_ORIGIN_X + CELL_SIZE_X*cellArrayDimX: - particlePos[ 0 ] -= CELL_SIZE_X*cellArrayDimX - if particlePos[ 1 ] > CELL_ORIGIN_Y + CELL_SIZE_Y*cellArrayDimY: - particlePos[ 1 ] -= CELL_SIZE_Y*cellArrayDimY - if particlePos[ 2 ] > CELL_ORIGIN_Z + CELL_SIZE_Z*cellArrayDimZ: - particlePos[ 2 ] -= CELL_SIZE_Z*cellArrayDimZ + return (px, py, pz) # setting px, py, pz to zero + + def wrapAround(self, particlePos): + if particlePos[0] < CELL_ORIGIN_X: + particlePos[0] += CELL_SIZE_X * cellArrayDimX + if particlePos[1] < CELL_ORIGIN_Y: + particlePos[1] += CELL_SIZE_Y * cellArrayDimY + if particlePos[2] < CELL_ORIGIN_Z: + particlePos[2] += CELL_SIZE_Z * cellArrayDimZ + + if particlePos[0] > CELL_ORIGIN_X + CELL_SIZE_X * cellArrayDimX: + particlePos[0] -= CELL_SIZE_X * cellArrayDimX + if particlePos[1] > CELL_ORIGIN_Y + CELL_SIZE_Y * cellArrayDimY: + particlePos[1] -= CELL_SIZE_Y * cellArrayDimY + if particlePos[2] > CELL_ORIGIN_Z + CELL_SIZE_Z * cellArrayDimZ: + particlePos[2] -= CELL_SIZE_Z * cellArrayDimZ return particlePos - def createSection( self ): + def createSection(self): # computeArray is global - self.mCastSecProxy = charm.split( computeArray, 1, elems = [ self.computesList ] )[ 0 ] + self.mCastSecProxy = charm.split(computeArray, 1, elems=[self.computesList])[0] @coro - def migrateParticles( self ): - outgoing = [ [[],[],[]] for _ in range(self.inbrs) ] + def migrateParticles(self): + outgoing = [[[], [], []] for _ in range(self.inbrs)] size = numParts = self.particle_mass.size - for i in range(numParts - 1, -1 -1 ): - x1, y1, z1 = self.migrateToCell( self.particle_pos[ i ] ) - if any( [x1, y1, z1 ] ): - outIndex = (x1+KAWAY_X)*NBRS_Y*NBRS_Z + (y1+KAWAY_Y)*NBRS_Z + (z1+KAWAY_Z) + for i in range(numParts - 1, -1 - 1): + x1, y1, z1 = self.migrateToCell(self.particle_pos[i]) + if any([x1, y1, z1]): + outIndex = ( + (x1 + KAWAY_X) * NBRS_Y * NBRS_Z + + (y1 + KAWAY_Y) * NBRS_Z + + (z1 + KAWAY_Z) + ) outgoing[outIndex][0].append(self.particle_mass[i]) - outgoing[outIndex][1].append(self.wrapAround(self.particle_pos[i].copy())) + outgoing[outIndex][1].append( + self.wrapAround(self.particle_pos[i].copy()) + ) outgoing[outIndex][2].append(self.particle_vel[i].copy()) - self.particle_mass[i] = self.particle_mass[size-1] - self.particle_pos[i] = self.particle_pos[size-1] - self.particle_vel[i] = self.particle_vel[size-1] + self.particle_mass[i] = self.particle_mass[size - 1] + self.particle_pos[i] = self.particle_pos[size - 1] + self.particle_vel[i] = self.particle_vel[size - 1] size -= 1 - if size < numParts: self.particle_mass = self.particle_mass[:size].copy() - self.particle_pos = self.particle_pos[:size].copy() - self.particle_vel = self.particle_vel[:size].copy() - + self.particle_pos = self.particle_pos[:size].copy() + self.particle_vel = self.particle_vel[:size].copy() - for num in range( self.inbrs ): + for num in range(self.inbrs): numOutgoing = len(outgoing[num][0]) if numOutgoing > 0: mass = np.array(outgoing[num][0], dtype=np.float64) - pos = np.concatenate(outgoing[num][1]) - vel = np.concatenate(outgoing[num][2]) - self.neighborChannels[ num ].send(True, mass, pos, vel) + pos = np.concatenate(outgoing[num][1]) + vel = np.concatenate(outgoing[num][2]) + self.neighborChannels[num].send(True, mass, pos, vel) else: - self.neighborChannels[ num ].send(True, None, None, None) + self.neighborChannels[num].send(True, None, None, None) - - def sendPositions( self, forceFuture ): - self.mCastSecProxy.calculateForces( self.mCastSecProxy, - np.array(self.thisIndex), - self.particle_pos, forceFuture + def sendPositions(self, forceFuture): + self.mCastSecProxy.calculateForces( + self.mCastSecProxy, np.array(self.thisIndex), self.particle_pos, forceFuture ) def resumeFromSync(self): - if not any( self.thisIndex ): + if not any(self.thisIndex): stepT = time.time() - print( f'Step {self.stepCount} Time {(stepT-self.stepTime)*1000} ms/step' ) + print(f"Step {self.stepCount} Time {(stepT-self.stepTime)*1000} ms/step") self.stepTime = stepT - self.thisProxy[ self.thisIndex ].run() - + self.thisProxy[self.thisIndex].run() @coro - def run( self ): + def run(self): if self.stepCount == 0: self.reportDuplicates() self.createSection() self.stepCount = 1 # todo: something not quite right here - if not any( self.thisIndex ): + if not any(self.thisIndex): self.stepTime = time.time() - - for self.stepCount in range( self.stepCount, finalStepCount + 1 ): + for self.stepCount in range(self.stepCount, finalStepCount + 1): reduceForceFuture = Future() - self.sendPositions( reduceForceFuture ) + self.sendPositions(reduceForceFuture) forces = reduceForceFuture.get() - updateProperties( forces, self.particle_mass, self.particle_vel, - self.particle_pos, self.energy, self.stepCount, - finalStepCount + updateProperties( + forces, + self.particle_mass, + self.particle_vel, + self.particle_pos, + self.energy, + self.stepCount, + finalStepCount, ) if not self.stepCount % MIGRATE_STEPCOUNT: self.migrateParticles() - for ch in charm.iwait( self.neighborChannels ): - self.receiveParticles( *ch.recv() ) + for ch in charm.iwait(self.neighborChannels): + self.receiveParticles(*ch.recv()) # TODO: Add a check to see if load balancing should be done here - if self.shouldLoadBalance(): + if self.shouldLoadBalance(): self.AtSync() return - if not any( self.thisIndex ): + if not any(self.thisIndex): stepT = time.time() - print( f'Step {self.stepCount} Time {(stepT-self.stepTime)*1000} ms/step' ) + print( + f"Step {self.stepCount} Time {(stepT-self.stepTime)*1000} ms/step" + ) self.stepTime = stepT - self.reduce( self.energyFuture, self.energy, Reducer.sum ) - - def shouldLoadBalance( self ): - return not any( [ self.stepCount <= firstLdbStep, self.stepCount % ldbPeriod, self.stepCount >= finalStepCount ] ) + self.reduce(self.energyFuture, self.energy, Reducer.sum) + + def shouldLoadBalance(self): + return not any( + [ + self.stepCount <= firstLdbStep, + self.stepCount % ldbPeriod, + self.stepCount >= finalStepCount, + ] + ) - def receiveParticles( self, r, mass, poss, vel ): + def receiveParticles(self, r, mass, poss, vel): if mass is not None: total = self.particle_mass.size + mass.size self.particle_mass = np.append(self.particle_mass, mass) - self.particle_pos = np.append(self.particle_pos, pos) - self.particle_vel = np.append(self.particle_vel, vel) + self.particle_pos = np.append(self.particle_pos, pos) + self.particle_vel = np.append(self.particle_vel, vel) self.particle_pos.shape = (total, 3) self.particle_vel.shape = (total, 3) class Physics: - @numba.njit( cache = True ) - def calcPairForces( firstIndex, secondIndex, - firstPos, secondPos, - stepCount, - force1, force2 + @numba.njit(cache=True) + def calcPairForces( + firstIndex, secondIndex, firstPos, secondPos, stepCount, force1, force2 ) -> float: firstLen = firstPos.shape[0] @@ -444,102 +496,106 @@ def calcPairForces( firstIndex, secondIndex, # check for wrap around and adjust locations accordingly diff_0, diff_1, diff_2 = 0.0, 0.0, 0.0 - if abs(firstIndex[0] - secondIndex[0]) > 1 : + if abs(firstIndex[0] - secondIndex[0]) > 1: diff_0 = CELL_SIZE_X * cellArrayDimX - if secondIndex[0] < firstIndex[0] : diff_0 = -1 * diff_0 - if abs(firstIndex[1] - secondIndex[1]) > 1 : + if secondIndex[0] < firstIndex[0]: + diff_0 = -1 * diff_0 + if abs(firstIndex[1] - secondIndex[1]) > 1: diff_1 = CELL_SIZE_Y * cellArrayDimY - if secondIndex[1] < firstIndex[1] : diff_1 = -1 * diff_1 + if secondIndex[1] < firstIndex[1]: + diff_1 = -1 * diff_1 - if abs(firstIndex[2] - secondIndex[2]) > 1 : + if abs(firstIndex[2] - secondIndex[2]) > 1: diff_2 = CELL_SIZE_Z * cellArrayDimZ - if secondIndex[2] < firstIndex[2] : diff_2 = -1 * diff_2 + if secondIndex[2] < firstIndex[2]: + diff_2 = -1 * diff_2 ptpCutOffSqd = PTP_CUT_OFF * PTP_CUT_OFF - powTen = 10.0 ** -10 - powTwenty = 10.0 ** -20 + powTen = 10.0**-10 + powTwenty = 10.0**-20 separation_0, separation_1, separation_2 = 0.0, 0.0, 0.0 for i1 in range(0, firstLen, BLOCK_SIZE): for j1 in range(0, secondLen, BLOCK_SIZE): - for i in range(i1, min(i1+BLOCK_SIZE, firstLen)): - for j in range(j1, min(j1+BLOCK_SIZE, secondLen)): - #separation = firstPos[i] - secondPos[j] - separation_0 = firstPos[i,0] + diff_0 - secondPos[j,0] - separation_1 = firstPos[i,1] + diff_1 - secondPos[j,1] - separation_2 = firstPos[i,2] + diff_2 - secondPos[j,2] + for i in range(i1, min(i1 + BLOCK_SIZE, firstLen)): + for j in range(j1, min(j1 + BLOCK_SIZE, secondLen)): + # separation = firstPos[i] - secondPos[j] + separation_0 = firstPos[i, 0] + diff_0 - secondPos[j, 0] + separation_1 = firstPos[i, 1] + diff_1 - secondPos[j, 1] + separation_2 = firstPos[i, 2] + diff_2 - secondPos[j, 2] rsqd = separation_0**2 + separation_1**2 + separation_2**2 - #rsqd = dot(separation, separation) + # rsqd = dot(separation, separation) if rsqd > 1 and rsqd < ptpCutOffSqd: rsqd = rsqd * powTwenty r = math.sqrt(rsqd) rSix = rsqd * rsqd * rsqd rTwelve = rSix * rSix - f = ( (12 * VDW_A) / rTwelve - (6 * VDW_B) / rSix) + f = (12 * VDW_A) / rTwelve - (6 * VDW_B) / rSix if doEnergy: - energy += ( VDW_A / rTwelve - VDW_B / rSix) # in milliJoules + energy += ( + VDW_A / rTwelve - VDW_B / rSix + ) # in milliJoules fr = f / rsqd - #force = separation * (fr * powTen) - #force1[i] += force - #force2[j] -= force + # force = separation * (fr * powTen) + # force1[i] += force + # force2[j] -= force force_0 = separation_0 * (fr * powTen) force_1 = separation_1 * (fr * powTen) force_2 = separation_2 * (fr * powTen) - force1[i,0] += force_0 - force1[i,1] += force_1 - force1[i,2] += force_2 - force2[j,0] -= force_0 - force2[j,1] -= force_1 - force2[j,2] -= force_2 + force1[i, 0] += force_0 + force1[i, 1] += force_1 + force1[i, 2] += force_2 + force2[j, 0] -= force_0 + force2[j, 1] -= force_1 + force2[j, 2] -= force_2 return energy - @numba.njit( cache = True ) - def calcInternalForces( firstPos, firstIndex, stepCount, force1 ): + @numba.njit(cache=True) + def calcInternalForces(firstPos, firstIndex, stepCount, force1): firstLen = firstPos.shape[0] energy = 0.0 doEnergy = False - if (stepCount == 1 or stepCount == finalStepCount): + if stepCount == 1 or stepCount == finalStepCount: doEnergy = True ptpCutOffSqd = PTP_CUT_OFF * PTP_CUT_OFF - powTen = 10.0 ** -10 - powTwenty = 10.0 ** -20 + powTen = 10.0**-10 + powTwenty = 10.0**-20 separation_0, separation_1, separation_2 = 0.0, 0.0, 0.0 force_0, force_1, force_2 = 0.0, 0.0, 0.0 - for i in range(firstLen) : - for j in range(i+1, firstLen) : + for i in range(firstLen): + for j in range(i + 1, firstLen): # computing base values - separation_0 = firstPos[i,0] - firstPos[j,0] - separation_1 = firstPos[i,1] - firstPos[j,1] - separation_2 = firstPos[i,2] - firstPos[j,2] + separation_0 = firstPos[i, 0] - firstPos[j, 0] + separation_1 = firstPos[i, 1] - firstPos[j, 1] + separation_2 = firstPos[i, 2] - firstPos[j, 2] rsqd = separation_0**2 + separation_1**2 + separation_2**2 if rsqd > 1 and rsqd < ptpCutOffSqd: rsqd = rsqd * powTwenty r = math.sqrt(rsqd) rSix = rsqd * rsqd * rsqd rTwelve = rSix * rSix - f = ( (12 * VDW_A) / rTwelve - (6 * VDW_B) / rSix) - if(doEnergy) : - energy += ( VDW_A / rTwelve - VDW_B / rSix) + f = (12 * VDW_A) / rTwelve - (6 * VDW_B) / rSix + if doEnergy: + energy += VDW_A / rTwelve - VDW_B / rSix fr = f / rsqd force_0 = separation_0 * (fr * powTen) force_1 = separation_1 * (fr * powTen) force_2 = separation_2 * (fr * powTen) - force1[i,0] += force_0 - force1[i,1] += force_1 - force1[i,2] += force_2 - force1[j,0] -= force_0 - force1[j,1] -= force_1 - force1[j,2] -= force_2 + force1[i, 0] += force_0 + force1[i, 1] += force_1 + force1[i, 2] += force_2 + force1[j, 0] -= force_0 + force1[j, 1] -= force_1 + force1[j, 2] -= force_2 return energy - -class Compute( Chare ): - def __init__( self, energySumFuture = None ): +class Compute(Chare): + def __init__(self, energySumFuture=None): self.energy = np.zeros(2, dtype=np.float64) self.stepCount = 1 self.energySumFuture = energySumFuture @@ -549,41 +605,51 @@ def __init__( self, energySumFuture = None ): self.isDuplicate = False self._self_compute = None - - def isSelfCompute( self ): + def isSelfCompute(self): if self._self_compute is None: - conds = [ self.thisIndex[ x ] == self.thisIndex[ x + 3 ] for x in range( len( self.thisIndex ) // 2 ) ] - self._self_compute = all( conds ) + conds = [ + self.thisIndex[x] == self.thisIndex[x + 3] + for x in range(len(self.thisIndex) // 2) + ] + self._self_compute = all(conds) return self._self_compute - def setDuplicate(self): self.isDuplicate = True - def calculateForces( self, secProxy, senderCoords, forces, doneFut ): + def calculateForces(self, secProxy, senderCoords, forces, doneFut): if self.isSelfCompute(): - self.selfInteract( secProxy, senderCoords, forces, doneFut ) + self.selfInteract(secProxy, senderCoords, forces, doneFut) self.stepCount += 1 else: - self.dataReceived.append( [ secProxy, senderCoords, forces, doneFut ] ) - assert len( self.dataReceived ) < 3 + self.dataReceived.append([secProxy, senderCoords, forces, doneFut]) + assert len(self.dataReceived) < 3 if self.isDuplicate: # Not all neighbors are unique, we treat the duplicates as # self interactions, but we have to receive both duplicates. - self.selfInteract( secProxy, senderCoords, forces, doneFut ) + self.selfInteract(secProxy, senderCoords, forces, doneFut) self.dataReceived = list() - elif len( self.dataReceived ) == 2: - redProxy1, coords1, forces1, doneFut1 = self.dataReceived[ 0 ] - redProxy2, coords2, forces2, doneFut2 = self.dataReceived[ 1 ] - self.thisProxy[self.thisIndex].interact( redProxy1, coords1, forces1, doneFut1, redProxy2, coords2, forces2, doneFut2 ) + elif len(self.dataReceived) == 2: + redProxy1, coords1, forces1, doneFut1 = self.dataReceived[0] + redProxy2, coords2, forces2, doneFut2 = self.dataReceived[1] + self.thisProxy[self.thisIndex].interact( + redProxy1, + coords1, + forces1, + doneFut1, + redProxy2, + coords2, + forces2, + doneFut2, + ) self.dataReceived = list() self.stepCount += 1 if self.stepCount > finalStepCount: # Everything done, reduction on potential energy - assert len( self.energy ) == 2 - self.reduce( self.energySumFuture, self.energy, Reducer.sum ) + assert len(self.energy) == 2 + self.reduce(self.energySumFuture, self.energy, Reducer.sum) # TODO: Add a check to see if load balancing should be done here if self.stepCount > firstLdbStep and not self.stepCount % ldbPeriod: @@ -594,131 +660,135 @@ def resumeFromSync(self): # Still, this method must exist in the chare pass - def selfInteract( self, mcast1, senderCoords, msg, doneFuture ): + def selfInteract(self, mcast1, senderCoords, msg, doneFuture): energyP: float = 0 - force1 = np.zeros( (len(msg),3), dtype = np.float64 ) + force1 = np.zeros((len(msg), 3), dtype=np.float64) - energyP = Physics.calcInternalForces( msg, senderCoords, self.stepCount, force1 ) + energyP = Physics.calcInternalForces(msg, senderCoords, self.stepCount, force1) if self.stepCount == 1: - self.energy[ 0 ] = energyP + self.energy[0] = energyP elif self.stepCount == finalStepCount: - self.energy[ 1 ] = energyP + self.energy[1] = energyP - self.contribute( force1, Reducer.sum, doneFuture, mcast1 ) + self.contribute(force1, Reducer.sum, doneFuture, mcast1) - def setReductionClient( self, proxy, method ): + def setReductionClient(self, proxy, method): self.reductionClientProxy = proxy self.reductionClientMethod = method - self.reductionClientFn = getattr( proxy, method ) + self.reductionClientFn = getattr(proxy, method) - def interact( self, mcast1, coords1, msg1, doneFut1, - mcast2, coords2, msg2, doneFut2 + def interact( + self, mcast1, coords1, msg1, doneFut1, mcast2, coords2, msg2, doneFut2 ): x1, y1, z1 = coords1 x2, y2, z2 = coords1 doSwap = False - if x2 * cellArrayDimY * cellArrayDimZ + y2 * cellArrayDimZ + z2 < \ - x1 * cellArrayDimY * cellArrayDimZ + y1 * cellArrayDimZ + z1: + if ( + x2 * cellArrayDimY * cellArrayDimZ + y2 * cellArrayDimZ + z2 + < x1 * cellArrayDimY * cellArrayDimZ + y1 * cellArrayDimZ + z1 + ): mcast1, mcast2 = mcast2, mcast1 doneFut1, doneFut2 = doneFut2, doneFut1 doSwap = True # unpacking arguments so they can be sent to the numba calcPairForces - force1 = np.zeros( ( len(msg1), 3 ), dtype = np.float64 ) - force2 = np.zeros( ( len(msg2), 3 ), dtype = np.float64 ) - energyP = Physics.calcPairForces( coords1, coords2, - msg1, - msg2, - self.stepCount, - force1, - force2 + force1 = np.zeros((len(msg1), 3), dtype=np.float64) + force2 = np.zeros((len(msg2), 3), dtype=np.float64) + energyP = Physics.calcPairForces( + coords1, coords2, msg1, msg2, self.stepCount, force1, force2 ) if doSwap: force1, force2 = force2, force1 if self.stepCount == 1: - self.energy[ 0 ] = energyP + self.energy[0] = energyP elif self.stepCount == finalStepCount: - self.energy[ 1 ] = energyP + self.energy[1] = energyP - self.reduce( doneFut1, force1, Reducer.sum, mcast1 ) - self.reduce( doneFut2, force2, Reducer.sum, mcast2 ) + self.reduce(doneFut1, force1, Reducer.sum, mcast1) + self.reduce(doneFut2, force2, Reducer.sum, mcast2) -def energySum( startEnergy, endEnergy ): +def energySum(startEnergy, endEnergy): iE1, fE1 = startEnergy iE2, fE2 = endEnergy - if abs( fE1 + fE2 - iE1 - iE2 ) > ENERGY_VAR: - print( f'Energy value has varied significantly from {iE1+iE2} to {fE1 + fE2}' ) + if abs(fE1 + fE2 - iE1 - iE2) > ENERGY_VAR: + print(f"Energy value has varied significantly from {iE1+iE2} to {fE1 + fE2}") else: - print( 'Energy conservation test passed for maximum allowed variation of ' - f'{ENERGY_VAR} units. \nSIMULATION SUCCESSFUL' + print( + "Energy conservation test passed for maximum allowed variation of " + f"{ENERGY_VAR} units. \nSIMULATION SUCCESSFUL" ) -def main( args ): - print( 'LENNARD JONES MOLECULAR DYNAMICS START UP...' ) - Chare( Compute ) +def main(args): + print("LENNARD JONES MOLECULAR DYNAMICS START UP...") + Chare(Compute) - if len( args ) != 7: - print( 'USAGE python3 -m charmrun.start +p dimX dimY dimZ steps firstLBstep LBPeriod' ) + if len(args) != 7: + print( + "USAGE python3 -m charmrun.start +p dimX dimY dimZ steps firstLBstep LBPeriod" + ) exit() globs = GlobalDefs() - dimX, dimY, dimZ = [ int( x ) for x in args[ 1:4 ] ] + dimX, dimY, dimZ = [int(x) for x in args[1:4]] globs.cellArrayDimX, globs.cellArrayDimY, globs.cellArrayDimZ = dimX, dimY, dimZ - steps = int( args[ 4 ] ) + steps = int(args[4]) globs.finalStepCount = steps - globs.firstLdbStep = int( args[ 5 ] ) - globs.lbPeriod = int( args[ 6 ] ) + globs.firstLdbStep = int(args[5]) + globs.lbPeriod = int(args[6]) - print( f'Cell Array Dimension X: {dimX} Y: {dimY} Z: {dimZ} ' - f'of size {globs.CELL_SIZE_X} {globs.CELL_SIZE_Y} {globs.CELL_SIZE_Z}' + print( + f"Cell Array Dimension X: {dimX} Y: {dimY} Z: {dimZ} " + f"of size {globs.CELL_SIZE_X} {globs.CELL_SIZE_Y} {globs.CELL_SIZE_Z}" ) - print( f'Final Step Count: {steps}' ) - print( f'First LB Step: {globs.firstLdbStep}' ) - print( f'LB Period: {globs.lbPeriod}' ) + print(f"Final Step Count: {steps}") + print(f"First LB Step: {globs.firstLdbStep}") + print(f"LB Period: {globs.lbPeriod}") - charm.thisProxy.updateGlobals( globs.__dict__, awaitable = True ).get() + charm.thisProxy.updateGlobals(globs.__dict__, awaitable=True).get() doneFuture = Future() # 2, one for start energy and one for end energy - energyFuture = Future( 2 ) + energyFuture = Future(2) - cellMap = Group( CellMap, args = ( dimX, dimY, dimZ ) ) - globs.cellArray = Array( Cell, ( dimX, dimY, dimZ ), map = cellMap, args = [ energyFuture ], useAtSync = True ) - globs.computeArray = Array( Compute, ndims = 6 ) - charm.thisProxy.updateGlobals( globs.__dict__, awaitable = True ).get() - globs.cellArray.createComputes( awaitable = True ).get() - charm.thisProxy.updateGlobals( globs.__dict__, awaitable = True ).get() + cellMap = Group(CellMap, args=(dimX, dimY, dimZ)) + globs.cellArray = Array( + Cell, (dimX, dimY, dimZ), map=cellMap, args=[energyFuture], useAtSync=True + ) + globs.computeArray = Array(Compute, ndims=6) + charm.thisProxy.updateGlobals(globs.__dict__, awaitable=True).get() + globs.cellArray.createComputes(awaitable=True).get() + charm.thisProxy.updateGlobals(globs.__dict__, awaitable=True).get() - print( f'Cells: {globs.cellArrayDimY} X {globs.cellArrayDimY} X {globs.cellArrayDimZ} .... created' ) + print( + f"Cells: {globs.cellArrayDimY} X {globs.cellArrayDimY} X {globs.cellArrayDimZ} .... created" + ) computeArray.ckDoneInserting() - nComputes = (NUM_NEIGHBORS//2+1) * \ - cellArrayDimX*cellArrayDimY*cellArrayDimZ - print(f"Computes: {nComputes} .... created\n" ) + nComputes = (NUM_NEIGHBORS // 2 + 1) * cellArrayDimX * cellArrayDimY * cellArrayDimZ + print(f"Computes: {nComputes} .... created\n") print("Starting simulation .... \n\n") startBenchmarkTime = time.time() - cellArray.run() starting, ending = energyFuture.get() - energySum( starting, ending ) + energySum(starting, ending) endBenchmarkTime = time.time() - print( f'Total application time: {endBenchmarkTime - startBenchmarkTime}' ) + print(f"Total application time: {endBenchmarkTime - startBenchmarkTime}") exit() -if __name__ == '__main__': - charm.start( main ) +if __name__ == "__main__": + charm.start(main) diff --git a/examples/miniapps/MiniWeather/constants.py b/examples/miniapps/MiniWeather/constants.py index 8ebfc933..32e59f97 100644 --- a/examples/miniapps/MiniWeather/constants.py +++ b/examples/miniapps/MiniWeather/constants.py @@ -1,37 +1,73 @@ import numpy as np -pi = 3.14159265358979323846264338327; #Pi -grav = 9.8; #Gravitational acceleration (m / s^2) -cp = 1004.; #Specific heat of dry air at constant pressure -cv = 717.; #Specific heat of dry air at constant volume -rd = 287.; #Dry air constant for equation of state (P=rho*rd*T) -p0 = 1.e5; #Standard pressure at the surface in Pascals -C0 = 27.5629410929725921310572974482; #Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) -gamm = 1.40027894002789400278940027894; #gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) -#Define domain and stability-related constants -xlen = 2.e4; #Length of the domain in the x-direction (meters) -zlen = 1.e4; #Length of the domain in the z-direction (meters) -hv_beta = 0.25; #How strong to diffuse the solution: hv_beta \in [0:1] -cfl = 1.50; #"Courant, Friedrichs, Lewy" number (for numerical stability) -max_speed = 450; #Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) -hs = 2; #"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction -sten_size = 4; #Size of the stencil used for interpolation +pi = 3.14159265358979323846264338327 +# Pi +grav = 9.8 +# Gravitational acceleration (m / s^2) +cp = 1004.0 +# Specific heat of dry air at constant pressure +cv = 717.0 +# Specific heat of dry air at constant volume +rd = 287.0 +# Dry air constant for equation of state (P=rho*rd*T) +p0 = 1.0e5 +# Standard pressure at the surface in Pascals +C0 = 27.5629410929725921310572974482 +# Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma) +gamm = 1.40027894002789400278940027894 +# gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much) +# Define domain and stability-related constants +xlen = 2.0e4 +# Length of the domain in the x-direction (meters) +zlen = 1.0e4 +# Length of the domain in the z-direction (meters) +hv_beta = 0.25 +# How strong to diffuse the solution: hv_beta \in [0:1] +cfl = 1.50 +# "Courant, Friedrichs, Lewy" number (for numerical stability) +max_speed = 450 +# Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec) +hs = 2 +# "Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction +sten_size = 4 +# Size of the stencil used for interpolation # Parameters for indexing and flags -NUM_VARS = 4; #Number of fluid state variables -ID_DENS = 0; #index for density ("rho") -ID_UMOM = 1; #index for momentum in the x-direction ("rho * u") -ID_WMOM = 2; #index for momentum in the z-direction ("rho * w") -ID_RHOT = 3; #index for density * potential temperature ("rho * theta") -DIR_X = 1; #Integer constant to express that this operation is in the x-direction -DIR_Z = 2; #Integer constant to express that this operation is in the z-direction -DATA_SPEC_COLLISION = 1; -DATA_SPEC_THERMAL = 2; -DATA_SPEC_MOUNTAIN = 3; -DATA_SPEC_TURBULENCE = 4; -DATA_SPEC_DENSITY_CURRENT = 5; -DATA_SPEC_INJECTION = 6; +NUM_VARS = 4 +# Number of fluid state variables +ID_DENS = 0 +# index for density ("rho") +ID_UMOM = 1 +# index for momentum in the x-direction ("rho * u") +ID_WMOM = 2 +# index for momentum in the z-direction ("rho * w") +ID_RHOT = 3 +# index for density * potential temperature ("rho * theta") +DIR_X = 1 +# Integer constant to express that this operation is in the x-direction +DIR_Z = 2 +# Integer constant to express that this operation is in the z-direction +DATA_SPEC_COLLISION = 1 +DATA_SPEC_THERMAL = 2 +DATA_SPEC_MOUNTAIN = 3 +DATA_SPEC_TURBULENCE = 4 +DATA_SPEC_DENSITY_CURRENT = 5 +DATA_SPEC_INJECTION = 6 -nqpoints = 3; -qpoints = np.array([0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0], dtype=np.float64) -qweights = np.array([0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0], dtype=np.float64) \ No newline at end of file +nqpoints = 3 +qpoints = np.array( + [ + 0.112701665379258311482073460022e0, + 0.500000000000000000000000000000e0, + 0.887298334620741688517926539980e0, + ], + dtype=np.float64, +) +qweights = np.array( + [ + 0.277777777777777777777777777779e0, + 0.444444444444444444444444444444e0, + 0.277777777777777777777777777779e0, + ], + dtype=np.float64, +) diff --git a/examples/miniapps/MiniWeather/create_visualization.py b/examples/miniapps/MiniWeather/create_visualization.py index 51738086..890b6dfa 100644 --- a/examples/miniapps/MiniWeather/create_visualization.py +++ b/examples/miniapps/MiniWeather/create_visualization.py @@ -6,6 +6,7 @@ import glob import re + def create_gif(input_dir, output_gif_filename, qoi_index): """ Creates a GIF from .npz simulation output files from multiple chares. @@ -15,176 +16,231 @@ def create_gif(input_dir, output_gif_filename, qoi_index): output_gif_filename (str): Name of the output GIF file. qoi_index (int): Index of the Quantity of Interest to visualize. """ - + search_pattern = os.path.join(input_dir, "data_iter_*_chare_*.npz") all_npz_files = sorted(glob.glob(search_pattern)) if not all_npz_files: - print(f"No .npz files found in {input_dir} matching the pattern {search_pattern}") + print( + f"No .npz files found in {input_dir} matching the pattern {search_pattern}" + ) return print(f"Found {len(all_npz_files)} total chare .npz files to process.") - iteration_files_metadata = {} + iteration_files_metadata = {} filename_pattern = re.compile(r"data_iter_(\d+)_chare_(\d+)_(\d+)\.npz") for file_path in all_npz_files: basename = os.path.basename(file_path) match = filename_pattern.match(basename) if not match: - print(f"Warning: Filename {basename} does not match expected pattern data_iter_XXXXXX_chare_YYY_ZZZ.npz. Skipping.") + print( + f"Warning: Filename {basename} does not match expected pattern data_iter_XXXXXX_chare_YYY_ZZZ.npz. Skipping." + ) continue - + iter_num = int(match.group(1)) try: with np.load(file_path) as data_archive: - required_keys = ['state', 'etime', 'chare_nx', 'chare_i_beg', 'chare_nz', 'chare_k_beg'] + required_keys = [ + "state", + "etime", + "chare_nx", + "chare_i_beg", + "chare_nz", + "chare_k_beg", + ] if not all(key in data_archive for key in required_keys): - print(f"Warning: File {file_path} is missing one or more required keys ({', '.join(required_keys)}). Skipping.") + print( + f"Warning: File {file_path} is missing one or more required keys ({', '.join(required_keys)}). Skipping." + ) continue meta = { - 'path': file_path, - 'etime': float(data_archive['etime']), - 'chare_nx': int(data_archive['chare_nx']), - 'chare_i_beg': int(data_archive['chare_i_beg']), - 'chare_nz': int(data_archive['chare_nz']), - 'chare_k_beg': int(data_archive['chare_k_beg']) + "path": file_path, + "etime": float(data_archive["etime"]), + "chare_nx": int(data_archive["chare_nx"]), + "chare_i_beg": int(data_archive["chare_i_beg"]), + "chare_nz": int(data_archive["chare_nz"]), + "chare_k_beg": int(data_archive["chare_k_beg"]), } - + if iter_num not in iteration_files_metadata: iteration_files_metadata[iter_num] = [] iteration_files_metadata[iter_num].append(meta) except Exception as e: print(f"Could not load metadata from {file_path}: {e}") continue - + if not iteration_files_metadata: print("No valid iteration data could be processed from file metadata.") return sorted_iter_nums = sorted(iteration_files_metadata.keys()) - - reconstructed_frames_info = [] + + reconstructed_frames_info = [] num_vars_global = None print("Reconstructing data for each iteration...") for iter_idx, iter_num in enumerate(sorted_iter_nums): chare_metas_for_iter = iteration_files_metadata[iter_num] - if not chare_metas_for_iter: continue + if not chare_metas_for_iter: + continue current_global_nx = 0 current_global_nz = 0 - sim_time_for_iter = chare_metas_for_iter[0]['etime'] - + sim_time_for_iter = chare_metas_for_iter[0]["etime"] + temp_chare_data_for_iter = [] valid_iter = True for chare_meta in chare_metas_for_iter: - current_global_nx = max(current_global_nx, chare_meta['chare_i_beg'] + chare_meta['chare_nx']) - current_global_nz = max(current_global_nz, chare_meta['chare_k_beg'] + chare_meta['chare_nz']) - + current_global_nx = max( + current_global_nx, chare_meta["chare_i_beg"] + chare_meta["chare_nx"] + ) + current_global_nz = max( + current_global_nz, chare_meta["chare_k_beg"] + chare_meta["chare_nz"] + ) + try: - with np.load(chare_meta['path']) as data_archive: - state_data_chare = data_archive['state'] - + with np.load(chare_meta["path"]) as data_archive: + state_data_chare = data_archive["state"] + if num_vars_global is None: num_vars_global = state_data_chare.shape[0] if qoi_index >= num_vars_global: - print(f"Error: QoI index {qoi_index} is out of bounds for data (num_vars={num_vars_global}). Max valid QoI index is {num_vars_global - 1}.") - return # Critical error, stop processing + print( + f"Error: QoI index {qoi_index} is out of bounds for data (num_vars={num_vars_global}). Max valid QoI index is {num_vars_global - 1}." + ) + return # Critical error, stop processing elif state_data_chare.shape[0] != num_vars_global: - print(f"Warning: Inconsistent number of variables in {chare_meta['path']} ({state_data_chare.shape[0]} vs {num_vars_global}). Skipping iteration {iter_num}.") + print( + f"Warning: Inconsistent number of variables in {chare_meta['path']} ({state_data_chare.shape[0]} vs {num_vars_global}). Skipping iteration {iter_num}." + ) valid_iter = False - break - - temp_chare_data_for_iter.append({**chare_meta, 'state': state_data_chare}) + break + + temp_chare_data_for_iter.append( + {**chare_meta, "state": state_data_chare} + ) except Exception as e: - print(f"Could not load state from {chare_meta['path']} for iter {iter_num}: {e}") + print( + f"Could not load state from {chare_meta['path']} for iter {iter_num}: {e}" + ) valid_iter = False break - + if not valid_iter or not temp_chare_data_for_iter: - print(f"Warning: Skipping iteration {iter_num} due to data loading issues or inconsistencies.") + print( + f"Warning: Skipping iteration {iter_num} due to data loading issues or inconsistencies." + ) continue - - if num_vars_global is None: # Should be set if at least one chare was processed - print(f"Warning: Number of variables could not be determined for iteration {iter_num}. Skipping.") + + if num_vars_global is None: # Should be set if at least one chare was processed + print( + f"Warning: Number of variables could not be determined for iteration {iter_num}. Skipping." + ) continue - full_state_np = np.zeros((num_vars_global, current_global_nz, current_global_nx), dtype=np.float64) - + full_state_np = np.zeros( + (num_vars_global, current_global_nz, current_global_nx), dtype=np.float64 + ) + for data_loaded in temp_chare_data_for_iter: - s = data_loaded['state'] - i_beg, i_len = data_loaded['chare_i_beg'], data_loaded['chare_nx'] - k_beg, k_len = data_loaded['chare_k_beg'], data_loaded['chare_nz'] - full_state_np[:, k_beg:k_beg+k_len, i_beg:i_beg+i_len] = s - + s = data_loaded["state"] + i_beg, i_len = data_loaded["chare_i_beg"], data_loaded["chare_nx"] + k_beg, k_len = data_loaded["chare_k_beg"], data_loaded["chare_nz"] + full_state_np[:, k_beg : k_beg + k_len, i_beg : i_beg + i_len] = s + qoi_slice = full_state_np[qoi_index, :, :] - reconstructed_frames_info.append({ - 'iter_num': iter_num, - 'sim_time': sim_time_for_iter, - 'qoi_data': qoi_slice - }) - if (iter_idx + 1) % 10 == 0 or (iter_idx + 1) == len(sorted_iter_nums) or len(sorted_iter_nums) < 10 : - print(f" Reconstructed data for iteration {iter_num} ({iter_idx+1}/{len(sorted_iter_nums)})") + reconstructed_frames_info.append( + {"iter_num": iter_num, "sim_time": sim_time_for_iter, "qoi_data": qoi_slice} + ) + if ( + (iter_idx + 1) % 10 == 0 + or (iter_idx + 1) == len(sorted_iter_nums) + or len(sorted_iter_nums) < 10 + ): + print( + f" Reconstructed data for iteration {iter_num} ({iter_idx+1}/{len(sorted_iter_nums)})" + ) if not reconstructed_frames_info: print("No simulation frames could be reconstructed. GIF creation aborted.") return - + vmin, vmax = None, None print("Determining color scale from reconstructed data...") for i, frame_info in enumerate(reconstructed_frames_info): - qoi_data = frame_info['qoi_data'] + qoi_data = frame_info["qoi_data"] if i == 0: vmin = np.min(qoi_data) vmax = np.max(qoi_data) else: vmin = min(vmin, np.min(qoi_data)) vmax = max(vmax, np.max(qoi_data)) - + if vmin is None or vmax is None: - print("Could not determine color scale from reconstructed data. No valid data files processed or QoI data was empty.") + print( + "Could not determine color scale from reconstructed data. No valid data files processed or QoI data was empty." + ) return - + print(f"Color scale determined: vmin={vmin:.2e}, vmax={vmax:.2e}") images = [] print("Generating images for GIF...") for i, frame_info in enumerate(reconstructed_frames_info): - if (i + 1) % 10 == 0 or (i + 1) == len(reconstructed_frames_info) or len(reconstructed_frames_info) < 10: - print(f"Processing frame {i+1}/{len(reconstructed_frames_info)} for iter {frame_info['iter_num']}") - - qoi_data_to_plot = frame_info['qoi_data'] - sim_time = frame_info['sim_time'] - iter_num_for_title = frame_info['iter_num'] + if ( + (i + 1) % 10 == 0 + or (i + 1) == len(reconstructed_frames_info) + or len(reconstructed_frames_info) < 10 + ): + print( + f"Processing frame {i+1}/{len(reconstructed_frames_info)} for iter {frame_info['iter_num']}" + ) + + qoi_data_to_plot = frame_info["qoi_data"] + sim_time = frame_info["sim_time"] + iter_num_for_title = frame_info["iter_num"] nz_dim, nx_dim = qoi_data_to_plot.shape aspect_ratio = nx_dim / nz_dim if nz_dim > 0 else 1.0 - base_fig_height = 5 + base_fig_height = 5 fig_width = base_fig_height * aspect_ratio - max_fig_width = 10 + max_fig_width = 10 if fig_width > max_fig_width: fig_width = max_fig_width - base_fig_height = fig_width / aspect_ratio if aspect_ratio > 0 else base_fig_height + base_fig_height = ( + fig_width / aspect_ratio if aspect_ratio > 0 else base_fig_height + ) fig, ax = plt.subplots(figsize=(fig_width, base_fig_height)) - im = ax.imshow(qoi_data_to_plot, aspect='auto', origin='lower', cmap='viridis', vmin=vmin, vmax=vmax) + im = ax.imshow( + qoi_data_to_plot, + aspect="auto", + origin="lower", + cmap="viridis", + vmin=vmin, + vmax=vmax, + ) plt.colorbar(im, ax=ax, label=f"QoI {qoi_index}") - - ax.set_title(f"Sim Time: {sim_time:.3f}s (Iter: {iter_num_for_title}) - QoI {qoi_index}") + + ax.set_title( + f"Sim Time: {sim_time:.3f}s (Iter: {iter_num_for_title}) - QoI {qoi_index}" + ) ax.set_xlabel("Global X-index") ax.set_ylabel("Global Z-index") fig.canvas.draw() - buf = fig.canvas.buffer_rgba() + buf = fig.canvas.buffer_rgba() image_rgba = np.frombuffer(buf, dtype=np.uint8) - canvas_width, canvas_height = fig.canvas.get_width_height() - image_rgba = image_rgba.reshape(canvas_height, canvas_width, 4) - images.append(image_rgba[:, :, :3]) + canvas_width, canvas_height = fig.canvas.get_width_height() + image_rgba = image_rgba.reshape(canvas_height, canvas_width, 4) + images.append(image_rgba[:, :, :3]) plt.close(fig) @@ -199,16 +255,33 @@ def create_gif(input_dir, output_gif_filename, qoi_index): except Exception as e: print(f"Error saving GIF: {e}") + if __name__ == "__main__": - parser = argparse.ArgumentParser(description="Create a GIF from MiniWeather multi-chare simulation output .npz files.") - parser.add_argument("input_dir", type=str, help="Directory containing the .npz simulation output files (e.g., data_iter_*_chare_*.npz).") - parser.add_argument("--out", type=str, default="simulation_qoi0.gif", help="Output GIF filename (default: simulation_qoi0.gif).") - parser.add_argument("--qoi", type=int, default=0, help="Index of the Quantity of Interest to visualize (default: 0, e.g., density).") - + parser = argparse.ArgumentParser( + description="Create a GIF from MiniWeather multi-chare simulation output .npz files." + ) + parser.add_argument( + "input_dir", + type=str, + help="Directory containing the .npz simulation output files (e.g., data_iter_*_chare_*.npz).", + ) + parser.add_argument( + "--out", + type=str, + default="simulation_qoi0.gif", + help="Output GIF filename (default: simulation_qoi0.gif).", + ) + parser.add_argument( + "--qoi", + type=int, + default=0, + help="Index of the Quantity of Interest to visualize (default: 0, e.g., density).", + ) + args = parser.parse_args() output_filename = args.out - if args.qoi != 0 and args.out == "simulation_qoi0.gif": + if args.qoi != 0 and args.out == "simulation_qoi0.gif": output_filename = f"simulation_qoi{args.qoi}.gif" - - create_gif(args.input_dir, output_filename, args.qoi) \ No newline at end of file + + create_gif(args.input_dir, output_filename, args.qoi) diff --git a/examples/miniapps/MiniWeather/kernels.py b/examples/miniapps/MiniWeather/kernels.py index bc4c7ad9..1d3d850a 100644 --- a/examples/miniapps/MiniWeather/kernels.py +++ b/examples/miniapps/MiniWeather/kernels.py @@ -12,16 +12,17 @@ def hydro_const_theta(z): z is the input coordinate Returns r and t, the background hydrostatic density and potential temperature """ - theta0 = 300. # Background potential temperature - exner0 = 1. # Surface-level Exner pressure + theta0 = 300.0 # Background potential temperature + exner0 = 1.0 # Surface-level Exner pressure # Establish hydrostatic balance first using Exner pressure - t = theta0 # Potential Temperature at z - exner = exner0 - grav * z / (cp * theta0) # Exner pressure at z - p = p0 * (exner**(cp/rd)) # Pressure at z - rt = (p / C0)**(1. / gamm) # rho*theta at z - r = rt / t # Density at z + t = theta0 # Potential Temperature at z + exner = exner0 - grav * z / (cp * theta0) # Exner pressure at z + p = p0 * (exner ** (cp / rd)) # Pressure at z + rt = (p / C0) ** (1.0 / gamm) # rho*theta at z + r = rt / t # Density at z return r, t + @numba.jit(nopython=True) def hydro_const_bvfreq(z, bv_freq0): """ @@ -30,15 +31,18 @@ def hydro_const_bvfreq(z, bv_freq0): bv_freq0 is the constant Brunt-Vaisala frequency Returns r and t, the background hydrostatic density and potential temperature """ - theta0 = 300. # Background potential temperature - exner0 = 1. # Surface-level Exner pressure - t = theta0 * np.exp( bv_freq0*bv_freq0 / grav * z ) # Pot temp at z - exner = exner0 - grav*grav / (cp * bv_freq0*bv_freq0) * (t - theta0) / (t * theta0) # Exner pressure at z - p = p0 * (exner**(cp/rd)) # Pressure at z - rt = (p / C0)**(1. / gamm) # rho*theta at z - r = rt / t # Density at z + theta0 = 300.0 # Background potential temperature + exner0 = 1.0 # Surface-level Exner pressure + t = theta0 * np.exp(bv_freq0 * bv_freq0 / grav * z) # Pot temp at z + exner = exner0 - grav * grav / (cp * bv_freq0 * bv_freq0) * (t - theta0) / ( + t * theta0 + ) # Exner pressure at z + p = p0 * (exner ** (cp / rd)) # Pressure at z + rt = (p / C0) ** (1.0 / gamm) # rho*theta at z + r = rt / t # Density at z return r, t + @numba.jit(nopython=True) def sample_ellipse_cosine(x, z, amp, x0, z0, xrad, zrad): """ @@ -48,11 +52,12 @@ def sample_ellipse_cosine(x, z, amp, x0, z0, xrad, zrad): Returns a double. """ # Compute distance from bubble center - dist = np.sqrt( ((x-x0)/xrad)**2 + ((z-z0)/zrad)**2 ) * math.pi / 2. - if dist <= math.pi / 2.: - return amp * (np.cos(dist)**2.) + dist = np.sqrt(((x - x0) / xrad) ** 2 + ((z - z0) / zrad) ** 2) * math.pi / 2.0 + if dist <= math.pi / 2.0: + return amp * (np.cos(dist) ** 2.0) else: - return 0. + return 0.0 + @numba.jit(nopython=True) def injection(x, z): @@ -62,12 +67,13 @@ def injection(x, z): Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature) """ hr, ht = hydro_const_theta(z) - r = 0. - t = 0. - u = 0. - w = 0. + r = 0.0 + t = 0.0 + u = 0.0 + w = 0.0 return r, u, w, t, hr, ht + @numba.jit(nopython=True) def density_current(x, z): """ @@ -76,13 +82,14 @@ def density_current(x, z): Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature) """ hr, ht = hydro_const_theta(z) - r = 0. - t = 0. - u = 0. - w = 0. - t = t + sample_ellipse_cosine(x,z,-20. ,xlen/2,5000.,4000.,2000.) + r = 0.0 + t = 0.0 + u = 0.0 + w = 0.0 + t = t + sample_ellipse_cosine(x, z, -20.0, xlen / 2, 5000.0, 4000.0, 2000.0) return r, u, w, t, hr, ht + @numba.jit(nopython=True) def turbulence(x, z): """ @@ -90,29 +97,31 @@ def turbulence(x, z): Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature) """ hr, ht = hydro_const_theta(z) - r = 0. - t = 0. - u = 0. - w = 0. + r = 0.0 + t = 0.0 + u = 0.0 + w = 0.0 # call random_number(u); # call random_number(w) # u = (u_rand - 0.5) * 20. # w = (w_rand - 0.5) * 20. return r, u, w, t, hr, ht + @numba.jit(nopython=True) def mountain_waves(x, z): """ x and z are input coordinates at which to sample Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature) """ - hr, ht = hydro_const_bvfreq(z,0.02) - r = 0. - t = 0. - u = 15. - w = 0. + hr, ht = hydro_const_bvfreq(z, 0.02) + r = 0.0 + t = 0.0 + u = 15.0 + w = 0.0 return r, u, w, t, hr, ht + @numba.jit(nopython=True) def thermal(x, z): """ @@ -121,13 +130,14 @@ def thermal(x, z): Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature) """ hr, ht = hydro_const_theta(z) - r = 0. - t = 0. - u = 0. - w = 0. - t = t + sample_ellipse_cosine(x,z, 3. ,xlen/2,2000.,2000.,2000.) + r = 0.0 + t = 0.0 + u = 0.0 + w = 0.0 + t = t + sample_ellipse_cosine(x, z, 3.0, xlen / 2, 2000.0, 2000.0, 2000.0) return r, u, w, t, hr, ht + @numba.jit(nopython=True) def collision(x, z): """ @@ -136,22 +146,26 @@ def collision(x, z): Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature) """ hr, ht = hydro_const_theta(z) - r = 0. - t = 0. - u = 0. - w = 0. - t = t + sample_ellipse_cosine(x,z, 20.,xlen/2,2000.,2000.,2000.) - t = t + sample_ellipse_cosine(x,z,-20.,xlen/2,8000.,2000.,2000.) + r = 0.0 + t = 0.0 + u = 0.0 + w = 0.0 + t = t + sample_ellipse_cosine(x, z, 20.0, xlen / 2, 2000.0, 2000.0, 2000.0) + t = t + sample_ellipse_cosine(x, z, -20.0, xlen / 2, 8000.0, 2000.0, 2000.0) return r, u, w, t, hr, ht + # End of CPU JIT functions #################################################################################### # CUDA GPU KERNELS #################################################################################### + @cuda.jit -def compute_flux_x_kernel(state, flux, hy_dens_cell, hy_dens_theta_cell, hv_coef, nx, nz, hs): +def compute_flux_x_kernel( + state, flux, hy_dens_cell, hy_dens_theta_cell, hv_coef, nx, nz, hs +): k_idx = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x @@ -163,21 +177,29 @@ def compute_flux_x_kernel(state, flux, hy_dens_cell, hy_dens_theta_cell, hv_coef for ll in range(NUM_VARS): for s in range(sten_size): stencil[s] = state[ll, k_idx + hs, i_idx + s] - - vals[ll] = -stencil[0]/12 + 7*stencil[1]/12 + 7*stencil[2]/12 - stencil[3]/12 - d3_vals[ll] = -stencil[0] + 3*stencil[1] - 3*stencil[2] + stencil[3] + + vals[ll] = ( + -stencil[0] / 12 + + 7 * stencil[1] / 12 + + 7 * stencil[2] / 12 + - stencil[3] / 12 + ) + d3_vals[ll] = -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3] r_val = vals[ID_DENS] + hy_dens_cell[k_idx + hs] u_val = vals[ID_UMOM] / r_val w_val = vals[ID_WMOM] / r_val t_val = (vals[ID_RHOT] + hy_dens_theta_cell[k_idx + hs]) / r_val - p_val = C0 * (r_val * t_val)**gamm + p_val = C0 * (r_val * t_val) ** gamm flux[ID_DENS, k_idx, i_idx] = r_val * u_val - hv_coef * d3_vals[ID_DENS] - flux[ID_UMOM, k_idx, i_idx] = r_val * u_val * u_val + p_val - hv_coef * d3_vals[ID_UMOM] + flux[ID_UMOM, k_idx, i_idx] = ( + r_val * u_val * u_val + p_val - hv_coef * d3_vals[ID_UMOM] + ) flux[ID_WMOM, k_idx, i_idx] = r_val * u_val * w_val - hv_coef * d3_vals[ID_WMOM] flux[ID_RHOT, k_idx, i_idx] = r_val * u_val * t_val - hv_coef * d3_vals[ID_RHOT] + @cuda.jit def compute_tend_x_kernel(flux, tend, nx, nz, grid_dx): ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z @@ -185,10 +207,25 @@ def compute_tend_x_kernel(flux, tend, nx, nz, grid_dx): i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x if i_idx < nx and k_idx < nz and ll < NUM_VARS: - tend[ll, k_idx, i_idx] = -(flux[ll, k_idx, i_idx + 1] - flux[ll, k_idx, i_idx]) / grid_dx + tend[ll, k_idx, i_idx] = ( + -(flux[ll, k_idx, i_idx + 1] - flux[ll, k_idx, i_idx]) / grid_dx + ) + @cuda.jit -def compute_flux_z_kernel(state, flux, hy_dens_int, hy_pressure_int, hy_dens_theta_int, hv_coef, nx, nz, hs, k_beg_global, nz_global): +def compute_flux_z_kernel( + state, + flux, + hy_dens_int, + hy_pressure_int, + hy_dens_theta_int, + hv_coef, + nx, + nz, + hs, + k_beg_global, + nz_global, +): k_idx = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x @@ -200,16 +237,21 @@ def compute_flux_z_kernel(state, flux, hy_dens_int, hy_pressure_int, hy_dens_the for ll in range(NUM_VARS): for s in range(sten_size): stencil[s] = state[ll, k_idx + s, i_idx + hs] - - vals[ll] = -stencil[0]/12 + 7*stencil[1]/12 + 7*stencil[2]/12 - stencil[3]/12 - d3_vals[ll] = -stencil[0] + 3*stencil[1] - 3*stencil[2] + stencil[3] + + vals[ll] = ( + -stencil[0] / 12 + + 7 * stencil[1] / 12 + + 7 * stencil[2] / 12 + - stencil[3] / 12 + ) + d3_vals[ll] = -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3] r_val = vals[ID_DENS] + hy_dens_int[k_idx] u_val = vals[ID_UMOM] / r_val w_val = vals[ID_WMOM] / r_val t_val = (vals[ID_RHOT] + hy_dens_theta_int[k_idx]) / r_val - p_val = C0 * (r_val * t_val)**gamm - hy_pressure_int[k_idx] - + p_val = C0 * (r_val * t_val) ** gamm - hy_pressure_int[k_idx] + # Boundary conditions for w and density flux at global boundaries only actual_w_val = w_val actual_d3_dens = d3_vals[ID_DENS] @@ -221,9 +263,16 @@ def compute_flux_z_kernel(state, flux, hy_dens_int, hy_pressure_int, hy_dens_the actual_d3_dens = 0.0 flux[ID_DENS, k_idx, i_idx] = r_val * actual_w_val - hv_coef * actual_d3_dens - flux[ID_UMOM, k_idx, i_idx] = r_val * actual_w_val * u_val - hv_coef * d3_vals[ID_UMOM] - flux[ID_WMOM, k_idx, i_idx] = r_val * actual_w_val * actual_w_val + p_val - hv_coef * d3_vals[ID_WMOM] - flux[ID_RHOT, k_idx, i_idx] = r_val * actual_w_val * t_val - hv_coef * d3_vals[ID_RHOT] + flux[ID_UMOM, k_idx, i_idx] = ( + r_val * actual_w_val * u_val - hv_coef * d3_vals[ID_UMOM] + ) + flux[ID_WMOM, k_idx, i_idx] = ( + r_val * actual_w_val * actual_w_val + p_val - hv_coef * d3_vals[ID_WMOM] + ) + flux[ID_RHOT, k_idx, i_idx] = ( + r_val * actual_w_val * t_val - hv_coef * d3_vals[ID_RHOT] + ) + @cuda.jit def compute_tend_z_kernel(state, flux, tend, nx, nz, hs, grid_dz): @@ -232,10 +281,13 @@ def compute_tend_z_kernel(state, flux, tend, nx, nz, hs, grid_dz): i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x if i_idx < nx and k_idx < nz and ll < NUM_VARS: - tend[ll, k_idx, i_idx] = -(flux[ll, k_idx + 1, i_idx] - flux[ll, k_idx, i_idx]) / grid_dz + tend[ll, k_idx, i_idx] = ( + -(flux[ll, k_idx + 1, i_idx] - flux[ll, k_idx, i_idx]) / grid_dz + ) if ll == ID_WMOM: tend[ll, k_idx, i_idx] -= state[ID_DENS, k_idx + hs, i_idx + hs] * grav + @cuda.jit def pack_send_buf_kernel(state, sendbuf_l, sendbuf_r, nx, nz, hs): ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z @@ -246,6 +298,7 @@ def pack_send_buf_kernel(state, sendbuf_l, sendbuf_r, nx, nz, hs): sendbuf_l[ll, k_idx, s_idx] = state[ll, k_idx + hs, hs + s_idx] sendbuf_r[ll, k_idx, s_idx] = state[ll, k_idx + hs, nx + s_idx] + @cuda.jit def unpack_recv_buf_kernel(state, recvbuf_l, recvbuf_r, nx, nz, hs): ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z @@ -256,8 +309,11 @@ def unpack_recv_buf_kernel(state, recvbuf_l, recvbuf_r, nx, nz, hs): state[ll, k_idx + hs, s_idx] = recvbuf_l[ll, k_idx, s_idx] state[ll, k_idx + hs, nx + hs + s_idx] = recvbuf_r[ll, k_idx, s_idx] + @cuda.jit -def update_state_x_kernel(state, hy_dens_cell, hy_dens_theta_cell, nx, nz, hs, k_beg, grid_dz): +def update_state_x_kernel( + state, hy_dens_cell, hy_dens_theta_cell, nx, nz, hs, k_beg, grid_dz +): k_idx = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x @@ -266,10 +322,15 @@ def update_state_x_kernel(state, hy_dens_cell, hy_dens_theta_cell, nx, nz, hs, k if math.fabs(z - 3 * zlen / 4) <= zlen / 16: r_plus_hr = state[ID_DENS, k_idx + hs, i_idx] + hy_dens_cell[k_idx + hs] state[ID_UMOM, k_idx + hs, i_idx] = r_plus_hr * 50.0 - state[ID_RHOT, k_idx + hs, i_idx] = r_plus_hr * 298.0 - hy_dens_theta_cell[k_idx + hs] + state[ID_RHOT, k_idx + hs, i_idx] = ( + r_plus_hr * 298.0 - hy_dens_theta_cell[k_idx + hs] + ) + @cuda.jit -def update_state_z_kernel(state, data_spec_int, i_beg, nx, nz, hs, grid_dx, mnt_width, k_beg_global, nz_global): +def update_state_z_kernel( + state, data_spec_int, i_beg, nx, nz, hs, grid_dx, mnt_width, k_beg_global, nz_global +): ll = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y i_glob_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x @@ -278,18 +339,28 @@ def update_state_z_kernel(state, data_spec_int, i_beg, nx, nz, hs, grid_dx, mnt_ if ll == ID_WMOM: state[ID_WMOM, 0, i_glob_idx] = 0.0 state[ID_WMOM, 1, i_glob_idx] = 0.0 - + if data_spec_int == DATA_SPEC_MOUNTAIN: x = (i_beg + i_glob_idx - hs + 0.5) * grid_dx if math.fabs(x - xlen / 4.0) < mnt_width: xloc = (x - (xlen / 4.0)) / mnt_width - mnt_deriv = -pi * math.cos(pi * xloc / 2.0) * math.sin(pi * xloc / 2.0) * 10.0 / grid_dx - state[ID_WMOM, 0, i_glob_idx] = mnt_deriv * state[ID_UMOM, hs, i_glob_idx] - state[ID_WMOM, 1, i_glob_idx] = mnt_deriv * state[ID_UMOM, hs, i_glob_idx] + mnt_deriv = ( + -pi + * math.cos(pi * xloc / 2.0) + * math.sin(pi * xloc / 2.0) + * 10.0 + / grid_dx + ) + state[ID_WMOM, 0, i_glob_idx] = ( + mnt_deriv * state[ID_UMOM, hs, i_glob_idx] + ) + state[ID_WMOM, 1, i_glob_idx] = ( + mnt_deriv * state[ID_UMOM, hs, i_glob_idx] + ) else: state[ll, 0, i_glob_idx] = state[ll, hs, i_glob_idx] state[ll, 1, i_glob_idx] = state[ll, hs, i_glob_idx] - + if k_beg_global + nz == nz_global: if ll == ID_WMOM: state[ID_WMOM, nz + hs, i_glob_idx] = 0.0 @@ -298,13 +369,25 @@ def update_state_z_kernel(state, data_spec_int, i_beg, nx, nz, hs, grid_dx, mnt_ state[ll, nz + hs, i_glob_idx] = state[ll, nz + hs - 1, i_glob_idx] state[ll, nz + hs + 1, i_glob_idx] = state[ll, nz + hs - 1, i_glob_idx] + @cuda.jit -def acc_mass_te_kernel(mass_arr, te_arr, state, hy_dens_cell, hy_dens_theta_cell, nx, nz, hs, grid_dx, grid_dz): +def acc_mass_te_kernel( + mass_arr, + te_arr, + state, + hy_dens_cell, + hy_dens_theta_cell, + nx, + nz, + hs, + grid_dx, + grid_dz, +): k_idx = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x if k_idx < nz and i_idx < nx: - r_pert = state[ID_DENS, k_idx + hs, i_idx + hs] + r_pert = state[ID_DENS, k_idx + hs, i_idx + hs] u_mom = state[ID_UMOM, k_idx + hs, i_idx + hs] w_mom = state[ID_WMOM, k_idx + hs, i_idx + hs] rhot_pert = state[ID_RHOT, k_idx + hs, i_idx + hs] @@ -313,16 +396,17 @@ def acc_mass_te_kernel(mass_arr, te_arr, state, hy_dens_cell, hy_dens_theta_cell u_vel = u_mom / r_full w_vel = w_mom / r_full th_full = (rhot_pert + hy_dens_theta_cell[hs + k_idx]) / r_full - - p_full = C0 * (r_full * th_full)**gamm - t_abs = th_full / ((p0 / p_full)**(rd / cp)) - + + p_full = C0 * (r_full * th_full) ** gamm + t_abs = th_full / ((p0 / p_full) ** (rd / cp)) + ke = 0.5 * r_full * (u_vel**2 + w_vel**2) ie = r_full * cv * t_abs cuda.atomic.add(mass_arr, 0, r_full * grid_dx * grid_dz) cuda.atomic.add(te_arr, 0, (ke + ie) * grid_dx * grid_dz) + @cuda.jit def update_fluid_state_kernel(state_init, state_out, tend, nx, nz, hs, dt_arg): ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z @@ -332,7 +416,10 @@ def update_fluid_state_kernel(state_init, state_out, tend, nx, nz, hs, dt_arg): if i_idx < nx and k_idx < nz and ll < NUM_VARS: state_idx_k = k_idx + hs state_idx_i = i_idx + hs - state_out[ll, state_idx_k, state_idx_i] = state_init[ll, state_idx_k, state_idx_i] + dt_arg * tend[ll, k_idx, i_idx] + state_out[ll, state_idx_k, state_idx_i] = ( + state_init[ll, state_idx_k, state_idx_i] + dt_arg * tend[ll, k_idx, i_idx] + ) + @cuda.jit def pack_send_buf_z_kernel(state, sendbuf_b, sendbuf_t, nx, nz, hs): @@ -344,6 +431,7 @@ def pack_send_buf_z_kernel(state, sendbuf_b, sendbuf_t, nx, nz, hs): sendbuf_b[ll, s_idx, i_idx] = state[ll, hs + s_idx, i_idx + hs] sendbuf_t[ll, s_idx, i_idx] = state[ll, nz + s_idx, i_idx + hs] + @cuda.jit def unpack_recv_buf_z_kernel(state, recvbuf_b, recvbuf_t, nx, nz, hs): ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z @@ -354,6 +442,7 @@ def unpack_recv_buf_z_kernel(state, recvbuf_b, recvbuf_t, nx, nz, hs): state[ll, s_idx, i_idx + hs] = recvbuf_b[ll, s_idx, i_idx] state[ll, nz + hs + s_idx, i_idx + hs] = recvbuf_t[ll, s_idx, i_idx] + @cuda.jit def unpack_recv_buf_z_bottom_kernel(state, recvbuf_b, nx, nz, hs): ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z @@ -363,6 +452,7 @@ def unpack_recv_buf_z_bottom_kernel(state, recvbuf_b, nx, nz, hs): if s_idx < hs and i_idx < nx and ll < NUM_VARS: state[ll, s_idx, i_idx + hs] = recvbuf_b[ll, s_idx, i_idx] + @cuda.jit def unpack_recv_buf_z_top_kernel(state, recvbuf_t, nx, nz, hs): ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z @@ -371,4 +461,3 @@ def unpack_recv_buf_z_top_kernel(state, recvbuf_t, nx, nz, hs): if s_idx < hs and i_idx < nx and ll < NUM_VARS: state[ll, nz + hs + s_idx, i_idx + hs] = recvbuf_t[ll, s_idx, i_idx] - diff --git a/examples/miniapps/MiniWeather/miniweather.py b/examples/miniapps/MiniWeather/miniweather.py index 95191b2c..5dfb89c6 100644 --- a/examples/miniapps/MiniWeather/miniweather.py +++ b/examples/miniapps/MiniWeather/miniweather.py @@ -7,25 +7,55 @@ from charm4py import charm, Chare, Array, Future, Reducer, coro, Channel from constants import ( - pi, grav, cp, cv, rd, p0, C0, gamm, xlen, zlen, hv_beta, cfl, max_speed, hs, - sten_size, NUM_VARS, ID_DENS, ID_UMOM, ID_WMOM, ID_RHOT, DIR_X, DIR_Z, - DATA_SPEC_COLLISION, DATA_SPEC_THERMAL, DATA_SPEC_MOUNTAIN, - DATA_SPEC_TURBULENCE, DATA_SPEC_DENSITY_CURRENT, DATA_SPEC_INJECTION, - nqpoints, qpoints, qweights + C0, + gamm, + xlen, + zlen, + hv_beta, + cfl, + max_speed, + hs, + NUM_VARS, + ID_DENS, + ID_UMOM, + ID_WMOM, + ID_RHOT, + DIR_X, + DIR_Z, + DATA_SPEC_COLLISION, + DATA_SPEC_THERMAL, + DATA_SPEC_MOUNTAIN, + DATA_SPEC_TURBULENCE, + DATA_SPEC_DENSITY_CURRENT, + DATA_SPEC_INJECTION, + nqpoints, + qpoints, + qweights, ) from kernels import ( - hydro_const_theta, hydro_const_bvfreq, sample_ellipse_cosine, - collision as collision_init, thermal as thermal_init, mountain_waves as mountain_waves_init, - turbulence as turbulence_init, density_current as density_current_init, injection as injection_init, - compute_flux_x_kernel, compute_tend_x_kernel, - compute_flux_z_kernel, compute_tend_z_kernel, - pack_send_buf_kernel, unpack_recv_buf_kernel, - pack_send_buf_z_kernel, unpack_recv_buf_z_kernel, - unpack_recv_buf_z_bottom_kernel, unpack_recv_buf_z_top_kernel, - update_state_x_kernel, update_state_z_kernel, - acc_mass_te_kernel, update_fluid_state_kernel + collision as collision_init, + thermal as thermal_init, + mountain_waves as mountain_waves_init, + turbulence as turbulence_init, + density_current as density_current_init, + injection as injection_init, + compute_flux_x_kernel, + compute_tend_x_kernel, + compute_flux_z_kernel, + compute_tend_z_kernel, + pack_send_buf_kernel, + unpack_recv_buf_kernel, + pack_send_buf_z_kernel, + unpack_recv_buf_z_kernel, + unpack_recv_buf_z_bottom_kernel, + unpack_recv_buf_z_top_kernel, + update_state_x_kernel, + update_state_z_kernel, + acc_mass_te_kernel, + update_fluid_state_kernel, ) + # Helper for domain decomposition def calculate_domain_decomposition_x(chare_idx, num_chares_x, nx_glob): nx_local_base = nx_glob // num_chares_x @@ -34,6 +64,7 @@ def calculate_domain_decomposition_x(chare_idx, num_chares_x, nx_glob): raise ValueError("nx_glob must be divisible by num_chares_x") return nx_local_base, chare_idx * nx_local_base + def calculate_domain_decomposition_z(chare_idx, num_chares_z, nz_glob): nz_local_base = nz_glob // num_chares_z remainder = nz_glob % num_chares_z @@ -41,16 +72,17 @@ def calculate_domain_decomposition_z(chare_idx, num_chares_z, nz_glob): raise ValueError("nz_glob must be divisible by num_chares_z") return nz_local_base, chare_idx * nz_local_base + class MiniWeatherChare(Chare): def __init__(self, args): - args_dict = args[0] - num_chares_x_in = args[1] - num_chares_z_in = args[2] - global_nx_in = args[3] - global_nz_in = args[4] - data_spec_int_in = args[5] - dt_in = args[6] - initial_etime_in = args[7] + args_dict = args[0] + num_chares_x_in = args[1] + num_chares_z_in = args[2] + global_nx_in = args[3] + global_nz_in = args[4] + data_spec_int_in = args[5] + dt_in = args[6] + initial_etime_in = args[7] self.args = argparse.Namespace(**args_dict) # For 2D chare array, thisIndex is a tuple (i, j) @@ -58,11 +90,11 @@ def __init__(self, args): self.chare_idx_z = self.thisIndex[1] self.num_chares_x = num_chares_x_in self.num_chares_z = num_chares_z_in - + self.nx_glob = global_nx_in self.nz_glob = global_nz_in self.data_spec_int = data_spec_int_in - self.dt = dt_in + self.dt = dt_in self.etime = initial_etime_in self.grid_dx = xlen / self.nx_glob @@ -98,7 +130,7 @@ def __init__(self, args): self.d_sendbuf_t = None self.d_recvbuf_b = None self.d_recvbuf_t = None - + self._direction_switch = True # Channel-based communication attributes @@ -112,12 +144,16 @@ def __init__(self, args): self.initial_te_val = 0.0 self.setup_channels() - + if charm.myPe() == 0 and self.chare_idx_x == 0 and self.chare_idx_z == 0: - print(f"Chare {self.chare_idx_x}, {self.chare_idx_z} initialized on PE {charm.myPe()}") + print( + f"Chare {self.chare_idx_x}, {self.chare_idx_z} initialized on PE {charm.myPe()}" + ) def setup_channels(self): - left_proxy_idx_x = (self.chare_idx_x - 1 + self.num_chares_x) % self.num_chares_x + left_proxy_idx_x = ( + self.chare_idx_x - 1 + self.num_chares_x + ) % self.num_chares_x left_neighbor_proxy = self.thisProxy[left_proxy_idx_x, self.chare_idx_z] self.left_channel = Channel(self, remote=left_neighbor_proxy) @@ -139,13 +175,17 @@ def setup_channels(self): else: self.top_channel = None - def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, setup_done_future): + def setup_chare_domain( + self, local_nx, i_beg_global, local_nz, k_beg_global, setup_done_future + ): self.nx = local_nx self.nz = local_nz self.i_beg_global_idx = i_beg_global self.k_beg_global_idx = k_beg_global - self.state_host = np.zeros((NUM_VARS, self.nz + 2 * hs, self.nx + 2 * hs), dtype=np.float64) + self.state_host = np.zeros( + (NUM_VARS, self.nz + 2 * hs, self.nx + 2 * hs), dtype=np.float64 + ) self.hy_dens_cell_host = np.zeros(self.nz + 2 * hs, dtype=np.float64) self.hy_dens_theta_cell_host = np.zeros(self.nz + 2 * hs, dtype=np.float64) self.hy_dens_int_host = np.zeros(self.nz + 1, dtype=np.float64) @@ -153,9 +193,12 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set self.hy_pressure_int_host = np.zeros(self.nz + 1, dtype=np.float64) problem_init_map = { - DATA_SPEC_COLLISION: collision_init, DATA_SPEC_THERMAL: thermal_init, - DATA_SPEC_MOUNTAIN: mountain_waves_init, DATA_SPEC_TURBULENCE: turbulence_init, - DATA_SPEC_DENSITY_CURRENT: density_current_init, DATA_SPEC_INJECTION: injection_init, + DATA_SPEC_COLLISION: collision_init, + DATA_SPEC_THERMAL: thermal_init, + DATA_SPEC_MOUNTAIN: mountain_waves_init, + DATA_SPEC_TURBULENCE: turbulence_init, + DATA_SPEC_DENSITY_CURRENT: density_current_init, + DATA_SPEC_INJECTION: injection_init, } init_routine = problem_init_map[self.data_spec_int] @@ -163,19 +206,35 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set for i_loop_idx in range(self.nx + 2 * hs): for kk_quad in range(nqpoints): for ii_quad in range(nqpoints): - x_glob = (self.i_beg_global_idx + i_loop_idx - hs + 0.5) * self.grid_dx + (qpoints[ii_quad] - 0.5) * self.grid_dx - z_glob = (self.k_beg_global_idx + k_loop_idx - hs + 0.5) * self.grid_dz + (qpoints[kk_quad] - 0.5) * self.grid_dz - + x_glob = ( + self.i_beg_global_idx + i_loop_idx - hs + 0.5 + ) * self.grid_dx + (qpoints[ii_quad] - 0.5) * self.grid_dx + z_glob = ( + self.k_beg_global_idx + k_loop_idx - hs + 0.5 + ) * self.grid_dz + (qpoints[kk_quad] - 0.5) * self.grid_dz + r, u, w, t, hr, ht = init_routine(x_glob, z_glob) - self.state_host[ID_DENS, k_loop_idx, i_loop_idx] += r * qweights[ii_quad] * qweights[kk_quad] - self.state_host[ID_UMOM, k_loop_idx, i_loop_idx] += (r + hr) * u * qweights[ii_quad] * qweights[kk_quad] - self.state_host[ID_WMOM, k_loop_idx, i_loop_idx] += (r + hr) * w * qweights[ii_quad] * qweights[kk_quad] - self.state_host[ID_RHOT, k_loop_idx, i_loop_idx] += ((r + hr) * (t + ht) - hr * ht) * qweights[ii_quad] * qweights[kk_quad] - + self.state_host[ID_DENS, k_loop_idx, i_loop_idx] += ( + r * qweights[ii_quad] * qweights[kk_quad] + ) + self.state_host[ID_UMOM, k_loop_idx, i_loop_idx] += ( + (r + hr) * u * qweights[ii_quad] * qweights[kk_quad] + ) + self.state_host[ID_WMOM, k_loop_idx, i_loop_idx] += ( + (r + hr) * w * qweights[ii_quad] * qweights[kk_quad] + ) + self.state_host[ID_RHOT, k_loop_idx, i_loop_idx] += ( + ((r + hr) * (t + ht) - hr * ht) + * qweights[ii_quad] + * qweights[kk_quad] + ) + for k_loop_idx in range(self.nz + 2 * hs): for kk_quad in range(nqpoints): - z_quad_hydro = (self.k_beg_global_idx + k_loop_idx - hs + 0.5) * self.grid_dz + (qpoints[kk_quad] - 0.5) * self.grid_dz + z_quad_hydro = ( + self.k_beg_global_idx + k_loop_idx - hs + 0.5 + ) * self.grid_dz + (qpoints[kk_quad] - 0.5) * self.grid_dz _r, _u, _w, _t, hr, ht = init_routine(0.0, z_quad_hydro) self.hy_dens_cell_host[k_loop_idx] += hr * qweights[kk_quad] self.hy_dens_theta_cell_host[k_loop_idx] += hr * ht * qweights[kk_quad] @@ -185,7 +244,7 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set _r, _u, _w, _t, hr, ht = init_routine(0.0, z_interface) self.hy_dens_int_host[k_loop_idx] = hr self.hy_dens_theta_int_host[k_loop_idx] = hr * ht - self.hy_pressure_int_host[k_loop_idx] = C0 * ((hr * ht)**gamm) + self.hy_pressure_int_host[k_loop_idx] = C0 * ((hr * ht) ** gamm) self.d_state = cuda.to_device(self.state_host) self.d_state_tmp = cuda.to_device(self.state_host) @@ -200,7 +259,7 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set self.d_flux = cuda.device_array(shape=flux_shape, dtype=np.float64) self.d_tend = cuda.device_array(shape=tend_shape, dtype=np.float64) - sendrecv_shape = (NUM_VARS, self.nz, hs) + sendrecv_shape = (NUM_VARS, self.nz, hs) self.d_sendbuf_l = cuda.device_array(shape=sendrecv_shape, dtype=np.float64) self.d_sendbuf_r = cuda.device_array(shape=sendrecv_shape, dtype=np.float64) self.d_recvbuf_l = cuda.device_array(shape=sendrecv_shape, dtype=np.float64) @@ -215,7 +274,7 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set local_mass, local_te = self._reductions() self.initial_mass_val = local_mass self.initial_te_val = local_te - + self.reduce(setup_done_future, [local_mass, local_te], Reducer.sum) def _reductions(self): @@ -223,13 +282,23 @@ def _reductions(self): d_te_val = cuda.to_device(np.zeros(1, dtype=np.float64)) threadsperblock = (16, 16, 1) - blockspergrid = (math.ceil(self.nx / threadsperblock[0]), - math.ceil(self.nz / threadsperblock[1]), - 1) + blockspergrid = ( + math.ceil(self.nx / threadsperblock[0]), + math.ceil(self.nz / threadsperblock[1]), + 1, + ) acc_mass_te_kernel[blockspergrid, threadsperblock]( - d_mass_val, d_te_val, self.d_state, self.d_hy_dens_cell, self.d_hy_dens_theta_cell, - self.nx, self.nz, hs, self.grid_dx, self.grid_dz + d_mass_val, + d_te_val, + self.d_state, + self.d_hy_dens_cell, + self.d_hy_dens_theta_cell, + self.nx, + self.nz, + hs, + self.grid_dx, + self.grid_dz, ) mass_host = d_mass_val.copy_to_host() te_host = d_te_val.copy_to_host() @@ -237,10 +306,12 @@ def _reductions(self): @coro def _set_halo_values_x(self, d_state_forcing): - threadsperblock_buffer = (16, 16, 1) - blockspergrid_buffer = (math.ceil(hs / threadsperblock_buffer[0]), - math.ceil(self.nz / threadsperblock_buffer[1]), - NUM_VARS) + threadsperblock_buffer = (16, 16, 1) + blockspergrid_buffer = ( + math.ceil(hs / threadsperblock_buffer[0]), + math.ceil(self.nz / threadsperblock_buffer[1]), + NUM_VARS, + ) pack_send_buf_kernel[blockspergrid_buffer, threadsperblock_buffer]( d_state_forcing, self.d_sendbuf_l, self.d_sendbuf_r, self.nx, self.nz, hs ) @@ -251,10 +322,10 @@ def _set_halo_values_x(self, d_state_forcing): self.left_channel.send(data_to_send_left) self.right_channel.send(data_to_send_right) - + data_for_my_d_recvbuf_l = self.left_channel.recv() data_for_my_d_recvbuf_r = self.right_channel.recv() - + self.d_recvbuf_l.copy_to_device(data_for_my_d_recvbuf_l) self.d_recvbuf_r.copy_to_device(data_for_my_d_recvbuf_r) @@ -264,64 +335,91 @@ def _set_halo_values_x(self, d_state_forcing): if self.data_spec_int == DATA_SPEC_INJECTION and self.chare_idx_x == 0: threadsperblock_inj = (16, 16, 1) - blockspergrid_inj = (math.ceil(hs / threadsperblock_inj[0]), - math.ceil(self.nz / threadsperblock_inj[1]), - 1) + blockspergrid_inj = ( + math.ceil(hs / threadsperblock_inj[0]), + math.ceil(self.nz / threadsperblock_inj[1]), + 1, + ) update_state_x_kernel[blockspergrid_inj, threadsperblock_inj]( - self.d_state, self.d_hy_dens_cell, self.d_hy_dens_theta_cell, - self.nx, self.nz, hs, self.k_beg_global_idx, self.grid_dz + self.d_state, + self.d_hy_dens_cell, + self.d_hy_dens_theta_cell, + self.nx, + self.nz, + hs, + self.k_beg_global_idx, + self.grid_dz, ) cuda.synchronize() @coro def _set_halo_values_z(self, d_state_forcing): - threadsperblock_buffer = (16, 16, 1) - blockspergrid_buffer = (math.ceil(self.nx / threadsperblock_buffer[0]), - math.ceil(hs / threadsperblock_buffer[1]), - NUM_VARS) + threadsperblock_buffer = (16, 16, 1) + blockspergrid_buffer = ( + math.ceil(self.nx / threadsperblock_buffer[0]), + math.ceil(hs / threadsperblock_buffer[1]), + NUM_VARS, + ) pack_send_buf_z_kernel[blockspergrid_buffer, threadsperblock_buffer]( d_state_forcing, self.d_sendbuf_b, self.d_sendbuf_t, self.nx, self.nz, hs ) cuda.synchronize() - if self.k_beg_global_idx == 0 or self.k_beg_global_idx + self.nz == self.nz_glob: + if ( + self.k_beg_global_idx == 0 + or self.k_beg_global_idx + self.nz == self.nz_glob + ): mnt_width = xlen / 8.0 - threadsperblock_update_z = (16, 16, 1) - blockspergrid_x = math.ceil((self.nx + 2 * hs) / threadsperblock_update_z[0]) + threadsperblock_update_z = (16, 16, 1) + blockspergrid_x = math.ceil( + (self.nx + 2 * hs) / threadsperblock_update_z[0] + ) blockspergrid_y = math.ceil(NUM_VARS / threadsperblock_update_z[1]) blockspergrid_update_z = (blockspergrid_x, blockspergrid_y, 1) update_state_z_kernel[blockspergrid_update_z, threadsperblock_update_z]( - d_state_forcing, self.data_spec_int, - self.i_beg_global_idx, self.nx, self.nz, hs, - self.grid_dx, mnt_width, self.k_beg_global_idx, self.nz_glob + d_state_forcing, + self.data_spec_int, + self.i_beg_global_idx, + self.nx, + self.nz, + hs, + self.grid_dx, + mnt_width, + self.k_beg_global_idx, + self.nz_glob, ) cuda.synchronize() if self.bottom_channel is not None: data_to_send_bottom = self.d_sendbuf_b.copy_to_host() self.bottom_channel.send(data_to_send_bottom) - + if self.top_channel is not None: data_to_send_top = self.d_sendbuf_t.copy_to_host() self.top_channel.send(data_to_send_top) - + if self.bottom_channel is not None: data_for_my_d_recvbuf_b = self.bottom_channel.recv() self.d_recvbuf_b.copy_to_device(data_for_my_d_recvbuf_b) - + if self.top_channel is not None: data_for_my_d_recvbuf_t = self.top_channel.recv() self.d_recvbuf_t.copy_to_device(data_for_my_d_recvbuf_t) if self.bottom_channel is not None and self.top_channel is not None: unpack_recv_buf_z_kernel[blockspergrid_buffer, threadsperblock_buffer]( - d_state_forcing, self.d_recvbuf_b, self.d_recvbuf_t, self.nx, self.nz, hs + d_state_forcing, + self.d_recvbuf_b, + self.d_recvbuf_t, + self.nx, + self.nz, + hs, ) elif self.bottom_channel is not None: - unpack_recv_buf_z_bottom_kernel[blockspergrid_buffer, threadsperblock_buffer]( - d_state_forcing, self.d_recvbuf_b, self.nx, self.nz, hs - ) + unpack_recv_buf_z_bottom_kernel[ + blockspergrid_buffer, threadsperblock_buffer + ](d_state_forcing, self.d_recvbuf_b, self.nx, self.nz, hs) elif self.top_channel is not None: unpack_recv_buf_z_top_kernel[blockspergrid_buffer, threadsperblock_buffer]( d_state_forcing, self.d_recvbuf_t, self.nx, self.nz, hs @@ -331,17 +429,29 @@ def _set_halo_values_z(self, d_state_forcing): def _compute_tendencies_x(self, dt_arg_for_hv_coef, d_state_forcing): threadsperblock_flux = (16, 16, 1) - blockspergrid_flux_x = (math.ceil((self.nx + 1) / threadsperblock_flux[0]), - math.ceil(self.nz / threadsperblock_flux[1]), 1) + blockspergrid_flux_x = ( + math.ceil((self.nx + 1) / threadsperblock_flux[0]), + math.ceil(self.nz / threadsperblock_flux[1]), + 1, + ) hv_coef = -hv_beta * self.grid_dx / (16.0 * dt_arg_for_hv_coef) compute_flux_x_kernel[blockspergrid_flux_x, threadsperblock_flux]( - d_state_forcing, self.d_flux, self.d_hy_dens_cell, self.d_hy_dens_theta_cell, - hv_coef, self.nx, self.nz, hs + d_state_forcing, + self.d_flux, + self.d_hy_dens_cell, + self.d_hy_dens_theta_cell, + hv_coef, + self.nx, + self.nz, + hs, ) - threadsperblock_tend = (16, 16, 1) - blockspergrid_tend_x = (math.ceil(self.nx / threadsperblock_tend[0]), - math.ceil(self.nz / threadsperblock_tend[1]), NUM_VARS) + threadsperblock_tend = (16, 16, 1) + blockspergrid_tend_x = ( + math.ceil(self.nx / threadsperblock_tend[0]), + math.ceil(self.nz / threadsperblock_tend[1]), + NUM_VARS, + ) compute_tend_x_kernel[blockspergrid_tend_x, threadsperblock_tend]( self.d_flux, self.d_tend, self.nx, self.nz, self.grid_dx ) @@ -350,33 +460,59 @@ def _compute_tendencies_x(self, dt_arg_for_hv_coef, d_state_forcing): def _compute_tendencies_z(self, dt_arg_for_hv_coef, d_state_forcing): hv_coef = -hv_beta * self.grid_dz / (16.0 * dt_arg_for_hv_coef) threadsperblock_flux = (16, 16, 1) - blockspergrid_flux_z = (math.ceil(self.nx / threadsperblock_flux[0]), - math.ceil((self.nz + 1) / threadsperblock_flux[1]), 1) + blockspergrid_flux_z = ( + math.ceil(self.nx / threadsperblock_flux[0]), + math.ceil((self.nz + 1) / threadsperblock_flux[1]), + 1, + ) compute_flux_z_kernel[blockspergrid_flux_z, threadsperblock_flux]( - d_state_forcing, self.d_flux, self.d_hy_dens_int, self.d_hy_pressure_int, self.d_hy_dens_theta_int, - hv_coef, self.nx, self.nz, hs, self.k_beg_global_idx, self.nz_glob + d_state_forcing, + self.d_flux, + self.d_hy_dens_int, + self.d_hy_pressure_int, + self.d_hy_dens_theta_int, + hv_coef, + self.nx, + self.nz, + hs, + self.k_beg_global_idx, + self.nz_glob, ) threadsperblock_tend = (16, 16, 1) - blockspergrid_tend_z = (math.ceil(self.nx / threadsperblock_tend[0]), - math.ceil(self.nz / threadsperblock_tend[1]), NUM_VARS) + blockspergrid_tend_z = ( + math.ceil(self.nx / threadsperblock_tend[0]), + math.ceil(self.nz / threadsperblock_tend[1]), + NUM_VARS, + ) compute_tend_z_kernel[blockspergrid_tend_z, threadsperblock_tend]( - d_state_forcing, self.d_flux, self.d_tend, self.nx, self.nz, hs, self.grid_dz + d_state_forcing, + self.d_flux, + self.d_tend, + self.nx, + self.nz, + hs, + self.grid_dz, ) cuda.synchronize() @coro - def _semi_discrete_step(self, dt_arg, current_dir, d_state_init, d_state_forcing, d_state_out): + def _semi_discrete_step( + self, dt_arg, current_dir, d_state_init, d_state_forcing, d_state_out + ): if current_dir == DIR_X: self._set_halo_values_x(d_state_forcing) self._compute_tendencies_x(dt_arg, d_state_forcing) elif current_dir == DIR_Z: self._set_halo_values_z(d_state_forcing) self._compute_tendencies_z(dt_arg, d_state_forcing) - + threadsperblock_update = (16, 16, 1) - blockspergrid_update = (math.ceil(self.nx / threadsperblock_update[0]), - math.ceil(self.nz / threadsperblock_update[1]), NUM_VARS) + blockspergrid_update = ( + math.ceil(self.nx / threadsperblock_update[0]), + math.ceil(self.nz / threadsperblock_update[1]), + NUM_VARS, + ) update_fluid_state_kernel[blockspergrid_update, threadsperblock_update]( d_state_init, d_state_out, self.d_tend, self.nx, self.nz, hs, dt_arg ) @@ -389,19 +525,43 @@ def _perform_timestep(self, dt_full_step): dt_rk_stage3 = dt_full_step / 1.0 if self._direction_switch: - self._semi_discrete_step(dt_rk_stage1, DIR_X, self.d_state, self.d_state, self.d_state_tmp) - self._semi_discrete_step(dt_rk_stage2, DIR_X, self.d_state, self.d_state_tmp, self.d_state_tmp) - self._semi_discrete_step(dt_rk_stage3, DIR_X, self.d_state, self.d_state_tmp, self.d_state) - self._semi_discrete_step(dt_rk_stage1, DIR_Z, self.d_state, self.d_state, self.d_state_tmp) - self._semi_discrete_step(dt_rk_stage2, DIR_Z, self.d_state, self.d_state_tmp, self.d_state_tmp) - self._semi_discrete_step(dt_rk_stage3, DIR_Z, self.d_state, self.d_state_tmp, self.d_state) + self._semi_discrete_step( + dt_rk_stage1, DIR_X, self.d_state, self.d_state, self.d_state_tmp + ) + self._semi_discrete_step( + dt_rk_stage2, DIR_X, self.d_state, self.d_state_tmp, self.d_state_tmp + ) + self._semi_discrete_step( + dt_rk_stage3, DIR_X, self.d_state, self.d_state_tmp, self.d_state + ) + self._semi_discrete_step( + dt_rk_stage1, DIR_Z, self.d_state, self.d_state, self.d_state_tmp + ) + self._semi_discrete_step( + dt_rk_stage2, DIR_Z, self.d_state, self.d_state_tmp, self.d_state_tmp + ) + self._semi_discrete_step( + dt_rk_stage3, DIR_Z, self.d_state, self.d_state_tmp, self.d_state + ) else: - self._semi_discrete_step(dt_rk_stage1, DIR_Z, self.d_state, self.d_state, self.d_state_tmp) - self._semi_discrete_step(dt_rk_stage2, DIR_Z, self.d_state, self.d_state_tmp, self.d_state_tmp) - self._semi_discrete_step(dt_rk_stage3, DIR_Z, self.d_state, self.d_state_tmp, self.d_state) - self._semi_discrete_step(dt_rk_stage1, DIR_X, self.d_state, self.d_state, self.d_state_tmp) - self._semi_discrete_step(dt_rk_stage2, DIR_X, self.d_state, self.d_state_tmp, self.d_state_tmp) - self._semi_discrete_step(dt_rk_stage3, DIR_X, self.d_state, self.d_state_tmp, self.d_state) + self._semi_discrete_step( + dt_rk_stage1, DIR_Z, self.d_state, self.d_state, self.d_state_tmp + ) + self._semi_discrete_step( + dt_rk_stage2, DIR_Z, self.d_state, self.d_state_tmp, self.d_state_tmp + ) + self._semi_discrete_step( + dt_rk_stage3, DIR_Z, self.d_state, self.d_state_tmp, self.d_state + ) + self._semi_discrete_step( + dt_rk_stage1, DIR_X, self.d_state, self.d_state, self.d_state_tmp + ) + self._semi_discrete_step( + dt_rk_stage2, DIR_X, self.d_state, self.d_state_tmp, self.d_state_tmp + ) + self._semi_discrete_step( + dt_rk_stage3, DIR_X, self.d_state, self.d_state_tmp, self.d_state + ) self._direction_switch = not self._direction_switch @@ -409,24 +569,24 @@ def _perform_timestep(self, dt_full_step): # Main simulation loop def start_main_loop(self, all_chares_done_future): current_sim_time_target = self.args.sim_time - + chare_loop_start_time = time.time() - + n_iters = 0 while self.etime < current_sim_time_target and n_iters < self.args.max_iters: actual_dt = self.dt if self.etime + self.dt > current_sim_time_target: actual_dt = current_sim_time_target - self.etime - + self._perform_timestep(actual_dt) - + self.etime += actual_dt n_iters += 1 if self.args.output_freq > 0 and n_iters % self.args.output_freq == 0: - cuda.synchronize() + cuda.synchronize() state_host_output_local = self.d_state.copy_to_host() - + if hs > 0: state_ext = state_host_output_local[:, hs:-hs, hs:-hs] else: @@ -443,58 +603,134 @@ def start_main_loop(self, all_chares_done_future): denom = hy_dens_cell_local[:, None] + dens uwnd = state_ext[ID_UMOM, :, :] / denom wwnd = state_ext[ID_WMOM, :, :] / denom - theta = (state_ext[ID_RHOT, :, :] + hy_dens_theta_cell_local[:, None]) / denom \ - - (hy_dens_theta_cell_local / hy_dens_cell_local)[:, None] + theta = ( + state_ext[ID_RHOT, :, :] + hy_dens_theta_cell_local[:, None] + ) / denom - (hy_dens_theta_cell_local / hy_dens_cell_local)[:, None] norm_state = np.stack([dens, uwnd, wwnd, theta], axis=0) - output_filename = os.path.join(self.args.output_dir, f"data_iter_{n_iters:06d}_chare_{self.chare_idx_x}_{self.chare_idx_z}.npz") - np.savez(output_filename, - state=norm_state, - etime=self.etime, - chare_nx=self.nx, - chare_i_beg=self.i_beg_global_idx, - chare_nz=self.nz, - chare_k_beg=self.k_beg_global_idx) + output_filename = os.path.join( + self.args.output_dir, + f"data_iter_{n_iters:06d}_chare_{self.chare_idx_x}_{self.chare_idx_z}.npz", + ) + np.savez( + output_filename, + state=norm_state, + etime=self.etime, + chare_nx=self.nx, + chare_i_beg=self.i_beg_global_idx, + chare_nz=self.nz, + chare_k_beg=self.k_beg_global_idx, + ) if self.chare_idx_x == 0 and self.chare_idx_z == 0: - print(f"Iter: {n_iters}, Chare 0,0 output data to {output_filename} pattern at SimTime: {self.etime:.4f}s") - - if self.chare_idx_x == 0 and self.chare_idx_z == 0 and (n_iters % 10 == 0 or n_iters == 1 or (self.etime >= current_sim_time_target) or (n_iters == self.args.max_iters)): - print(f"Chare 0,0 - Iter: {n_iters:5d}, Sim Time: {self.etime:8.4f}s / {current_sim_time_target:.2f}s, Step dt: {actual_dt:.6f}s") + print( + f"Iter: {n_iters}, Chare 0,0 output data to {output_filename} pattern at SimTime: {self.etime:.4f}s" + ) + + if ( + self.chare_idx_x == 0 + and self.chare_idx_z == 0 + and ( + n_iters % 10 == 0 + or n_iters == 1 + or (self.etime >= current_sim_time_target) + or (n_iters == self.args.max_iters) + ) + ): + print( + f"Chare 0,0 - Iter: {n_iters:5d}, Sim Time: {self.etime:8.4f}s / {current_sim_time_target:.2f}s, Step dt: {actual_dt:.6f}s" + ) chare_loop_end_time = time.time() cuda.synchronize() - + if self.chare_idx_x == 0 and self.chare_idx_z == 0: - print(f"\nChare 0,0 finished main loop after {n_iters} iterations. Local loop wall time: {chare_loop_end_time - chare_loop_start_time:.3f} s.") - print(f"Chare 0,0 final simulation time: {self.etime:.4f}s") + print( + f"\nChare 0,0 finished main loop after {n_iters} iterations. Local loop wall time: {chare_loop_end_time - chare_loop_start_time:.3f} s." + ) + print(f"Chare 0,0 final simulation time: {self.etime:.4f}s") final_mass_local, final_te_local = self._reductions() - - self.reduce(all_chares_done_future, [final_mass_local, final_te_local, self.etime, float(n_iters)], - Reducer.gather) + + self.reduce( + all_chares_done_future, + [final_mass_local, final_te_local, self.etime, float(n_iters)], + Reducer.gather, + ) + def main_charm_wrapper(charm_args_list): - parser = argparse.ArgumentParser(description="MiniWeather Python Numba CUDA Simulation (Charm4Py)") - parser.add_argument("--nx_glob", type=int, default=200, help="Number of global cells in x-direction (default: 200)") - parser.add_argument("--nz_glob", type=int, default=100, help="Number of global cells in z-direction (default: 100)") - parser.add_argument("--sim_time", type=float, default=1.0, help="How many seconds to run the simulation (default: 1.0s)") - parser.add_argument("--max_iters", type=int, default=10000, help="Maximum number of iterations (default: 10000)") - parser.add_argument("--output_freq", type=int, default=0, help="Frequency of outputting data in iterations (0 for no output, default: 0)") - parser.add_argument("--output_dir", type=str, default="output_data_charm", help="Directory to save output files (default: output_data_charm)") - + parser = argparse.ArgumentParser( + description="MiniWeather Python Numba CUDA Simulation (Charm4Py)" + ) + parser.add_argument( + "--nx_glob", + type=int, + default=200, + help="Number of global cells in x-direction (default: 200)", + ) + parser.add_argument( + "--nz_glob", + type=int, + default=100, + help="Number of global cells in z-direction (default: 100)", + ) + parser.add_argument( + "--sim_time", + type=float, + default=1.0, + help="How many seconds to run the simulation (default: 1.0s)", + ) + parser.add_argument( + "--max_iters", + type=int, + default=10000, + help="Maximum number of iterations (default: 10000)", + ) + parser.add_argument( + "--output_freq", + type=int, + default=0, + help="Frequency of outputting data in iterations (0 for no output, default: 0)", + ) + parser.add_argument( + "--output_dir", + type=str, + default="output_data_charm", + help="Directory to save output files (default: output_data_charm)", + ) + data_spec_choices_map = { - DATA_SPEC_COLLISION: "collision", DATA_SPEC_THERMAL: "thermal", - DATA_SPEC_MOUNTAIN: "mountain_waves", DATA_SPEC_TURBULENCE: "turbulence", - DATA_SPEC_DENSITY_CURRENT: "density_current", DATA_SPEC_INJECTION: "injection" + DATA_SPEC_COLLISION: "collision", + DATA_SPEC_THERMAL: "thermal", + DATA_SPEC_MOUNTAIN: "mountain_waves", + DATA_SPEC_TURBULENCE: "turbulence", + DATA_SPEC_DENSITY_CURRENT: "density_current", + DATA_SPEC_INJECTION: "injection", } - default_data_spec_name = data_spec_choices_map.get(DATA_SPEC_THERMAL, str(DATA_SPEC_THERMAL)) - parser.add_argument("--data_spec", type=str, default=default_data_spec_name, - choices=list(data_spec_choices_map.values()), - help=f"Data specification name (default: {default_data_spec_name})") - - parser.add_argument("--num_chares_x", type=int, default=1, help="Number of chares in X-direction for domain decomposition (default: 1)") - parser.add_argument("--num_chares_z", type=int, default=1, help="Number of chares in Z-direction for domain decomposition (default: 1)") - + default_data_spec_name = data_spec_choices_map.get( + DATA_SPEC_THERMAL, str(DATA_SPEC_THERMAL) + ) + parser.add_argument( + "--data_spec", + type=str, + default=default_data_spec_name, + choices=list(data_spec_choices_map.values()), + help=f"Data specification name (default: {default_data_spec_name})", + ) + + parser.add_argument( + "--num_chares_x", + type=int, + default=1, + help="Number of chares in X-direction for domain decomposition (default: 1)", + ) + parser.add_argument( + "--num_chares_z", + type=int, + default=1, + help="Number of chares in Z-direction for domain decomposition (default: 1)", + ) + args = parser.parse_args(charm_args_list[1:]) data_spec_int = None @@ -502,12 +738,14 @@ def main_charm_wrapper(charm_args_list): if name == args.data_spec: data_spec_int = val break - + if charm.myPe() == 0: - print(f"Running MiniWeather (Charm4Py) with: " - f"nx_glob={args.nx_glob}, nz_glob={args.nz_glob}, num_chares_x={args.num_chares_x}, " - f"num_chares_z={args.num_chares_z}, data_spec='{args.data_spec}' (ID: {data_spec_int}), sim_time={args.sim_time:.2f}s, " - f"max_iters={args.max_iters}, output_freq={args.output_freq}, output_dir='{args.output_dir}'") + print( + f"Running MiniWeather (Charm4Py) with: " + f"nx_glob={args.nx_glob}, nz_glob={args.nz_glob}, num_chares_x={args.num_chares_x}, " + f"num_chares_z={args.num_chares_z}, data_spec='{args.data_spec}' (ID: {data_spec_int}), sim_time={args.sim_time:.2f}s, " + f"max_iters={args.max_iters}, output_freq={args.output_freq}, output_dir='{args.output_dir}'" + ) if args.output_freq > 0: if not os.path.exists(args.output_dir): @@ -515,7 +753,9 @@ def main_charm_wrapper(charm_args_list): os.makedirs(args.output_dir) print(f"Created output directory: {args.output_dir}") except FileExistsError: - print(f"Output directory already exists or was just created: {args.output_dir}") + print( + f"Output directory already exists or was just created: {args.output_dir}" + ) else: print(f"Output directory already exists: {args.output_dir}") @@ -523,48 +763,77 @@ def main_charm_wrapper(charm_args_list): grid_dz = zlen / args.nz_glob initial_dt = min(grid_dx, grid_dz) / max_speed * cfl initial_etime = 0.0 - + num_chares_x = args.num_chares_x if num_chares_x > args.nx_glob: if charm.myPe() == 0: - print(f"Warning: num_chares_x ({num_chares_x}) > nx_glob ({args.nx_glob}). Setting num_chares_x = nx_glob.") + print( + f"Warning: num_chares_x ({num_chares_x}) > nx_glob ({args.nx_glob}). Setting num_chares_x = nx_glob." + ) num_chares_x = args.nx_glob args.num_chares_x = num_chares_x num_chares_z = args.num_chares_z if num_chares_z > args.nz_glob: if charm.myPe() == 0: - print(f"Warning: num_chares_z ({num_chares_z}) > nz_glob ({args.nz_glob}). Setting num_chares_z = nz_glob.") + print( + f"Warning: num_chares_z ({num_chares_z}) > nz_glob ({args.nz_glob}). Setting num_chares_z = nz_glob." + ) num_chares_z = args.nz_glob args.num_chares_z = num_chares_z - chare_constructor_args = (vars(args), num_chares_x, num_chares_z, args.nx_glob, args.nz_glob, data_spec_int, initial_dt, initial_etime) - - chares = Array(MiniWeatherChare, dims=(num_chares_x, num_chares_z), args=[chare_constructor_args]) - + chare_constructor_args = ( + vars(args), + num_chares_x, + num_chares_z, + args.nx_glob, + args.nz_glob, + data_spec_int, + initial_dt, + initial_etime, + ) + + chares = Array( + MiniWeatherChare, + dims=(num_chares_x, num_chares_z), + args=[chare_constructor_args], + ) + setup_completion_future = Future() - + for i in range(num_chares_x): for j in range(num_chares_z): - local_nx, i_beg_global = calculate_domain_decomposition_x(i, num_chares_x, args.nx_glob) - local_nz, k_beg_global = calculate_domain_decomposition_z(j, num_chares_z, args.nz_glob) - chares[i, j].setup_chare_domain(local_nx, i_beg_global, local_nz, k_beg_global, setup_completion_future) - + local_nx, i_beg_global = calculate_domain_decomposition_x( + i, num_chares_x, args.nx_glob + ) + local_nz, k_beg_global = calculate_domain_decomposition_z( + j, num_chares_z, args.nz_glob + ) + chares[i, j].setup_chare_domain( + local_nx, i_beg_global, local_nz, k_beg_global, setup_completion_future + ) + initial_reductions_sum = setup_completion_future.get() mass0_sum = initial_reductions_sum[0] te0_sum = initial_reductions_sum[1] if charm.myPe() == 0: - print(f"Initial Global Mass: {mass0_sum:.6e}, Initial Global Total Energy: {te0_sum:.6e}") - print("\nCUDA device array setup and initial reductions complete for all chares.") - print(f"Starting main simulation loop up to sim_time: {args.sim_time:.2f}s or max_iters: {args.max_iters}") + print( + f"Initial Global Mass: {mass0_sum:.6e}, Initial Global Total Energy: {te0_sum:.6e}" + ) + print( + "\nCUDA device array setup and initial reductions complete for all chares." + ) + print( + f"Starting main simulation loop up to sim_time: {args.sim_time:.2f}s or max_iters: {args.max_iters}" + ) main_loop_done_future = Future() chares.start_main_loop(main_loop_done_future) gathered_results = main_loop_done_future.get() - + total_final_mass = sum(res[0] for res in gathered_results) total_final_te = sum(res[1] for res in gathered_results) max_etime = 0.0 @@ -573,24 +842,28 @@ def main_charm_wrapper(charm_args_list): max_etime = max(res[2] for res in gathered_results) max_niters = max(int(res[3]) for res in gathered_results) - if charm.myPe() == 0: main_loop_wall_time = -1 print(f"\nAll chares finished main simulation loop (max_iters {max_niters}).") print(f"Max final simulation time reached: {max_etime:.4f}s") - print(f"Final Global Mass: {total_final_mass:.6e}, Final Global Total Energy: {total_final_te:.6e}") + print( + f"Final Global Mass: {total_final_mass:.6e}, Final Global Total Energy: {total_final_te:.6e}" + ) if abs(mass0_sum) > 1e-12: - print(f"Relative mass change: {(total_final_mass - mass0_sum) / mass0_sum:.6e}") + print( + f"Relative mass change: {(total_final_mass - mass0_sum) / mass0_sum:.6e}" + ) else: - print(f"Relative mass change: (initial mass was near zero)") + print("Relative mass change: (initial mass was near zero)") if abs(te0_sum) > 1e-12: print(f"Relative TE change: {(total_final_te - te0_sum) / te0_sum:.6e}") else: - print(f"Relative TE change: (initial TE was near zero)") + print("Relative TE change: (initial TE was near zero)") print("\nMiniWeather Charm4Py Numba CUDA simulation finished.") - + charm.exit() -charm.start(main_charm_wrapper) \ No newline at end of file + +charm.start(main_charm_wrapper) diff --git a/examples/mnist/mnist-mpi4py.py b/examples/mnist/mnist-mpi4py.py index c3d1902a..f6d7d331 100644 --- a/examples/mnist/mnist-mpi4py.py +++ b/examples/mnist/mnist-mpi4py.py @@ -9,8 +9,6 @@ import math import random import time -import sys -from torch.autograd import Variable from torchvision import datasets, transforms from mpi4py import MPI import numpy as np @@ -19,6 +17,7 @@ rank = comm.Get_rank() nprocs = comm.Get_size() + # Dataset partitioning helper class Partition(object): @@ -33,6 +32,7 @@ def __getitem__(self, index): data_idx = self.index[index] return self.data[data_idx] + class DataPartitioner(object): def __init__(self, data, sizes=[0.7, 0.2, 0.1], seed=1234): @@ -52,6 +52,7 @@ def __init__(self, data, sizes=[0.7, 0.2, 0.1], seed=1234): def use(self, partition): return Partition(self.data, self.partitions[partition]) + # Neural network architecture class Net(nn.Module): @@ -72,6 +73,7 @@ def forward(self, x): x = self.fc2(x) return F.log_softmax(x, dim=1) + # Worker object (1 per MPI rank) class Worker(object): @@ -84,19 +86,20 @@ def __init__(self, num_workers, epochs): # Partitioning MNIST dataset def partition_dataset(self): - dataset = datasets.MNIST('./data', train=True, download=True, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) + dataset = datasets.MNIST( + "./data", + train=True, + download=True, + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), + ) size = self.num_workers bsz = int(128 / float(size)) # my batch size partition_sizes = [1.0 / size for _ in range(size)] partition = DataPartitioner(dataset, partition_sizes) partition = partition.use(rank) - train_set = torch.utils.data.DataLoader(partition, - batch_size=bsz, - shuffle=True) + train_set = torch.utils.data.DataLoader(partition, batch_size=bsz, shuffle=True) return train_set, bsz # Distributed SGD @@ -120,17 +123,22 @@ def run(self, device): loss.backward() self.average_gradients(self.model, device) self.optimizer.step() - print(f'Rank {rank:4d} | Epoch {self.epoch:4d} | Loss {(epoch_loss / self.num_batches):9.3f} | Time {(time.time() - t0):9.3f}') + print( + f"Rank {rank:4d} | Epoch {self.epoch:4d} | Loss {(epoch_loss / self.num_batches):9.3f} | Time {(time.time() - t0):9.3f}" + ) self.epoch += 1 - print(f'Rank {rank:4d} training complete, average allreduce time (us): {((self.agg_time / self.time_cnt) * 1000000):9.3f}') + print( + f"Rank {rank:4d} training complete, average allreduce time (us): {((self.agg_time / self.time_cnt) * 1000000):9.3f}" + ) agg_time_arr = np.array([self.agg_time]) agg_time_all_arr = np.array([0.0]) comm.Allreduce(agg_time_arr, agg_time_all_arr, op=MPI.SUM) self.agg_time_all = agg_time_all_arr[0] if rank == 0: - print(f'Rank {rank:4d} all average allreduce time (us): {((self.agg_time_all / self.num_workers / self.time_cnt) * 1000000):9.3f}') - + print( + f"Rank {rank:4d} all average allreduce time (us): {((self.agg_time_all / self.num_workers / self.time_cnt) * 1000000):9.3f}" + ) # Gradient averaging def average_gradients(self, model, device): @@ -149,15 +157,17 @@ def average_gradients(self, model, device): # Restore original shape of gradient data param.grad.data = torch.from_numpy(recv_data).to(device) - param.grad.data = param.grad.data.reshape(data_shape) / float(self.num_workers) - + param.grad.data = param.grad.data.reshape(data_shape) / float( + self.num_workers + ) + def main(): # Initialize PyTorch on all PEs num_threads = 1 torch.set_num_threads(num_threads) torch.manual_seed(1234) - print(f'MPI rank {rank} initialized PyTorch with {num_threads} threads') + print(f"MPI rank {rank} initialized PyTorch with {num_threads} threads") if torch.cuda.is_available(): # if multiple devices are available (running with mpirun, not srun), should assign round-robin @@ -165,18 +175,21 @@ def main(): device = torch.device("cuda:" + str(dev_id)) else: device = torch.device("cpu") - + # Create workers and start training epochs = 6 workers = Worker(nprocs, epochs) t0 = time.time() - print(f'Starting MNIST dataset training with {nprocs} MPI processes for {epochs} epochs on device {device}') + print( + f"Starting MNIST dataset training with {nprocs} MPI processes for {epochs} epochs on device {device}" + ) workers.run(device) comm.Barrier() # Training complete if rank == 0: - print(f'Done. Elapsed time: {(time.time() - t0):9.3f} s') + print(f"Done. Elapsed time: {(time.time() - t0):9.3f} s") + main() diff --git a/examples/mnist/mnist.py b/examples/mnist/mnist.py index bfdb158d..3938f19e 100644 --- a/examples/mnist/mnist.py +++ b/examples/mnist/mnist.py @@ -11,13 +11,12 @@ import random import time import sys -from torch.autograd import Variable from torchvision import datasets, transforms from charm4py import charm, Chare, Group, Array, threaded, Reducer -import numpy as np # Add LB command line arguments -sys.argv += ['+LBOff', '+LBCommOff', '+LBObjOnly'] +sys.argv += ["+LBOff", "+LBCommOff", "+LBObjOnly"] + # Dataset partitioning helper class Partition(object): @@ -33,6 +32,7 @@ def __getitem__(self, index): data_idx = self.index[index] return self.data[data_idx] + class DataPartitioner(object): def __init__(self, data, sizes=[0.7, 0.2, 0.1], seed=1234): @@ -52,6 +52,7 @@ def __init__(self, data, sizes=[0.7, 0.2, 0.1], seed=1234): def use(self, partition): return Partition(self.data, self.partitions[partition]) + # Neural network architecture class Net(nn.Module): @@ -72,14 +73,15 @@ def forward(self, x): x = self.fc2(x) return F.log_softmax(x, dim=1) + # Initialize PyTorch on each PE class TorchInit(Chare): def init(self, num_threads): torch.set_num_threads(num_threads) torch.manual_seed(1234) - - + + # Chare array class Worker(Chare): @@ -97,25 +99,23 @@ def __init__(self, num_workers, epochs, lb_epochs): else: # is group element self.myrank = self.thisIndex - - - # Partitioning MNIST dataset def partition_dataset(self): - dataset = datasets.MNIST('./data', train=True, download=True, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])) + dataset = datasets.MNIST( + "./data", + train=True, + download=True, + transform=transforms.Compose( + [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))] + ), + ) size = self.num_workers bsz = int(128 / float(size)) # my batch size partition_sizes = [1.0 / size for _ in range(size)] partition = DataPartitioner(dataset, partition_sizes) partition = partition.use(self.myrank) - train_set = torch.utils.data.DataLoader(partition, - batch_size=bsz, - shuffle=True) + train_set = torch.utils.data.DataLoader(partition, batch_size=bsz, shuffle=True) return train_set, bsz # Distributed SGD @@ -125,9 +125,9 @@ def run(self, done_future=None): # if multiple devices are available (running with charmrun, not srun), should assign round-robin device_index = charm.myPe() % torch.cuda.device_count() device = torch.device("cuda:" + str(device_index)) - else: + else: device = torch.device("cpu") - + if done_future is not None: # Starting a new run self.done_future = done_future @@ -136,7 +136,7 @@ def run(self, done_future=None): self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.5) self.num_batches = math.ceil(len(self.train_set.dataset) / float(bsz)) self.epoch = 0 - + while self.epoch < self.epochs: if self.epoch == 0: charm.LBTurnInstrumentOn() @@ -151,17 +151,23 @@ def run(self, done_future=None): loss.backward() self.average_gradients(self.model, device) self.optimizer.step() - print(f'Chare {self.thisIndex[0]:4d} | PE {charm.myPe():4d} | Epoch {self.epoch:4d} | Loss {(epoch_loss / self.num_batches):9.3f} | Time {(time.time() - t0):9.3f}') + print( + f"Chare {self.thisIndex[0]:4d} | PE {charm.myPe():4d} | Epoch {self.epoch:4d} | Loss {(epoch_loss / self.num_batches):9.3f} | Time {(time.time() - t0):9.3f}" + ) self.epoch += 1 if (self.lb_epochs > 0) and (self.epoch % self.lb_epochs == 0): # Start load balancing self.AtSync() return - print(f'Chare {self.thisIndex[0]:4d} training complete, average allreduce time (us): {((self.agg_time / self.time_cnt) * 1000000):9.3f}') + print( + f"Chare {self.thisIndex[0]:4d} training complete, average allreduce time (us): {((self.agg_time / self.time_cnt) * 1000000):9.3f}" + ) self.agg_time_all = self.allreduce(self.agg_time, Reducer.sum).get() if self.myrank == 0: - print(f'Chare {self.thisIndex[0]:4d} all average allreduce time (us): {((self.agg_time_all / self.num_workers / self.time_cnt) * 1000000):9.3f}') + print( + f"Chare {self.thisIndex[0]:4d} all average allreduce time (us): {((self.agg_time_all / self.num_workers / self.time_cnt) * 1000000):9.3f}" + ) self.contribute(None, None, self.done_future) # Gradient averaging @@ -169,7 +175,7 @@ def average_gradients(self, model, device): for param in model.parameters(): # send param to cpu param.grad.data = param.grad.data.cpu() - + # Flatten gradient data data_shape = param.grad.data.shape reshaped_data = param.grad.data.reshape(-1) @@ -179,38 +185,41 @@ def average_gradients(self, model, device): agg_data = self.allreduce(reshaped_data, Reducer.sum).get() self.agg_time += time.time() - start_time self.time_cnt += 1 - + # convert numpy array to torch tensor agg_data = torch.from_numpy(agg_data) - + # Send to device and restore original shape of gradient data param.grad.data = agg_data.to(device) - param.grad.data = param.grad.data.reshape(data_shape) / float(self.num_workers) - - + param.grad.data = param.grad.data.reshape(data_shape) / float( + self.num_workers + ) # Return method from load balancing def resumeFromSync(self): self.thisProxy[self.thisIndex].run() + def main(args): # Initialize PyTorch on all PEs Group(TorchInit).init(1, ret=True).get() - # Create chare array and start training num_workers = charm.numPes() epochs = 6 lb_epochs = 0 - workers = Array(Worker, num_workers, args=[num_workers, epochs, lb_epochs], useAtSync=True) + workers = Array( + Worker, num_workers, args=[num_workers, epochs, lb_epochs], useAtSync=True + ) t0 = time.time() done = charm.createFuture() - + workers.run(done) done.get() # Training complete - print(f'Done. Elapsed time: {(time.time() - t0):9.3f} s') + print(f"Done. Elapsed time: {(time.time() - t0):9.3f} s") charm.exit() + charm.start(main) diff --git a/examples/multi-module/goodbye.py b/examples/multi-module/goodbye.py index 00c63769..eefbecc3 100644 --- a/examples/multi-module/goodbye.py +++ b/examples/multi-module/goodbye.py @@ -9,7 +9,7 @@ class Goodbye(Chare): def SayGoodbye(self): if charm.myPe() < 10: - print('Goodbye from PE', charm.myPe()) + print("Goodbye from PE", charm.myPe()) # goodbye chares do an empty reduction. after the reduction completes, # the 'done' method of the mainchare will be called. # mainProxy is a global of this module, set previously from the mainchare diff --git a/examples/multi-module/hello.py b/examples/multi-module/hello.py index d183564c..b22ee711 100644 --- a/examples/multi-module/hello.py +++ b/examples/multi-module/hello.py @@ -10,7 +10,7 @@ class Hello(Chare): def SayHi(self): if charm.myPe() < 10: - print('Hello from PE', charm.myPe(), 'on', time.strftime('%c')) + print("Hello from PE", charm.myPe(), "on", time.strftime("%c")) # call SayGoodbye method of the goodbye chare on my PE, bye_chares is # a global variable of this module, set previously from the mainchare bye_chares[charm.myPe()].SayGoodbye() diff --git a/examples/multi-module/main.py b/examples/multi-module/main.py index 360ccc48..2de0be1a 100644 --- a/examples/multi-module/main.py +++ b/examples/multi-module/main.py @@ -11,11 +11,13 @@ def __init__(self, args): # create Group of chares of type goodbye.Goodbye bye_chares = Group(goodbye.Goodbye) # add bye_chares proxy to globals of module hello on every process - future1 = charm.thisProxy.updateGlobals({'bye_chares': bye_chares}, - module_name='hello', awaitable=True) + future1 = charm.thisProxy.updateGlobals( + {"bye_chares": bye_chares}, module_name="hello", awaitable=True + ) # add mainchare proxy to globals of module goodbye on every process - future2 = charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy}, - module_name='goodbye', awaitable=True) + future2 = charm.thisProxy.updateGlobals( + {"mainProxy": self.thisProxy}, module_name="goodbye", awaitable=True + ) charm.wait((future1, future2)) # broadcast a message to the hello chares hello_chares.SayHi() diff --git a/examples/nqueen/nqueen-numba.py b/examples/nqueen/nqueen-numba.py index 66c4a82c..2629ab55 100644 --- a/examples/nqueen/nqueen-numba.py +++ b/examples/nqueen/nqueen-numba.py @@ -46,7 +46,7 @@ class Util(Chare): def compile(self): # trigger compilation by running the function with dummy data (but correct types) - queen_seq(NUM_ROWS-1, numpy.full(NUM_ROWS, -1, dtype=numpy.int8)) + queen_seq(NUM_ROWS - 1, numpy.full(NUM_ROWS, -1, dtype=numpy.int8)) def getSolutionCount(self): return solution_count @@ -62,14 +62,14 @@ def main(args): else: GRAINSIZE = max(1, NUM_ROWS - 2) - print('\nUsage: nqueen [numqueens] [grainsize]') - print('Number of queens is', NUM_ROWS, ', grainsize is', GRAINSIZE) + print("\nUsage: nqueen [numqueens] [grainsize]") + print("Number of queens is", NUM_ROWS, ", grainsize is", GRAINSIZE) # set NUM_ROWS and GRAINSIZE as global variables on every PE global_data = {} - global_data['NUM_ROWS'] = NUM_ROWS - global_data['GRAINSIZE'] = GRAINSIZE - global_data['solution_count'] = 0 # to count number of solutions found on each PE + global_data["NUM_ROWS"] = NUM_ROWS + global_data["GRAINSIZE"] = GRAINSIZE + global_data["solution_count"] = 0 # to count number of solutions found on each PE charm.thisProxy.updateGlobals(global_data, awaitable=True).get() # compile numba functions on every PE before starting, to get @@ -85,7 +85,15 @@ def main(args): charm.waitQD() elapsed = time() - startTime numSolutions = sum(util.getSolutionCount(ret=True).get()) - print('There are', numSolutions, 'solutions to', NUM_ROWS, 'queens. Time taken:', round(elapsed, 3), 'secs') + print( + "There are", + numSolutions, + "solutions to", + NUM_ROWS, + "queens. Time taken:", + round(elapsed, 3), + "secs", + ) exit() diff --git a/examples/nqueen/nqueen.py b/examples/nqueen/nqueen.py index 64a1cf75..2c4856b3 100644 --- a/examples/nqueen/nqueen.py +++ b/examples/nqueen/nqueen.py @@ -50,25 +50,33 @@ def main(args): else: GRAINSIZE = max(1, NUM_ROWS - 2) - print('\nUsage: nqueen [numqueens] [grainsize]') - print('Number of queens is', NUM_ROWS, ', grainsize is', GRAINSIZE) + print("\nUsage: nqueen [numqueens] [grainsize]") + print("Number of queens is", NUM_ROWS, ", grainsize is", GRAINSIZE) # set NUM_ROWS and GRAINSIZE as global variables on every PE global_data = {} - global_data['NUM_ROWS'] = NUM_ROWS - global_data['GRAINSIZE'] = GRAINSIZE - global_data['solution_count'] = 0 # to count number of solutions found on each PE + global_data["NUM_ROWS"] = NUM_ROWS + global_data["GRAINSIZE"] = GRAINSIZE + global_data["solution_count"] = 0 # to count number of solutions found on each PE charm.thisProxy.updateGlobals(global_data, awaitable=True).get() startTime = time() # initialize empty solution, solution holds the column number where a queen is placed, for each row - solution = array.array('b', [-1] * NUM_ROWS) + solution = array.array("b", [-1] * NUM_ROWS) queen(0, solution) # wait until there is no work being done on any PE (quiescence detection) charm.waitQD() elapsed = time() - startTime numSolutions = sum(Group(Util).getSolutionCount(ret=True).get()) - print('There are', numSolutions, 'solutions to', NUM_ROWS, 'queens. Time taken:', round(elapsed, 3), 'secs') + print( + "There are", + numSolutions, + "solutions to", + NUM_ROWS, + "queens. Time taken:", + round(elapsed, 3), + "secs", + ) exit() diff --git a/examples/particle/particle.py b/examples/particle/particle.py index a6b5b432..7e529872 100644 --- a/examples/particle/particle.py +++ b/examples/particle/particle.py @@ -9,7 +9,7 @@ # more info about load balancing command-line options here: # https://charm.readthedocs.io/en/latest/charm++/manual.html#compiler-and-runtime-options-to-use-load-balancing-module -sys.argv += ['+LBCommOff', '+LBObjOnly'] +sys.argv += ["+LBCommOff", "+LBObjOnly"] NUM_ITER = 100 SIM_BOX_SIZE = 100.0 @@ -21,9 +21,9 @@ def __init__(self, x, y): self.coords = [x, y] # coordinate of this particle in the 2D space def perturb(self, cellsize): - """ randomly move the particle """ + """randomly move the particle""" for i in range(len(self.coords)): - self.coords[i] += random.uniform(-cellsize[i]*0.1, cellsize[i]*0.1) + self.coords[i] += random.uniform(-cellsize[i] * 0.1, cellsize[i] * 0.1) # if particle goes out of bounds of the simulation space, appear on the other side if self.coords[i] > SIM_BOX_SIZE: self.coords[i] -= SIM_BOX_SIZE @@ -42,25 +42,42 @@ def __init__(self, array_dims, max_particles_per_cell_start, sim_done_future): # create particles in this cell, in random positions self.particles = [] - N = self.getInitialNumParticles(array_dims, max_particles_per_cell_start, cellsize) - lo_x = self.thisIndex[0] * cellsize[0] # x coordinate of lower left corner of my cell - lo_y = self.thisIndex[1] * cellsize[1] # y coordinate of lower left corner of my cell + N = self.getInitialNumParticles( + array_dims, max_particles_per_cell_start, cellsize + ) + lo_x = ( + self.thisIndex[0] * cellsize[0] + ) # x coordinate of lower left corner of my cell + lo_y = ( + self.thisIndex[1] * cellsize[1] + ) # y coordinate of lower left corner of my cell for _ in range(N): - self.particles.append(Particle(random.uniform(lo_x, lo_x + cellsize[0] - 0.001), - random.uniform(lo_y, lo_y + cellsize[1] - 0.001))) + self.particles.append( + Particle( + random.uniform(lo_x, lo_x + cellsize[0] - 0.001), + random.uniform(lo_y, lo_y + cellsize[1] - 0.001), + ) + ) # obtain list of my neighbors in the 2D cell grid, and establish a Channel with each self.neighbor_indexes = self.getNbIndexes(array_dims) - self.neighbors = [Channel(self, remote=self.thisProxy[idx]) for idx in self.neighbor_indexes] + self.neighbors = [ + Channel(self, remote=self.thisProxy[idx]) for idx in self.neighbor_indexes + ] def getInitialNumParticles(self, dims, max_particles, cellsize): # return the number of particles to create on this cell at the start of # the simulation. The cells that are closer to the grid center start # with max_particles particles, the rest start with 0 grid_center = (SIM_BOX_SIZE / 2, SIM_BOX_SIZE / 2) - cell_center = (self.thisIndex[0] * cellsize[0] + cellsize[0] / 2, - self.thisIndex[1] * cellsize[1] + cellsize[1] / 2) - dist = math.sqrt((cell_center[0] - grid_center[0])**2 + (cell_center[1] - grid_center[1])**2) + cell_center = ( + self.thisIndex[0] * cellsize[0] + cellsize[0] / 2, + self.thisIndex[1] * cellsize[1] + cellsize[1] / 2, + ) + dist = math.sqrt( + (cell_center[0] - grid_center[0]) ** 2 + + (cell_center[1] - grid_center[1]) ** 2 + ) if dist <= SIM_BOX_SIZE / 5: return max_particles else: @@ -70,8 +87,8 @@ def getNbIndexes(self, arrayDims): # return indexes of neighboring cells (N,NE,E,SE,S,SW,W,NW) with wrap around nbs = set() x, y = self.thisIndex - nb_x_coords = [(x-1)%arrayDims[0], x, (x+1)%arrayDims[0]] - nb_y_coords = [(y-1)%arrayDims[1], y, (y+1)%arrayDims[1]] + nb_x_coords = [(x - 1) % arrayDims[0], x, (x + 1) % arrayDims[0]] + nb_y_coords = [(y - 1) % arrayDims[1], y, (y + 1) % arrayDims[1]] for nb_x in nb_x_coords: for nb_y in nb_y_coords: if (nb_x, nb_y) != self.thisIndex: @@ -83,7 +100,7 @@ def getNumParticles(self): @coro def run(self): - """ this is the simulation loop of each cell """ + """this is the simulation loop of each cell""" cellsize = self.cellsize while self.iteration < NUM_ITER: # in each iteration, this cell's particles move randomly. some @@ -98,12 +115,17 @@ def run(self): # directly into a message) # we are sending an array of particle data to each neighbor - outgoingParticles = {nb_idx: array.array('d') for nb_idx in self.neighbor_indexes} + outgoingParticles = { + nb_idx: array.array("d") for nb_idx in self.neighbor_indexes + } i = 0 while i < len(self.particles): p = self.particles[i] p.perturb(cellsize) - dest_cell = (int(p.coords[0] / cellsize[0]), int(p.coords[1] / cellsize[1])) + dest_cell = ( + int(p.coords[0] / cellsize[0]), + int(p.coords[1] / cellsize[1]), + ) if dest_cell != self.thisIndex: # this particle is moving to a neighboring cell outgoingParticles[dest_cell].extend(p.coords) @@ -120,13 +142,17 @@ def run(self): # yields channels as they become ready (have data to receive) for channel in charm.iwait(self.neighbors): incoming = channel.recv() - self.particles += [Particle(float(incoming[i]), - float(incoming[i+1])) for i in range(0, len(incoming), 2)] + self.particles += [ + Particle(float(incoming[i]), float(incoming[i + 1])) + for i in range(0, len(incoming), 2) + ] if self.iteration % 10 == 0: # reduction to report the current max particles per cell. # this call is asynchronous and doesn't block me - self.reduce(self.thisProxy[(0,0)].reportMax, len(self.particles), Reducer.max) + self.reduce( + self.thisProxy[(0, 0)].reportMax, len(self.particles), Reducer.max + ) if self.iteration % 20 == 0: # tell Charm that this cell is ready for load balancing. @@ -151,11 +177,13 @@ def resumeFromSync(self): self.thisProxy[self.thisIndex].run() def reportMax(self, max_particles): - print('Max particles per cell= ' + str(max_particles)) + print("Max particles per cell= " + str(max_particles)) def main(args): - print('\nUsage: particle.py [num_chares_x num_chares_y] [max_particles_per_cell_start]') + print( + "\nUsage: particle.py [num_chares_x num_chares_y] [max_particles_per_cell_start]" + ) if len(args) >= 3: array_dims = (int(args[1]), int(args[2])) else: @@ -165,23 +193,30 @@ def main(args): else: max_particles_per_cell_start = 10000 - print('\nCell array size:', array_dims[0], 'x', array_dims[1], 'cells') + print("\nCell array size:", array_dims[0], "x", array_dims[1], "cells") # create 2D Cell chare array and start simulation sim_done = Future() - cells = Array(Cell, array_dims, - args=[array_dims, max_particles_per_cell_start, sim_done], - useAtSync=True) + cells = Array( + Cell, + array_dims, + args=[array_dims, max_particles_per_cell_start, sim_done], + useAtSync=True, + ) num_particles_per_cell = cells.getNumParticles(ret=True).get() - print('Total particles created:', sum(num_particles_per_cell)) - print('Initial conditions:\n\tmin particles per cell:', min(num_particles_per_cell), - '\n\tmax particles per cell:', max(num_particles_per_cell)) - print('\nStarting simulation') + print("Total particles created:", sum(num_particles_per_cell)) + print( + "Initial conditions:\n\tmin particles per cell:", + min(num_particles_per_cell), + "\n\tmax particles per cell:", + max(num_particles_per_cell), + ) + print("\nStarting simulation") t0 = time.time() cells.run() # this is a broadcast # wait for the simulation to finish sim_done.get() - print('Particle simulation done, elapsed time=', round(time.time() - t0, 3), 'secs') + print("Particle simulation done, elapsed time=", round(time.time() - t0, 3), "secs") exit() diff --git a/examples/pool/pool_fibonacci.py b/examples/pool/pool_fibonacci.py index f0153439..32398f23 100644 --- a/examples/pool/pool_fibonacci.py +++ b/examples/pool/pool_fibonacci.py @@ -2,14 +2,17 @@ # Recursive Parallel Fibonacci + @coro def fib(n): if n < 2: return n - return sum(charm.pool.map(fib, [n-1, n-2])) + return sum(charm.pool.map(fib, [n - 1, n - 2])) + def main(args): - print('fibonacci(13)=', fib(13)) + print("fibonacci(13)=", fib(13)) exit() + charm.start(main) diff --git a/examples/pool/pool_simple.py b/examples/pool/pool_simple.py index 0adc7dd4..2914530a 100644 --- a/examples/pool/pool_simple.py +++ b/examples/pool/pool_simple.py @@ -4,9 +4,11 @@ def square(x): return x**2 + def twice(x): return 2 * x + def main(args): ray.init() results = charm.pool.map_async(square, [4], chunksize=1, multi_future=True) @@ -15,7 +17,8 @@ def main(args): for x in results_twice: print(x.get()) - #print(result) # prints [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] + # print(result) # prints [0, 1, 4, 9, 16, 25, 36, 49, 64, 81] exit() + charm.start(main) diff --git a/examples/ray/batch_prediction.py b/examples/ray/batch_prediction.py index 1869fc41..61387a96 100644 --- a/examples/ray/batch_prediction.py +++ b/examples/ray/batch_prediction.py @@ -26,13 +26,14 @@ def make_prediction(model, shard_path): # Here we just return the size about the result in this example. return len(result) + def main(args): ray.init() # 12 files, one for each remote task. input_files = [ - f"s3://anonymous@air-example-data/ursa-labs-taxi-data/downsampled_2009_full_year_data.parquet" - f"/fe41422b01c04169af2a65a83b753e0f_{i:06d}.parquet" - for i in range(12) + f"s3://anonymous@air-example-data/ursa-labs-taxi-data/downsampled_2009_full_year_data.parquet" + f"/fe41422b01c04169af2a65a83b753e0f_{i:06d}.parquet" + for i in range(12) ] # ray.put() the model just once to local object store, and then pass the @@ -58,5 +59,6 @@ def main(args): exit() -if __name__ == '__main__': - charm.start(main) \ No newline at end of file + +if __name__ == "__main__": + charm.start(main) diff --git a/examples/ray/fib.py b/examples/ray/fib.py index 84f3ef22..65dce862 100644 --- a/examples/ray/fib.py +++ b/examples/ray/fib.py @@ -12,6 +12,7 @@ # ray version + @ray.remote def fib(n): if n < 2: @@ -21,22 +22,23 @@ def fib(n): # (tasks can execute on any PE). map will block here for the result of # fib(n-1) and fib(n-2), which is why we mark fib as a coroutine # return sum(charm.pool.map(fib, [n-1, n-2])) - result1 = fib.remote(n-1) - result2 = fib.remote(n-2) - return ray.get(result1)+ray.get(result2) + result1 = fib.remote(n - 1) + result2 = fib.remote(n - 2) + return ray.get(result1) + ray.get(result2) def main(args): ray.init() - print('\nUsage: fib.py [n]') + print("\nUsage: fib.py [n]") n = 12 if len(args) > 1: n = int(args[1]) - print('Calculating fibonacci of N=' + str(n)) + print("Calculating fibonacci of N=" + str(n)) t0 = time.time() result = fib.remote(n) - print('Result is', ray.get(result), 'elapsed=', round(time.time() - t0, 3)) - #charm.thisProxy.stop_profiling() + print("Result is", ray.get(result), "elapsed=", round(time.time() - t0, 3)) + # charm.thisProxy.stop_profiling() exit() + charm.start(main) diff --git a/examples/ray/mandelbrot/mandelbrot.py b/examples/ray/mandelbrot/mandelbrot.py index 0fb52923..d514a11f 100644 --- a/examples/ray/mandelbrot/mandelbrot.py +++ b/examples/ray/mandelbrot/mandelbrot.py @@ -3,6 +3,7 @@ import matplotlib.pyplot as plt import os + # Compute whether a point is in the Mandelbrot set def mandelbrot_fast(re, im, max_iter): zr = zi = 0.0 @@ -15,6 +16,7 @@ def mandelbrot_fast(re, im, max_iter): zr = zr2 - zi2 + re return max_iter + # Remote task to compute a tile @ray.remote def compute_tile(x_start, x_end, y_start, y_end, width, height, max_iter): @@ -26,13 +28,18 @@ def compute_tile(x_start, x_end, y_start, y_end, width, height, max_iter): tile[y - y_start, x - x_start] = mandelbrot_fast(re, im, max_iter) return tile -def generate_mandelbrot_image_optimized(width=12000, height=8000, max_iter=200, tile_size=1000, max_pending=1000): + +def generate_mandelbrot_image_optimized( + width=12000, height=8000, max_iter=200, tile_size=1000, max_pending=1000 +): # Pre-create the empty file with the correct size total_bytes = 2 * width * height # 2 bytes per pixel (uint16) with open("output/mandelbrot_large.dat", "wb") as f: f.seek(total_bytes - 1) - f.write(b'\0') - result_image = np.memmap("output/mandelbrot_large.dat", dtype=np.uint16, mode='w+', shape=(height, width)) + f.write(b"\0") + result_image = np.memmap( + "output/mandelbrot_large.dat", dtype=np.uint16, mode="w+", shape=(height, width) + ) pending = [] for y in range(0, height, tile_size): @@ -45,11 +52,11 @@ def generate_mandelbrot_image_optimized(width=12000, height=8000, max_iter=200, if len(pending) >= max_pending: (x0, y0), tile = pending.pop(0) tile = ray.get(tile) - result_image[y0:y0+tile.shape[0], x0:x0+tile.shape[1]] = tile + result_image[y0 : y0 + tile.shape[0], x0 : x0 + tile.shape[1]] = tile for (x0, y0), tile_ref in pending: tile = ray.get(tile_ref) - result_image[y0:y0+tile.shape[0], x0:x0+tile.shape[1]] = tile + result_image[y0 : y0 + tile.shape[0], x0 : x0 + tile.shape[1]] = tile return result_image @@ -59,13 +66,19 @@ def main(args): os.makedirs(os.path.dirname(output_path), exist_ok=True) ray.init() # Run the benchmark - image = generate_mandelbrot_image_optimized(width=int(args[1]), height=int(args[2]), max_iter=int(args[3]), tile_size=int(args[4])) + image = generate_mandelbrot_image_optimized( + width=int(args[1]), + height=int(args[2]), + max_iter=int(args[3]), + tile_size=int(args[4]), + ) # Optional: show the result - plt.imshow(image, cmap='hot') + plt.imshow(image, cmap="hot") plt.title("Mandelbrot Set (Ray)") - plt.axis('off') - plt.savefig("mandelbrot_ray.png", dpi=300, bbox_inches='tight') - os.remove('output/mandelbrot_large.dat') + plt.axis("off") + plt.savefig("mandelbrot_ray.png", dpi=300, bbox_inches="tight") + os.remove("output/mandelbrot_large.dat") charm.exit() + charm.start(main) diff --git a/examples/ray/model_selection.py b/examples/ray/model_selection.py index 384d54d4..1279e3ba 100644 --- a/examples/ray/model_selection.py +++ b/examples/ray/model_selection.py @@ -8,7 +8,7 @@ import torch.optim as optim from torchvision import datasets, transforms -#import ray +# import ray from charm4py import charm, ray @@ -44,6 +44,7 @@ def get_data_loaders(batch_size): ) return train_loader, test_loader + class ConvNet(nn.Module): """Simple two layer Convolutional Neural Network.""" @@ -96,6 +97,7 @@ def test(model, test_loader, device=torch.device("cpu")): return correct / total + @ray.remote def evaluate_hyperparameters(config): model = ConvNet() @@ -106,6 +108,7 @@ def evaluate_hyperparameters(config): train(model, optimizer, train_loader) return test(model, test_loader) + def main(args): ray.init() @@ -138,18 +141,18 @@ def main(args): hyperparameters = hyperparameters_mapping[result_id] accuracy = ray.get(result_id) - #print( + # print( # """We achieve accuracy {:.3}% with # learning_rate: {:.2} # batch_size: {} # momentum: {:.2} - #""".format( + # """.format( # 100 * accuracy, # hyperparameters["learning_rate"], # hyperparameters["batch_size"], # hyperparameters["momentum"], # ) - #) + # ) if accuracy > best_accuracy: best_hyperparameters = hyperparameters best_accuracy = accuracy @@ -172,4 +175,5 @@ def main(args): exit() + charm.start(main) diff --git a/examples/ray/parameter_server.py b/examples/ray/parameter_server.py index b59d5158..a7a7043f 100644 --- a/examples/ray/parameter_server.py +++ b/examples/ray/parameter_server.py @@ -52,6 +52,7 @@ def evaluate(model, test_loader): correct += (predicted == target).sum().item() return 100.0 * correct / total + class ConvNet(nn.Module): """Small ConvNet for MNIST.""" @@ -84,6 +85,7 @@ def set_gradients(self, gradients): if g is not None: p.grad = torch.from_numpy(g) + @ray.remote class ParameterServer(object): def __init__(self, lr): @@ -102,6 +104,7 @@ def apply_gradients(self, *gradients): def get_weights(self): return self.model.get_weights() + @ray.remote class DataWorker(object): def __init__(self): @@ -138,7 +141,9 @@ def sync_train(args): current_weights = ps.get_weights.remote() for i in range(iterations): - gradients = [worker.compute_gradients.remote(current_weights) for worker in workers] + gradients = [ + worker.compute_gradients.remote(current_weights) for worker in workers + ] # Calculate update after all gradients are available. current_weights = ps.apply_gradients.remote(*gradients) @@ -152,7 +157,8 @@ def sync_train(args): print("Final accuracy is {:.1f}.".format(accuracy)) exit() # Clean up Ray resources and processes before the next example. - #ray.shutdown() + # ray.shutdown() + def async_train(args): ray.init() @@ -191,8 +197,9 @@ def async_train(args): print("Final accuracy is {:.1f}.".format(accuracy)) exit() -if __name__ == '__main__': - if sys.argv[1] == 'sync': + +if __name__ == "__main__": + if sys.argv[1] == "sync": charm.start(sync_train) - elif sys.argv[1] == 'async': + elif sys.argv[1] == "async": charm.start(async_train) diff --git a/examples/ray/simple.py b/examples/ray/simple.py index 800f6b9a..1befb172 100644 --- a/examples/ray/simple.py +++ b/examples/ray/simple.py @@ -1,4 +1,4 @@ -from charm4py import charm, coro, Chare, Array, ray +from charm4py import charm, ray from time import sleep import numpy as np @@ -12,10 +12,17 @@ def add_task(a, b): print("Add task", a, b) return a + b + @ray.remote class Compute(object): def __init__(self, arg): - print('Hello from MyChare instance in processor', charm.myPe(), 'index', self.thisIndex, arg) + print( + "Hello from MyChare instance in processor", + charm.myPe(), + "index", + self.thisIndex, + arg, + ) def add(self, a, b): sleep(2) @@ -27,13 +34,13 @@ def main(args): ray.init() # create 3 instances of MyChare, distributed among cores by the runtime arr = [Compute.remote(i) for i in range(4)] - + obj1 = np.arange(100) obj2 = np.arange(100) a = ray.put(obj1) b = ray.put(obj2) - c = arr[0].add.remote(1, 2) # fut id 0 - d = arr[1].add.remote(3, c) # fut id 1 + c = arr[0].add.remote(1, 2) # fut id 0 + d = arr[1].add.remote(3, c) # fut id 1 e = arr[2].add.remote(2, d) f = arr[3].add.remote(c, 4) g = arr[3].add.remote(a, b) diff --git a/examples/simple/chares.py b/examples/simple/chares.py index 6048ed34..13143de3 100644 --- a/examples/simple/chares.py +++ b/examples/simple/chares.py @@ -4,7 +4,7 @@ class MyChare(Chare): def __init__(self): - print('Hello from MyChare instance in processor', charm.myPe()) + print("Hello from MyChare instance in processor", charm.myPe()) def main(args): diff --git a/examples/simple/hello_world.py b/examples/simple/hello_world.py index 32872ec9..bdea82e3 100644 --- a/examples/simple/hello_world.py +++ b/examples/simple/hello_world.py @@ -4,7 +4,7 @@ class Hello(Chare): def SayHi(self): - print('Hello World from element', self.thisIndex) + print("Hello World from element", self.thisIndex) def main(args): diff --git a/examples/simple/start.py b/examples/simple/start.py index 28af1d7f..333a6bc4 100644 --- a/examples/simple/start.py +++ b/examples/simple/start.py @@ -2,8 +2,8 @@ def main(args): - print('Charm program started on processor', charm.myPe()) - print('Running on', charm.numPes(), 'processors') + print("Charm program started on processor", charm.myPe()) + print("Running on", charm.numPes(), "processors") exit() diff --git a/examples/sssp/sssp.py b/examples/sssp/sssp.py index 7dc854e6..8b7df866 100644 --- a/examples/sssp/sssp.py +++ b/examples/sssp/sssp.py @@ -1,46 +1,60 @@ -from charm4py import charm, Chare, Array, coro, Channel, Future, Reducer +from charm4py import charm, Chare, Array, Future, Reducer import random import time + class SsspChares(Chare): def __init__(self): - self.local_graph = [] #[vertex_index, edge_list, distance] + self.local_graph = [] # [vertex_index, edge_list, distance] self.partition_indices = [] self.start_vertex = 0 self.num_local_vertices = 0 self.my_index = charm.myPe() - + def get_partition(self, edge_list, partition_indices, callback): self.partition_indices = partition_indices self.start_vertex = partition_indices[self.my_index] - self.num_local_vertices = partition_indices[self.my_index + 1] - partition_indices[self.my_index] - self.local_graph = [[self.start_vertex + i, [], float('inf')] for i in range(self.num_local_vertices)] + self.num_local_vertices = ( + partition_indices[self.my_index + 1] - partition_indices[self.my_index] + ) + self.local_graph = [ + [self.start_vertex + i, [], float("inf")] + for i in range(self.num_local_vertices) + ] for i in range(len(edge_list)): - self.local_graph[edge_list[i][0] - self.start_vertex][1].append((edge_list[i][1], edge_list[i][2])) + self.local_graph[edge_list[i][0] - self.start_vertex][1].append( + (edge_list[i][1], edge_list[i][2]) + ) self.reduce(callback, None, Reducer.nop) - + def calculate_destination(self, vertex_index): - for i in range(len(self.partition_indices)-1): - if vertex_index >= self.partition_indices[i] and vertex_index < self.partition_indices[i+1]: + for i in range(len(self.partition_indices) - 1): + if ( + vertex_index >= self.partition_indices[i] + and vertex_index < self.partition_indices[i + 1] + ): return i - return len(self.partition_indices)-1 - + return len(self.partition_indices) - 1 + def update_distance(self, update): - local_index = update[0]-self.start_vertex + local_index = update[0] - self.start_vertex if update[1] < self.local_graph[local_index][2]: - self.local_graph[update[0]-self.start_vertex][2] = update[1] + self.local_graph[update[0] - self.start_vertex][2] = update[1] for i in range(len(self.local_graph[local_index][1])): dest_vertex = self.local_graph[local_index][1][i][0] dest_partition = self.calculate_destination(dest_vertex) - cost = self.local_graph[local_index][2] + self.local_graph[local_index][1][i][1] + cost = ( + self.local_graph[local_index][2] + + self.local_graph[local_index][1][i][1] + ) new_update = (dest_vertex, cost) self.thisProxy[dest_partition].update_distance(new_update) - + def print_results(self, callback): max_local_cost = 0.0 for i in range(len(self.local_graph)): - #print("Final cost of vertex", self.local_graph[i][0], ":", self.local_graph[i][2]) + # print("Final cost of vertex", self.local_graph[i][0], ":", self.local_graph[i][2]) if self.local_graph[i][2] > max_local_cost: max_local_cost = self.local_graph[i][2] self.reduce(callback, max_local_cost, Reducer.max) @@ -50,32 +64,34 @@ class Main(Chare): def __init__(self, args): if len(args) != 5: - print("Wrong number of arguments. Usage: sssp.py ") + print( + "Wrong number of arguments. Usage: sssp.py " + ) exit() - #define parameters + # define parameters self.num_vertices = int(args[1]) self.num_edges = int(args[2]) self.random_seed = int(args[3]) self.source_vertex = int(args[4]) - if self.source_vertex < 0 or self.source_vertex > self.num_vertices-1: + if self.source_vertex < 0 or self.source_vertex > self.num_vertices - 1: print("Source vertex out of range") exit() - #generate edges randomly and sort them by edge source + # generate edges randomly and sort them by edge source begin_generation = time.time() random.seed(self.random_seed) self.edge_list = [] for i in range(self.num_edges): - edge_source = random.randint(0, self.num_vertices-1) - edge_dest = random.randint(0, self.num_vertices-1) - while edge_source==edge_dest: - edge_dest = random.randint(0, self.num_vertices-1) + edge_source = random.randint(0, self.num_vertices - 1) + edge_dest = random.randint(0, self.num_vertices - 1) + while edge_source == edge_dest: + edge_dest = random.randint(0, self.num_vertices - 1) edge_weight = random.random() self.edge_list.append((edge_source, edge_dest, edge_weight)) self.edge_list.sort(key=lambda a: a[0]) - #initiate worker array + # initiate worker array num_partitions = charm.numPes() self.workers = Array(SsspChares, num_partitions) - #split edges by pe + # split edges by pe send_lists = [[] for _ in range(num_partitions)] avg_partition_size = self.num_edges // num_partitions for i in range(len(self.edge_list)): @@ -83,14 +99,20 @@ def __init__(self, args): if partition_num >= num_partitions: partition_num = num_partitions - 1 send_lists[partition_num].append(self.edge_list[i]) - #move edges to keep vertices intact + # move edges to keep vertices intact for i in range(1, len(send_lists)): - if len(send_lists[i-1])!=0 and send_lists[i-1][-1][0]==send_lists[i][0][0]: - last_previous_vertex = send_lists[i-1][-1][0] - while len(send_lists[i]) > 0 and send_lists[i][0][0] == last_previous_vertex: + if ( + len(send_lists[i - 1]) != 0 + and send_lists[i - 1][-1][0] == send_lists[i][0][0] + ): + last_previous_vertex = send_lists[i - 1][-1][0] + while ( + len(send_lists[i]) > 0 + and send_lists[i][0][0] == last_previous_vertex + ): edge_to_move = send_lists[i].pop(0) - send_lists[i-1].append(edge_to_move) - #define partition indices + send_lists[i - 1].append(edge_to_move) + # define partition indices partition_indices = [] for i in range(len(send_lists)): if len(send_lists[i]) > 0: @@ -99,21 +121,24 @@ def __init__(self, args): partition_indices.append(partition_indices[-1]) partition_indices.append(self.num_vertices) generation_length = time.time() - begin_generation - #send information to pes + # send information to pes f = Future() for i in range(num_partitions): self.workers[i].get_partition(send_lists[i], partition_indices, f) f.get() - #find partition of start vertex + # find partition of start vertex source_partition = 0 - for i in range(len(partition_indices)-1): - if self.source_vertex >= partition_indices[i] and self.source_vertex < partition_indices[i+1]: + for i in range(len(partition_indices) - 1): + if ( + self.source_vertex >= partition_indices[i] + and self.source_vertex < partition_indices[i + 1] + ): source_partition = i break begin_algo = time.time() self.workers[source_partition].update_distance((self.source_vertex, 0.0)) charm.waitQD() - algo_length = time.time()-begin_algo + algo_length = time.time() - begin_algo final_stats = Future() self.workers.print_results(final_stats) global_max = final_stats.get() @@ -122,8 +147,5 @@ def __init__(self, args): print("Algorithm runtime:", algo_length) exit() - - - charm.start(Main) diff --git a/examples/wave2d/wave2d.py b/examples/wave2d/wave2d.py index f1e96140..895d3d29 100644 --- a/examples/wave2d/wave2d.py +++ b/examples/wave2d/wave2d.py @@ -1,4 +1,3 @@ - # This program solves the 2-d wave equation over a grid, displaying pretty results. # See README.rst for more information. @@ -8,12 +7,14 @@ import numpy as np import numba import random + try: import tkinter from PIL import Image, ImageTk, ImageDraw except ImportError: import sys - sys.argv += ['--NO-RENDER'] + + sys.argv += ["--NO-RENDER"] IMAGE_WIDTH, IMAGE_HEIGHT = 800, 699 @@ -29,20 +30,26 @@ class Main(Chare): def __init__(self, args): self.RENDER = True try: - args.remove('--NO-RENDER') + args.remove("--NO-RENDER") self.RENDER = False except ValueError: pass - print('\nUsage: wave2d.py [num_iterations] [max_framerate])') + print("\nUsage: wave2d.py [num_iterations] [max_framerate])") global NUM_ITERATIONS, MAX_FRAMERATE if len(args) > 1: NUM_ITERATIONS = int(args[1]) if len(args) > 2: MAX_FRAMERATE = int(args[2]) - print('Running wave2d on', charm.numPes(), 'processors for', NUM_ITERATIONS, 'iterations') - print('Max framerate is', MAX_FRAMERATE, 'frames per second') + print( + "Running wave2d on", + charm.numPes(), + "processors for", + NUM_ITERATIONS, + "iterations", + ) + print("Max framerate is", MAX_FRAMERATE, "frames per second") self.count = 0 # tracks from how many workers I have received a subimage for this iteration programStartTime = frameStartTime = time.time() @@ -54,7 +61,7 @@ def __init__(self, args): if self.RENDER: tk = tkinter.Tk() - self.frame = Image.new('RGB', (IMAGE_WIDTH, IMAGE_HEIGHT)) + self.frame = Image.new("RGB", (IMAGE_WIDTH, IMAGE_HEIGHT)) img = ImageTk.PhotoImage(self.frame) label_image = tkinter.Label(tk, image=img) label_image.pack() @@ -64,14 +71,14 @@ def __init__(self, args): self.frameReady.get() # wait for the next frame if MAX_FRAMERATE > 0: elapsed = time.time() - frameStartTime - if elapsed < 1/MAX_FRAMERATE: + if elapsed < 1 / MAX_FRAMERATE: # enforce framerate - charm.sleep(1/MAX_FRAMERATE - elapsed) + charm.sleep(1 / MAX_FRAMERATE - elapsed) if self.RENDER: - fps = round(1/(time.time() - frameStartTime)) + fps = round(1 / (time.time() - frameStartTime)) # draw frames per second value on image d = ImageDraw.Draw(self.frame) - d.text((10,10), str(fps) + ' fps', fill=(0,0,0,255)) + d.text((10, 10), str(fps) + " fps", fill=(0, 0, 0, 255)) img = ImageTk.PhotoImage(self.frame) label_image.configure(image=img) label_image.image = img @@ -79,19 +86,19 @@ def __init__(self, args): tk.update() # loop simulation every 1000 iterations - reset = (i % 1000 == 0) + reset = i % 1000 == 0 frameStartTime = time.time() array.resume(reset) # tell workers to resume self.frameReady = Future() - print('Program Done!, Total time=', time.time() - programStartTime) + print("Program Done!, Total time=", time.time() - programStartTime) exit() # every worker calls this method to deposit their subimage def depositSubImage(self, data, pos, img_size): self.count += 1 if self.RENDER: - self.frame.paste(Image.frombytes('RGB', img_size, data), box=pos) + self.frame.paste(Image.frombytes("RGB", img_size, data), box=pos) if self.count == CHARE_ARRAY_WIDTH * CHARE_ARRAY_HEIGHT: # received image data from all chares self.count = 0 @@ -103,18 +110,25 @@ class Wave(Chare): def setInitialConditions(self): # setup some initial pressure pertubations for timesteps t-1 and t self.pressure_new = np.zeros((self.myheight, self.mywidth)) # time t+1 - self.pressure = np.zeros((self.myheight, self.mywidth)) # time t + self.pressure = np.zeros((self.myheight, self.mywidth)) # time t self.pressure_old = np.zeros((self.myheight, self.mywidth)) # time t-1 - init_pressure(NUM_INITIAL_PERTURBATIONS, IMAGE_WIDTH, IMAGE_HEIGHT, - self.mywidth, self.myheight, self.thisIndex, - self.pressure, self.pressure_old) + init_pressure( + NUM_INITIAL_PERTURBATIONS, + IMAGE_WIDTH, + IMAGE_HEIGHT, + self.mywidth, + self.myheight, + self.thisIndex, + self.pressure, + self.pressure_old, + ) def resume(self, reset=False): self.resumeFuture(reset) @coro def work(self, mainProxy): - """ this is the main simulation loop for each chare """ + """this is the main simulation loop for each chare""" # size of my rectangular portion of the image self.mywidth = IMAGE_WIDTH // CHARE_ARRAY_WIDTH @@ -124,25 +138,25 @@ def work(self, mainProxy): i = self.thisIndex X, Y = CHARE_ARRAY_WIDTH, CHARE_ARRAY_HEIGHT # establish a Channel with neighbor chares in the 2D grid - left = Channel(self, remote=self.thisProxy[(i[0]-1)%X, i[1]]) - right = Channel(self, remote=self.thisProxy[(i[0]+1)%X, i[1]]) - top = Channel(self, remote=self.thisProxy[i[0], (i[1]-1)%Y]) - bottom = Channel(self, remote=self.thisProxy[i[0], (i[1]+1)%Y]) + left = Channel(self, remote=self.thisProxy[(i[0] - 1) % X, i[1]]) + right = Channel(self, remote=self.thisProxy[(i[0] + 1) % X, i[1]]) + top = Channel(self, remote=self.thisProxy[i[0], (i[1] - 1) % Y]) + bottom = Channel(self, remote=self.thisProxy[i[0], (i[1] + 1) % Y]) width, height = self.mywidth, self.myheight # coordinate where my portion of the image is located sx = self.thisIndex[0] * width sy = self.thisIndex[1] * height # data will store my portion of the image - data = np.zeros(width*height*3, dtype=np.uint8) + data = np.zeros(width * height * 3, dtype=np.uint8) buffers = [None] * 4 # run simulation now while True: - top_edge = self.pressure[[0],:].reshape(width) - bottom_edge = self.pressure[[-1],:].reshape(width) - left_edge = self.pressure[:,[0]].reshape(height) - right_edge = self.pressure[:,[-1]].reshape(height) + top_edge = self.pressure[[0], :].reshape(width) + bottom_edge = self.pressure[[-1], :].reshape(width) + left_edge = self.pressure[:, [0]].reshape(height) + right_edge = self.pressure[:, [-1]].reshape(height) # send ghost values to neighbors left.send(RIGHT, left_edge) @@ -156,12 +170,24 @@ def work(self, mainProxy): side, ghost_values = channel.recv() buffers[side] = ghost_values - check_and_compute(height, width, - buffers[LEFT], buffers[RIGHT], buffers[UP], buffers[DOWN], - self.pressure, self.pressure_old, self.pressure_new) + check_and_compute( + height, + width, + buffers[LEFT], + buffers[RIGHT], + buffers[UP], + buffers[DOWN], + self.pressure, + self.pressure_old, + self.pressure_new, + ) # advance to next step by shifting the data back one step in time - self.pressure_old, self.pressure, self.pressure_new = self.pressure, self.pressure_new, self.pressure_old + self.pressure_old, self.pressure, self.pressure_new = ( + self.pressure, + self.pressure_new, + self.pressure_old, + ) # draw my part of the image, plus a nice 1 pixel border along my # right/bottom boundary @@ -176,31 +202,42 @@ def work(self, mainProxy): @numba.jit(nopython=True, cache=False) -def check_and_compute(h, w, left, right, up, down, - pressure, pressure_old, pressure_new): +def check_and_compute( + h, w, left, right, up, down, pressure, pressure_old, pressure_new +): for i in range(h): for j in range(w): # current time's pressures for neighboring array locations - if j == 0: L = left[i] - else: L = pressure[i,j-1] - - if j == w-1: R = right[i] - else: R = pressure[i,j+1] - - if i == 0: U = up[j] - else: U = pressure[i-1,j] - - if i == h-1: D = down[j] - else: D = pressure[i+1,j] + if j == 0: + L = left[i] + else: + L = pressure[i, j - 1] + + if j == w - 1: + R = right[i] + else: + R = pressure[i, j + 1] + + if i == 0: + U = up[j] + else: + U = pressure[i - 1, j] + + if i == h - 1: + D = down[j] + else: + D = pressure[i + 1, j] # current time's pressure for this array location - curr = pressure[i,j] + curr = pressure[i, j] # previous time's pressure for this array location - old = pressure_old[i,j] + old = pressure_old[i, j] # compute the future time's pressure for this array location - pressure_new[i,j] = 0.4*0.4*(L+R+U+D - 4.0*curr)-old+2.0*curr + pressure_new[i, j] = ( + 0.4 * 0.4 * (L + R + U + D - 4.0 * curr) - old + 2.0 * curr + ) @numba.jit(nopython=True, cache=False) @@ -209,23 +246,25 @@ def fill_subimage(data, w, h, pressure): # Each RGB component is a uint8 that can have 256 possible values for i in range(h): for j in range(w): - p = int(pressure[i,j]) - if p > 255: p = 255 # Keep values in valid range - if p < -255: p = -255 # Keep values in valid range - pos = 3*(i*w+j) + p = int(pressure[i, j]) + if p > 255: + p = 255 # Keep values in valid range + if p < -255: + p = -255 # Keep values in valid range + pos = 3 * (i * w + j) if p > 0: # Positive values are red - data[pos:pos+3] = (255, 255-p, 255-p) + data[pos : pos + 3] = (255, 255 - p, 255 - p) else: # Negative values are blue - data[pos:pos+3] = (255+p, 255+p, 255) + data[pos : pos + 3] = (255 + p, 255 + p, 255) # Draw a green border on right and bottom of this chare array's pixel buffer. # This will overwrite some pressure values at these pixels. for i in range(h): - pos = 3*(i*w+w-1) - data[pos:pos+3] = (0, 255, 0) + pos = 3 * (i * w + w - 1) + data[pos : pos + 3] = (0, 255, 0) for i in range(w): - pos = 3*((h-1)*w+i) - data[pos:pos+3] = (0, 255, 0) + pos = 3 * ((h - 1) * w + i) + data[pos : pos + 3] = (0, 255, 0) @numba.jit(nopython=True, cache=False) @@ -234,20 +273,24 @@ def init_pressure(numInitialPerturbations, W, H, w, h, elemIdx, pressure, pressu random.seed(6) for s in range(numInitialPerturbations): # determine where to place a circle within the interior of the 2D domain - radius = 20 + random.randint(0,32767) % 30 - xcenter = radius + random.randint(0,32767) % (W - 2*radius) - ycenter = radius + random.randint(0,32767) % (H - 2*radius) + radius = 20 + random.randint(0, 32767) % 30 + xcenter = radius + random.randint(0, 32767) % (W - 2 * radius) + ycenter = radius + random.randint(0, 32767) % (H - 2 * radius) # draw the circle for i in range(h): for j in range(w): # the coordinate in the global data array (not just in this chare's portion) - globalx = elemIdx[0]*w + j - globaly = elemIdx[1]*h + i - distanceToCenter = math.sqrt((globalx-xcenter)**2 + (globaly-ycenter)**2) + globalx = elemIdx[0] * w + j + globaly = elemIdx[1] * h + i + distanceToCenter = math.sqrt( + (globalx - xcenter) ** 2 + (globaly - ycenter) ** 2 + ) if distanceToCenter < radius: - rscaled = (distanceToCenter/radius)*3.0*3.14159/2.0 # ranges from 0 to 3pi/2 + rscaled = ( + (distanceToCenter / radius) * 3.0 * 3.14159 / 2.0 + ) # ranges from 0 to 3pi/2 t = 700.0 * math.cos(rscaled) # range won't exceed -700 to 700 - pressure[i,j] = pressure_old[i,j] = t + pressure[i, j] = pressure_old[i, j] = t charm.start(Main) diff --git a/setup.py b/setup.py index f0cda728..38dd6d50 100644 --- a/setup.py +++ b/setup.py @@ -1,6 +1,5 @@ import sys import os -import re import shutil import platform import subprocess @@ -14,6 +13,7 @@ import distutils import Cython.Compiler.Options + Cython.Compiler.Options.annotate = True build_mpi = False @@ -22,30 +22,29 @@ def get_build_machine(): machine = platform.machine() - if machine == 'arm64' or machine == 'aarch64': - return 'arm8' + if machine == "arm64" or machine == "aarch64": + return "arm8" return machine + def get_archflag_machine(): machine = platform.machine() - if machine == 'arm64' or machine == 'aarch64': - return 'arm64' + if machine == "arm64" or machine == "aarch64": + return "arm64" return machine + def get_build_os(): os = platform.system() return os.lower() def get_build_network_type(build_mpi): - return 'netlrts' if not build_mpi else 'mpi' + return "netlrts" if not build_mpi else "mpi" def get_build_triple(build_mpi): - return (get_build_machine(), - get_build_os(), - get_build_network_type(build_mpi) - ) + return (get_build_machine(), get_build_os(), get_build_network_type(build_mpi)) machine = get_build_machine() @@ -53,44 +52,49 @@ def get_build_triple(build_mpi): libcharm_filename2 = None -if system == 'windows' or system.startswith('cygwin'): - libcharm_filename = 'charm.dll' - libcharm_filename2 = 'charm.lib' - charmrun_filename = 'charmrun.exe' -elif system == 'darwin': - os.environ['ARCHFLAGS'] = f'-arch {get_archflag_machine()}' - libcharm_filename = 'libcharm.dylib' - charmrun_filename = 'charmrun' - if 'CPPFLAGS' in os.environ: - os.environ['CPPFLAGS'] += ' -Wno-error=implicit-function-declaration' # needed because some functions used by charm4py are not exported by charm. +if system == "windows" or system.startswith("cygwin"): + libcharm_filename = "charm.dll" + libcharm_filename2 = "charm.lib" + charmrun_filename = "charmrun.exe" +elif system == "darwin": + os.environ["ARCHFLAGS"] = f"-arch {get_archflag_machine()}" + libcharm_filename = "libcharm.dylib" + charmrun_filename = "charmrun" + if "CPPFLAGS" in os.environ: + os.environ[ + "CPPFLAGS" + ] += " -Wno-error=implicit-function-declaration" # needed because some functions used by charm4py are not exported by charm. else: - os.environ['CPPFLAGS'] = '-Wno-error=implicit-function-declaration ' + os.environ["CPPFLAGS"] = "-Wno-error=implicit-function-declaration " else: # Linux - libcharm_filename = 'libcharm.so' - charmrun_filename = 'charmrun' + libcharm_filename = "libcharm.so" + charmrun_filename = "charmrun" try: - charm4py_version = subprocess.check_output(['git', 'describe']).rstrip().decode().split('-')[0] - if charm4py_version.startswith('v'): + charm4py_version = ( + subprocess.check_output(["git", "describe"]).rstrip().decode().split("-")[0] + ) + if charm4py_version.startswith("v"): charm4py_version = charm4py_version[1:] - with open(os.path.join('charm4py', '_version.py'), 'w') as f: + with open(os.path.join("charm4py", "_version.py"), "w") as f: f.write("version='" + charm4py_version + "'\n") except: try: - os.environ['PYTHONPATH'] = os.getcwd() - os.environ['CHARM_NOLOAD'] = '1' + os.environ["PYTHONPATH"] = os.getcwd() + os.environ["CHARM_NOLOAD"] = "1" from charm4py import _version + charm4py_version = _version.version except: - raise DistutilsSetupError('Could not determine Charm4py version') + raise DistutilsSetupError("Could not determine Charm4py version") def charm_built(charm_src_dir): - library_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename) + library_path = os.path.join(charm_src_dir, "charm", "lib", libcharm_filename) if not os.path.exists(library_path): return False - charmrun_path = os.path.join(charm_src_dir, 'charm', 'bin', charmrun_filename) + charmrun_path = os.path.join(charm_src_dir, "charm", "bin", charmrun_filename) if not os.path.exists(charmrun_path): return False return True @@ -98,111 +102,151 @@ def charm_built(charm_src_dir): def check_libcharm_version(charm_src_dir): import ctypes - library_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename) + + library_path = os.path.join(charm_src_dir, "charm", "lib", libcharm_filename) lib = ctypes.CDLL(library_path) - with open(os.path.join(os.getcwd(), 'charm4py', 'libcharm_version'), 'r') as f: - req_version = tuple(int(n) for n in f.read().split('.')) + with open(os.path.join(os.getcwd(), "charm4py", "libcharm_version"), "r") as f: + req_version = tuple(int(n) for n in f.read().split(".")) commit_id_str = ctypes.c_char_p.in_dll(lib, "CmiCommitID").value.decode() - version = [int(n) for n in commit_id_str.split('-')[0][1:].split('.')] + version = [int(n) for n in commit_id_str.split("-")[0][1:].split(".")] try: - version = tuple(version + [int(commit_id_str.split('-')[1])]) + version = tuple(version + [int(commit_id_str.split("-")[1])]) except: version = tuple(version + [0]) if version < req_version: - req_str = '.'.join([str(n) for n in req_version]) - cur_str = '.'.join([str(n) for n in version]) - raise DistutilsSetupError('Charm++ version >= ' + req_str + ' required. ' - 'Existing version is ' + cur_str) + req_str = ".".join([str(n) for n in req_version]) + cur_str = ".".join([str(n) for n in version]) + raise DistutilsSetupError( + "Charm++ version >= " + req_str + " required. " + "Existing version is " + cur_str + ) def build_libcharm(charm_src_dir, build_dir): lib_output_dirs = [] charmrun_output_dirs = [] - lib_output_dirs.append(os.path.join(build_dir, 'charm4py', '.libs')) - lib_output_dirs.append(os.path.join(os.getcwd(), 'charm4py', '.libs')) - charmrun_output_dirs.append(os.path.join(build_dir, 'charmrun')) - charmrun_output_dirs.append(os.path.join(os.getcwd(), 'charmrun')) - for output_dir in (lib_output_dirs + charmrun_output_dirs): + lib_output_dirs.append(os.path.join(build_dir, "charm4py", ".libs")) + lib_output_dirs.append(os.path.join(os.getcwd(), "charm4py", ".libs")) + charmrun_output_dirs.append(os.path.join(build_dir, "charmrun")) + charmrun_output_dirs.append(os.path.join(os.getcwd(), "charmrun")) + for output_dir in lib_output_dirs + charmrun_output_dirs: distutils.dir_util.mkpath(output_dir) if not os.path.exists(charm_src_dir) or not os.path.isdir(charm_src_dir): - raise DistutilsSetupError('charm sources dir ' + charm_src_dir + ' not found') + raise DistutilsSetupError("charm sources dir " + charm_src_dir + " not found") if not charm_built(charm_src_dir): - if system == 'windows' or system.startswith('cygwin'): - raise DistutilsSetupError('Building charm++ from setup.py not currently supported on Windows.' - ' Please download a Charm4py binary wheel (64-bit Python required)') + if system == "windows" or system.startswith("cygwin"): + raise DistutilsSetupError( + "Building charm++ from setup.py not currently supported on Windows." + " Please download a Charm4py binary wheel (64-bit Python required)" + ) - if os.path.exists(os.path.join(charm_src_dir, 'charm.tar.gz')): - log.info('Uncompressing charm.tar.gz...') - cmd = ['tar', 'xf', 'charm.tar.gz'] + if os.path.exists(os.path.join(charm_src_dir, "charm.tar.gz")): + log.info("Uncompressing charm.tar.gz...") + cmd = ["tar", "xf", "charm.tar.gz"] p = subprocess.Popen(cmd, cwd=charm_src_dir, shell=False) rc = p.wait() if rc != 0: - raise DistutilsSetupError('An error occured while building charm library') + raise DistutilsSetupError( + "An error occured while building charm library" + ) # divide by 2 to not hog the system. On systems with hyperthreading, this will likely # result in using same # cores as physical cores (therefore not all the logical cores) import multiprocessing - build_num_cores = max(int(os.environ.get('CHARM_BUILD_PROCESSES', multiprocessing.cpu_count() // 2)), 1) - extra_build_opts = os.environ.get('CHARM_EXTRA_BUILD_OPTS', '') + + build_num_cores = max( + int( + os.environ.get( + "CHARM_BUILD_PROCESSES", multiprocessing.cpu_count() // 2 + ) + ), + 1, + ) + extra_build_opts = os.environ.get("CHARM_EXTRA_BUILD_OPTS", "") if enable_tracing: - extra_build_opts += " --enable-tracing " - + extra_build_opts += " --enable-tracing " + target_machine, os_target, target_layer = get_build_triple(build_mpi) - build_triple = f'{target_layer}-{os_target}-{target_machine}' - cmd = f'./build charm4py {build_triple} -j{build_num_cores} --with-production {extra_build_opts}' + build_triple = f"{target_layer}-{os_target}-{target_machine}" + cmd = f"./build charm4py {build_triple} -j{build_num_cores} --with-production {extra_build_opts}" print(cmd) - p = subprocess.Popen(cmd.rstrip().split(' '), - cwd=os.path.join(charm_src_dir, 'charm'), - shell=False) + p = subprocess.Popen( + cmd.rstrip().split(" "), + cwd=os.path.join(charm_src_dir, "charm"), + shell=False, + ) rc = p.wait() if rc != 0: - raise DistutilsSetupError('An error occured while building charm library') + raise DistutilsSetupError("An error occured while building charm library") - if system == 'darwin': - old_file_path = os.path.join(charm_src_dir, 'charm', 'lib', 'libcharm.dylib') - new_file_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename) + if system == "darwin": + old_file_path = os.path.join( + charm_src_dir, "charm", "lib", "libcharm.dylib" + ) + new_file_path = os.path.join( + charm_src_dir, "charm", "lib", libcharm_filename + ) shutil.move(old_file_path, new_file_path) - cmd = ['install_name_tool', '-id', '@rpath/../.libs/' + libcharm_filename, new_file_path] + cmd = [ + "install_name_tool", + "-id", + "@rpath/../.libs/" + libcharm_filename, + new_file_path, + ] p = subprocess.Popen(cmd, shell=False) rc = p.wait() if rc != 0: - raise DistutilsSetupError('install_name_tool error') + raise DistutilsSetupError("install_name_tool error") # verify that the version of charm++ that was built is same or greater than the # one required by charm4py check_libcharm_version(charm_src_dir) # ---- copy libcharm ---- - lib_src_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename) + lib_src_path = os.path.join(charm_src_dir, "charm", "lib", libcharm_filename) for output_dir in lib_output_dirs: - log.info('copying ' + os.path.relpath(lib_src_path) + ' to ' + os.path.relpath(output_dir)) + log.info( + "copying " + + os.path.relpath(lib_src_path) + + " to " + + os.path.relpath(output_dir) + ) shutil.copy(lib_src_path, output_dir) if libcharm_filename2 is not None: - lib_src_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename2) + lib_src_path = os.path.join(charm_src_dir, "charm", "lib", libcharm_filename2) for output_dir in lib_output_dirs: - log.info('copying ' + os.path.relpath(lib_src_path) + ' to ' + os.path.relpath(output_dir)) + log.info( + "copying " + + os.path.relpath(lib_src_path) + + " to " + + os.path.relpath(output_dir) + ) shutil.copy(lib_src_path, output_dir) - # ---- copy charmrun ---- - charmrun_src_path = os.path.join(charm_src_dir, 'charm', 'bin', charmrun_filename) + charmrun_src_path = os.path.join(charm_src_dir, "charm", "bin", charmrun_filename) for output_dir in charmrun_output_dirs: - log.info('copying ' + os.path.relpath(charmrun_src_path) + ' to ' + os.path.relpath(output_dir)) + log.info( + "copying " + + os.path.relpath(charmrun_src_path) + + " to " + + os.path.relpath(output_dir) + ) shutil.copy(charmrun_src_path, output_dir) class custom_install(install, object): user_options = install.user_options + [ - ('mpi', None, 'Build libcharm with MPI'), - ('enable-tracing', None, 'Build libcharm with tracing enabled') + ("mpi", None, "Build libcharm with MPI"), + ("enable-tracing", None, "Build libcharm with tracing enabled"), ] def initialize_options(self): @@ -227,8 +271,8 @@ def run(self): class custom_build_py(build_py, object): user_options = build_py.user_options + [ - ('mpi', None, 'Build libcharm with MPI'), - ('enable-tracing', None, 'Build libcharm with tracing enabled') + ("mpi", None, "Build libcharm with MPI"), + ("enable-tracing", None, "Build libcharm with tracing enabled"), ] def initialize_options(self): @@ -247,16 +291,19 @@ def finalize_options(self): def run(self): if not self.dry_run: - build_libcharm(os.path.join(os.getcwd(), 'charm_src'), self.build_lib) - shutil.copy(os.path.join(os.getcwd(), 'LICENSE'), os.path.join(self.build_lib, 'charm4py')) + build_libcharm(os.path.join(os.getcwd(), "charm_src"), self.build_lib) + shutil.copy( + os.path.join(os.getcwd(), "LICENSE"), + os.path.join(self.build_lib, "charm4py"), + ) super(custom_build_py, self).run() class custom_build_ext(build_ext, object): user_options = build_ext.user_options + [ - ('mpi', None, 'Build libcharm with MPI'), - ('enable-tracing', None, 'Build libcharm with tracing enabled') + ("mpi", None, "Build libcharm with MPI"), + ("enable-tracing", None, "Build libcharm with tracing enabled"), ] def initialize_options(self): @@ -276,18 +323,18 @@ def finalize_options(self): def run(self): if not self.dry_run: - build_libcharm(os.path.join(os.getcwd(), 'charm_src'), self.build_lib) + build_libcharm(os.path.join(os.getcwd(), "charm_src"), self.build_lib) super(custom_build_ext, self).run() + class _renameInstalled(_install_lib): def __init__(self, *args, **kwargs): _install_lib.__init__(self, *args, **kwargs) - def install(self): log.info("Renaming libraries") outfiles = _install_lib.install(self) - ''' + """ for file in outfiles: if "c_object_store" in file and system == "darwin": direc = os.path.dirname(file) @@ -307,66 +354,78 @@ def install(self): install_name_command += "/charmlib_cython.*.so" log.info(install_name_command) os.system(install_name_command) - ''' + """ return outfiles - extensions = [] py_impl = platform.python_implementation() - log.info("Check sys version info") if sys.version_info[0] >= 3: log.info("Defining cython args") # compile C-extension module (from cython) from Cython.Build import cythonize + my_include_dirs = [] haveNumpy = False try: import numpy + haveNumpy = True my_include_dirs.append(numpy.get_include()) except: - log.warn('WARNING: Building charmlib C-extension module without numpy support (numpy not found or import failed)') + log.warn( + "WARNING: Building charmlib C-extension module without numpy support (numpy not found or import failed)" + ) extra_link_args = [] - if os.name != 'nt': - if system == 'darwin': - extra_link_args=["-Wl,-rpath,@loader_path/../.libs"] + if os.name != "nt": + if system == "darwin": + extra_link_args = ["-Wl,-rpath,@loader_path/../.libs"] else: - extra_link_args=["-Wl,-rpath,$ORIGIN/../.libs"] + extra_link_args = ["-Wl,-rpath,$ORIGIN/../.libs"] cobject_extra_args = [] log.info("Extra object args for object store") - if os.name != 'nt': - if system == 'darwin': - cobject_extra_args=["-Wl,-rpath,@loader_path/.libs"] + if os.name != "nt": + if system == "darwin": + cobject_extra_args = ["-Wl,-rpath,@loader_path/.libs"] else: - cobject_extra_args=["-Wl,-rpath,$ORIGIN/.libs"] - - cudaBuild = os.environ.get('CHARM_EXTRA_BUILD_OPTS', '').find('cuda') != -1 - - extensions.extend(cythonize(setuptools.Extension('charm4py.charmlib.charmlib_cython', - sources=['charm4py/charmlib/charmlib_cython.pyx'], - include_dirs=['charm_src/charm/include'] + my_include_dirs, - library_dirs=[os.path.join(os.getcwd(), 'charm4py', '.libs')], - libraries=["charm"], - extra_compile_args=[], - extra_link_args=extra_link_args, - ), compile_time_env={'HAVE_NUMPY': haveNumpy, - 'HAVE_CUDA_BUILD': cudaBuild})) - - extensions.extend(cythonize(setuptools.Extension('charm4py.c_object_store', - sources=['charm4py/c_object_store.pyx'], - include_dirs=['charm_src/charm/include'] + my_include_dirs, - library_dirs=[os.path.join(os.getcwd(), 'charm4py', '.libs')], - libraries=["charm"], - extra_compile_args=[], - extra_link_args=cobject_extra_args, - ), compile_time_env={'HAVE_NUMPY': haveNumpy, - 'HAVE_CUDA_BUILD': cudaBuild})) + cobject_extra_args = ["-Wl,-rpath,$ORIGIN/.libs"] + + cudaBuild = os.environ.get("CHARM_EXTRA_BUILD_OPTS", "").find("cuda") != -1 + + extensions.extend( + cythonize( + setuptools.Extension( + "charm4py.charmlib.charmlib_cython", + sources=["charm4py/charmlib/charmlib_cython.pyx"], + include_dirs=["charm_src/charm/include"] + my_include_dirs, + library_dirs=[os.path.join(os.getcwd(), "charm4py", ".libs")], + libraries=["charm"], + extra_compile_args=[], + extra_link_args=extra_link_args, + ), + compile_time_env={"HAVE_NUMPY": haveNumpy, "HAVE_CUDA_BUILD": cudaBuild}, + ) + ) + + extensions.extend( + cythonize( + setuptools.Extension( + "charm4py.c_object_store", + sources=["charm4py/c_object_store.pyx"], + include_dirs=["charm_src/charm/include"] + my_include_dirs, + library_dirs=[os.path.join(os.getcwd(), "charm4py", ".libs")], + libraries=["charm"], + extra_compile_args=[], + extra_link_args=cobject_extra_args, + ), + compile_time_env={"HAVE_NUMPY": haveNumpy, "HAVE_CUDA_BUILD": cudaBuild}, + ) + ) additional_setup_keywords = {} @@ -374,12 +433,14 @@ def install(self): version=charm4py_version, packages=setuptools.find_packages(), package_data={ - 'charm4py': ['libcharm_version'], + "charm4py": ["libcharm_version"], }, ext_modules=extensions, - cmdclass = {'build_py': custom_build_py, - 'build_ext': custom_build_ext, - 'install': custom_install, - 'install_lib': _renameInstalled,}, - **additional_setup_keywords + cmdclass={ + "build_py": custom_build_py, + "build_ext": custom_build_ext, + "install": custom_install, + "install_lib": _renameInstalled, + }, + **additional_setup_keywords, ) diff --git a/tests/array_maps/test1.py b/tests/array_maps/test1.py index 8a780adf..a5e782c5 100644 --- a/tests/array_maps/test1.py +++ b/tests/array_maps/test1.py @@ -15,7 +15,7 @@ def procNum(self, index): class MyChare(Chare): def __init__(self, last): - assert charm.myPe() == index_to_pe(self.thisIndex), 'ArrayMap failed' + assert charm.myPe() == index_to_pe(self.thisIndex), "ArrayMap failed" if last: self.contribute(None, None, charm.thisProxy[0].exit) diff --git a/tests/benchmark/pingpong.py b/tests/benchmark/pingpong.py index cb6060c2..3f1dc0c9 100644 --- a/tests/benchmark/pingpong.py +++ b/tests/benchmark/pingpong.py @@ -1,6 +1,7 @@ from charm4py import charm, Chare, Array, coro, Future from time import time -#import numpy as np + +# import numpy as np PAYLOAD = 100 # number of bytes NITER = 10000 @@ -18,7 +19,7 @@ def __init__(self): def start(self, done_future, threaded=False): self.done_future = done_future self.iter = 0 - #data = np.zeros(PAYLOAD, dtype='int8') + # data = np.zeros(PAYLOAD, dtype='int8') data = 3 self.startTime = time() if threaded: @@ -48,7 +49,7 @@ def recv_th(self, data): def main(args): threaded = False - if len(args) > 1 and args[1] == '-t': + if len(args) > 1 and args[1] == "-t": threaded = True pings = Array(Ping, 2) charm.awaitCreation(pings) diff --git a/tests/callbacks/callbacks.py b/tests/callbacks/callbacks.py index 9bc81190..5784c042 100644 --- a/tests/callbacks/callbacks.py +++ b/tests/callbacks/callbacks.py @@ -24,12 +24,12 @@ def __init__(self, main): self.main = main def getResult(self, result): - #print('[' + str(charm.myPe()) + '] got result:', result) + # print('[' + str(charm.myPe()) + '] got result:', result) assert result == (charm.numPes() * (charm.numPes() - 1)) // 2 self.main.workDone(self.thisIndex[0]) def getResultBroadcast(self, result): - #print('[' + str(charm.myPe()) + '] got result:', result) + # print('[' + str(charm.myPe()) + '] got result:', result) assert result == (charm.numPes() * (charm.numPes() - 1)) // 2 self.contribute(1, Reducer.sum, self.main.workDone) @@ -45,15 +45,15 @@ def __init__(self, args): controllers = Array(Controller, charm.numPes()) receivers = Array(CallbackReceiver, charm.numPes(), args=[self.thisProxy]) workers.work(receivers[1].getResult) - self.wait('self.done == 1') + self.wait("self.done == 1") self.done = -1 controllers[1].start(workers, receivers[2].getResult) - self.wait('self.done == 2') + self.wait("self.done == 2") self.done = -1 controllers[2].start(workers, receivers.getResultBroadcast) - self.wait('self.done == ' + str(charm.numPes())) + self.wait("self.done == " + str(charm.numPes())) self.done = -1 f = Future() diff --git a/tests/callbacks/schedule_cb.py b/tests/callbacks/schedule_cb.py index b5f1fab4..74ebd824 100644 --- a/tests/callbacks/schedule_cb.py +++ b/tests/callbacks/schedule_cb.py @@ -14,15 +14,16 @@ def start(self): charm.scheduleCallableAfter(self.thisProxy[self.thisIndex].next, 1, [-1]) def next(self, from_elem): - print(self.thisIndex, 'time=', time() - self.t0, 'from=', from_elem) + print(self.thisIndex, "time=", time() - self.t0, "from=", from_elem) assert from_elem == self.thisIndex[0] - 1 assert time() - self.t0 > self.thisIndex[0] + 0.9 if self.thisIndex[0] == NUM_CHARES - 1: - print('DONE') + print("DONE") exit() else: - charm.scheduleCallableAfter(self.thisProxy[self.thisIndex[0] + 1].next, - 1, [self.thisIndex[0]]) + charm.scheduleCallableAfter( + self.thisProxy[self.thisIndex[0] + 1].next, 1, [self.thisIndex[0]] + ) def main(args): diff --git a/tests/channels/test1.py b/tests/channels/test1.py index 72fe6878..258c0ef8 100644 --- a/tests/channels/test1.py +++ b/tests/channels/test1.py @@ -10,16 +10,16 @@ def __init__(self, id): def work(self, mainProxy, other, done_fut): me = self.thisProxy[self.thisIndex] ch = Channel(self, remote=mainProxy) - ch.send('hello from ' + str(self.id)) + ch.send("hello from " + str(self.id)) ch = Channel(self, remote=me) - ch.send('self ping', me) - assert ch.recv() == ('self ping', me) + ch.send("self ping", me) + assert ch.recv() == ("self ping", me) ch = Channel(self, remote=other) - ch.send(('hi from ' + str(self.id), me)) + ch.send(("hi from " + str(self.id), me)) data = ch.recv() - assert data[0] == 'hi from ' + str((self.id + 1) % 2) + assert data[0] == "hi from " + str((self.id + 1) % 2) assert data[1] == other done_fut() @@ -35,8 +35,8 @@ def __init__(self, args): chare1.work(self.thisProxy, chare0, done_fut) ch0 = Channel(self, remote=chare0) ch1 = Channel(self, remote=chare1) - assert ch0.recv() == 'hello from 0' - assert ch1.recv() == 'hello from 1' + assert ch0.recv() == "hello from 0" + assert ch1.recv() == "hello from 1" done_fut.get() exit() diff --git a/tests/channels/test2.py b/tests/channels/test2.py index d7f769ef..0c317c73 100644 --- a/tests/channels/test2.py +++ b/tests/channels/test2.py @@ -60,11 +60,11 @@ def main(args): for idx in range(P): chares.append(collection[idx]) - for collection, numelems in ((a1, P*8), (a2, P*10), (a3, P*4), (a4, P)): + for collection, numelems in ((a1, P * 8), (a2, P * 10), (a3, P * 4), (a4, P)): for idx in range(numelems): chares.append(collection[idx]) - print('There are', len(chares), 'chares') + print("There are", len(chares), "chares") # establish random channels between chares global gchannels @@ -79,19 +79,24 @@ def main(args): num_self_channels += 1 gchannels[level][a].append(b) gchannels[level][b].append(a) - charm.thisProxy.updateGlobals({'gchannels': gchannels}, awaitable=True).get() + charm.thisProxy.updateGlobals({"gchannels": gchannels}, awaitable=True).get() done_fut = Future(8 * NUM_LEVELS) # wait for 8 collections to finish 3 levels for collection in (g1, g2, g3, g4, a1, a2, a3, a4): collection.setup(awaitable=True).get() - print(NUM_CHANNELS * NUM_LEVELS, 'channels set up,', num_self_channels, 'self channels') + print( + NUM_CHANNELS * NUM_LEVELS, + "channels set up,", + num_self_channels, + "self channels", + ) for collection in (g1, g2, g3, g4, a1, a2, a3, a4): for lvl in range(NUM_LEVELS): collection.work(lvl, done_fut) msgs = sum(done_fut.get()) assert msgs == sum(LEVELS_NUM_ITER[:NUM_LEVELS]) * NUM_CHANNELS * 2 - print('total msgs received by chares=', msgs) + print("total msgs received by chares=", msgs) exit() diff --git a/tests/channels/test_numpy.py b/tests/channels/test_numpy.py index 26c0a783..e583e68f 100644 --- a/tests/channels/test_numpy.py +++ b/tests/channels/test_numpy.py @@ -12,9 +12,9 @@ def work(self, mainProxy, done_fut): ch = Channel(self, remote=mainProxy) for i in range(NUM_ITER): array1, array2, array3 = ch.recv() - np.testing.assert_array_equal(array1, np.arange(100, dtype='int64') + i) - np.testing.assert_array_equal(array2, np.arange(50, dtype='int64') + i) - np.testing.assert_array_equal(array3, np.arange(70, dtype='int64') + i) + np.testing.assert_array_equal(array1, np.arange(100, dtype="int64") + i) + np.testing.assert_array_equal(array2, np.arange(50, dtype="int64") + i) + np.testing.assert_array_equal(array3, np.arange(70, dtype="int64") + i) done_fut() @@ -26,9 +26,9 @@ def __init__(self, args): done_fut = Future() chare.work(self.thisProxy, done_fut) for i in range(NUM_ITER): - array1 = np.arange(100, dtype='int64') + i - array2 = np.arange(50, dtype='int64') + i - array3 = np.arange(70, dtype='int64') + i + array1 = np.arange(100, dtype="int64") + i + array2 = np.arange(50, dtype="int64") + i + array3 = np.arange(70, dtype="int64") + i ch.send(array1, array2, array3) done_fut.get() exit() diff --git a/tests/charm_remote.py b/tests/charm_remote.py index 6105409d..f0f280f2 100644 --- a/tests/charm_remote.py +++ b/tests/charm_remote.py @@ -15,8 +15,10 @@ def start(self): pe = charm.myPe() - 1 if pe == -1: pe = 0 - charm.thisProxy[pe].exec('global MY_GLOBAL; MY_GLOBAL = 7262', __name__, awaitable=True).get() - assert charm.thisProxy[pe].eval('MY_GLOBAL', __name__, ret=True).get() == 7262 + charm.thisProxy[pe].exec( + "global MY_GLOBAL; MY_GLOBAL = 7262", __name__, awaitable=True + ).get() + assert charm.thisProxy[pe].eval("MY_GLOBAL", __name__, ret=True).get() == 7262 Group(Test) diff --git a/tests/collections/proxies_same_name.py b/tests/collections/proxies_same_name.py index b10ccbf0..b9faf03b 100644 --- a/tests/collections/proxies_same_name.py +++ b/tests/collections/proxies_same_name.py @@ -26,9 +26,9 @@ def main(args): tester1 = Chare(Test, onPE=2) tester2 = Chare(proxies_same_name_aux.Test, onPE=1) charm.awaitCreation(g2, g1, tester2, tester1) - tester1.test(g2, 'check2', awaitable=True).get() - tester2.test(g1, 'check1', awaitable=True).get() + tester1.test(g2, "check2", awaitable=True).get() + tester2.test(g1, "check1", awaitable=True).get() exit() -charm.start(main, modules=['proxies_same_name_aux']) +charm.start(main, modules=["proxies_same_name_aux"]) diff --git a/tests/collections/proxy_eq.py b/tests/collections/proxy_eq.py index d5129da9..b0a04556 100644 --- a/tests/collections/proxy_eq.py +++ b/tests/collections/proxy_eq.py @@ -26,11 +26,15 @@ def __init__(self, args): assert g1 == g1[2].getProxy(ret=True).get() assert g1[2] == g1[2].getProxy(elem=True, ret=True).get() assert g1[2].getProxy(ret=True).get() == g1[3].getProxy(ret=True).get() - assert g1[2].getProxy(True, ret=True).get() != g1[3].getProxy(True, ret=True).get() + assert ( + g1[2].getProxy(True, ret=True).get() != g1[3].getProxy(True, ret=True).get() + ) assert g1 != g2 assert g1[2].getProxy(ret=True).get() != g2[2].getProxy(ret=True).get() - assert g1[2].getProxy(True, ret=True).get() != g2[2].getProxy(True, ret=True).get() + assert ( + g1[2].getProxy(True, ret=True).get() != g2[2].getProxy(True, ret=True).get() + ) assert g1 != a assert a == a diff --git a/tests/collections/test.py b/tests/collections/test.py index 928b1f0f..98ffe068 100644 --- a/tests/collections/test.py +++ b/tests/collections/test.py @@ -13,7 +13,7 @@ def __init__(self): else: myIndex = self.thisIndex if charm.numPes() <= 20 or myIndex == 0: - print('Test', self.thisIndex, 'created') + print("Test", self.thisIndex, "created") def work(self, main): self.contribute(1, Reducer.sum, main.done) @@ -31,7 +31,7 @@ def done(self, result): self.countReductions += 1 if self.countReductions == 2: assert self.count == (charm.numPes() + charm.numPes() * CHARES_PER_PE) - print('Program done') + print("Program done") exit() diff --git a/tests/dcopy/test_dcopy.py b/tests/dcopy/test_dcopy.py index 83ebd9d0..144940c8 100644 --- a/tests/dcopy/test_dcopy.py +++ b/tests/dcopy/test_dcopy.py @@ -17,7 +17,9 @@ class Main(Chare): def __init__(self, args): - charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy}, '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + {"mainProxy": self.thisProxy}, "__main__", awaitable=True + ).get() self.testProxy = Array(Test, charm.numPes() * CHARES_PER_PE) def start(self): @@ -27,10 +29,10 @@ def start(self): def iterationComplete(self): if self.iterations % 10 == 0: - print('Iteration', self.iterations, 'complete') + print("Iteration", self.iterations, "complete") self.iterations += 1 if self.iterations == MAX_ITER: - print('Program done. Total time =', time.time() - self.startTime) + print("Program done. Total time =", time.time() - self.startTime) charm.printStats() exit() else: @@ -40,11 +42,11 @@ def iterationComplete(self): class Test(Chare): def __init__(self): - self.x = numpy.arange(DATA_LEN, dtype='float64') + self.x = numpy.arange(DATA_LEN, dtype="float64") y = self.x * (self.thisIndex[0] + 1) self.S1 = y.tobytes() - self.S2 = array.array('d', y) + self.S2 = array.array("d", y) self.S3 = y self.msgsRcvd = 0 @@ -72,10 +74,10 @@ def recvData(self, src, d1, d2, d3): desired = self.x * (src[0] + 1) - v1 = numpy.frombuffer(d1, dtype='float64') + v1 = numpy.frombuffer(d1, dtype="float64") assert_allclose(v1, desired, atol=1e-07) - v2 = numpy.array(d2, dtype='float64') + v2 = numpy.array(d2, dtype="float64") assert_allclose(v2, desired, atol=1e-07) assert_allclose(d3, desired, atol=1e-07) diff --git a/tests/entry_methods/bcast_globals.py b/tests/entry_methods/bcast_globals.py index ab5b1b3f..ceb43182 100644 --- a/tests/entry_methods/bcast_globals.py +++ b/tests/entry_methods/bcast_globals.py @@ -22,10 +22,12 @@ def main(args): done = charm.Future() main_globals = {} - main_globals['group1_proxy'] = g1 - main_globals['group2_proxy'] = g2 - main_globals['done_future'] = done - charm.thisProxy.updateGlobals(main_globals, module_name='__main__', awaitable=True).get() + main_globals["group1_proxy"] = g1 + main_globals["group2_proxy"] = g2 + main_globals["done_future"] = done + charm.thisProxy.updateGlobals( + main_globals, module_name="__main__", awaitable=True + ).get() group1_proxy.start() done.get() diff --git a/tests/entry_methods/entrymethod_args_kwargs.py b/tests/entry_methods/entrymethod_args_kwargs.py index 652cc56b..aaf046b5 100644 --- a/tests/entry_methods/entrymethod_args_kwargs.py +++ b/tests/entry_methods/entrymethod_args_kwargs.py @@ -71,7 +71,9 @@ def __init__(self, args): else: continue collection[single_chare].recv(10, 20, 3000, b=4000, awaitable=True).get() - collection[single_chare].recv(b=4000, a=3000, y=20, x=10, awaitable=True).get() + collection[single_chare].recv( + b=4000, a=3000, y=20, x=10, awaitable=True + ).get() exit() diff --git a/tests/exceptions/pool.py b/tests/exceptions/pool.py index ea0812ee..95cff659 100644 --- a/tests/exceptions/pool.py +++ b/tests/exceptions/pool.py @@ -12,11 +12,11 @@ def __init__(self): myfunc = None -myfunc_bad_source = ''' +myfunc_bad_source = """ def myfunc(x): raise MyException return x**2 -''' +""" myfunc_good_source = """ def myfunc(x): @@ -42,15 +42,26 @@ def main(args): try: if func is None: tasks = [(myfunc, i) for i in range(num_tasks)] - result = charm.pool.submit_async(tasks, multi_future=multi_future, chunksize=chunk_size) + result = charm.pool.submit_async( + tasks, + multi_future=multi_future, + chunksize=chunk_size, + ) else: tasks = range(num_tasks) - result = charm.pool.map_async(func, tasks, multi_future=multi_future, chunksize=chunk_size) + result = charm.pool.map_async( + func, + tasks, + multi_future=multi_future, + chunksize=chunk_size, + ) if multi_future: result = [f.get() for f in result] else: result = result.get() - assert trial == 1 and result == [x**2 for x in range(num_tasks)] + assert trial == 1 and result == [ + x**2 for x in range(num_tasks) + ] except MyException: assert trial == 0 exit() diff --git a/tests/exceptions/test.py b/tests/exceptions/test.py index 06e93f27..a5d74466 100644 --- a/tests/exceptions/test.py +++ b/tests/exceptions/test.py @@ -14,7 +14,7 @@ def bad(self): # this will raise NameError exception test[3] = 3 else: - return 'good' + return "good" def allbad(self): # this will raise NameError exception @@ -47,9 +47,9 @@ def main(args): for proxy, num_chares in ((g, npes), (a, npes * 8)): for i in range(2): if i == 0: - methods = {'allbad': 'allbad', 'good': 'good', 'bad': 'bad'} + methods = {"allbad": "allbad", "good": "good", "bad": "bad"} else: - methods = {'allbad': 'allbad_th', 'good': 'good_th', 'bad': 'bad_th'} + methods = {"allbad": "allbad_th", "good": "good_th", "bad": "bad_th"} # p2p if proxy == g: @@ -58,34 +58,34 @@ def main(args): bad_idx = (num_chares // 2) + 1 for _ in range(NUM_ITER): try: - getattr(proxy[bad_idx], methods['bad'])(ret=True).get() + getattr(proxy[bad_idx], methods["bad"])(ret=True).get() assert False except NameError: - retval = getattr(proxy[bad_idx], methods['good'])(ret=True).get() + retval = getattr(proxy[bad_idx], methods["good"])(ret=True).get() assert retval == bad_idx # bcast awaitable=True for _ in range(NUM_ITER): try: - getattr(proxy, methods['allbad'])(awaitable=True).get() + getattr(proxy, methods["allbad"])(awaitable=True).get() assert False except NameError: try: - getattr(proxy, methods['bad'])(awaitable=True).get() + getattr(proxy, methods["bad"])(awaitable=True).get() assert False except NameError: - retval = getattr(proxy, methods['good'])(awaitable=True).get() + retval = getattr(proxy, methods["good"])(awaitable=True).get() assert retval is None # bcast ret=True (returns list of results) for _ in range(NUM_ITER): - retvals = getattr(proxy, methods['bad'])(ret=True).get() + retvals = getattr(proxy, methods["bad"])(ret=True).get() num_errors = 0 for retval in retvals: if isinstance(retval, NameError): num_errors += 1 else: - assert retval == 'good' + assert retval == "good" assert num_errors == (num_chares // 2) exit() diff --git a/tests/futures/multi_futures.py b/tests/futures/multi_futures.py index 0d69c11c..75119dba 100644 --- a/tests/futures/multi_futures.py +++ b/tests/futures/multi_futures.py @@ -12,9 +12,9 @@ def main(args): testProxy.getData(f) data = f.get() - print('[Main] Received data: ' + str(data)) - assert sorted(data) == list(range(numChares)), 'Multi-futures failed!' - print('[Main] All done.') + print("[Main] Received data: " + str(data)) + assert sorted(data) == list(range(numChares)), "Multi-futures failed!" + print("[Main] All done.") exit() diff --git a/tests/futures/test_different_coroutines.py b/tests/futures/test_different_coroutines.py index 8a7d5e56..95a8e712 100644 --- a/tests/futures/test_different_coroutines.py +++ b/tests/futures/test_different_coroutines.py @@ -1,9 +1,11 @@ from charm4py import charm, Chare, Future, coro + # This test will "fail" if running it results in a timeout, as any # return code generated by this program will be 1 TEST_VALUE = 42 + class TestChare(Chare): @coro def __init__(self, done_future): @@ -39,4 +41,5 @@ def main(args): charm.exit() + charm.start(main) diff --git a/tests/futures/test_futures.py b/tests/futures/test_futures.py index c39ddaa2..1d495494 100644 --- a/tests/futures/test_futures.py +++ b/tests/futures/test_futures.py @@ -12,8 +12,15 @@ def main(args): max_f = Future() testProxy.getStats((sum_f, min_f, max_f)) - print('[Main] Sum: ' + str(sum_f.get()) + ', Min: ' + str(min_f.get()) + ', Max: ' + str(max_f.get())) - print('[Main] All done.') + print( + "[Main] Sum: " + + str(sum_f.get()) + + ", Min: " + + str(min_f.get()) + + ", Max: " + + str(max_f.get()) + ) + print("[Main] All done.") exit() @@ -28,7 +35,7 @@ def getStats(self, futures): self.contribute(self.thisIndex[0], Reducer.max, self.thisProxy[0].collectStats) def collectStats(self, stat_result): - assert self.thisIndex[0] == 0, 'Reduction target incorrect!' + assert self.thisIndex[0] == 0, "Reduction target incorrect!" if stat_result == 0: self.min_future.send(stat_result) elif stat_result == (charm.numPes() * CHARES_PER_PE) - 1: diff --git a/tests/migration/chare_migration.py b/tests/migration/chare_migration.py index 50ca1152..9398d1e8 100644 --- a/tests/migration/chare_migration.py +++ b/tests/migration/chare_migration.py @@ -1,4 +1,3 @@ - """ A program to test migration of chares. """ @@ -20,7 +19,7 @@ def migrated(self): chare has migrated. """ if self.thisIndex == (0,): - print(self.thisIndex, 'migrated to PE', charm.myPe()) + print(self.thisIndex, "migrated to PE", charm.myPe()) assert charm.myPe() == self.toPe self.contribute(None, None, charm.thisProxy[0].exit) @@ -29,14 +28,14 @@ def start(self): Invoke the starter code for test. """ if charm.myPe() == 0: - print(self.thisIndex, 'on PE', charm.myPe(), 'before migration') + print(self.thisIndex, "on PE", charm.myPe(), "before migration") self.toPe = (charm.myPe() + 1) % charm.numPes() self.thisProxy[self.thisIndex].migrate(self.toPe) def main(args): if charm.numPes() == 1: - charm.abort('Run program with more than 1 PE') + charm.abort("Run program with more than 1 PE") array_proxy = Array(Migrate, CHARES_PER_PE * charm.numPes()) array_proxy.start() diff --git a/tests/migration/test_migrate.py b/tests/migration/test_migrate.py index 02e04232..2fe756b4 100644 --- a/tests/migration/test_migrate.py +++ b/tests/migration/test_migrate.py @@ -10,19 +10,23 @@ class Test(Chare): def __init__(self, home_pes_future): - assert(not all_created) # makes sure constructor is only called for creation, not migration + assert ( + not all_created + ) # makes sure constructor is only called for creation, not migration self.iteration = 0 self.originalPe = charm.myPe() - self.data = numpy.arange(100, dtype='int64') * (self.originalPe + 1) + self.data = numpy.arange(100, dtype="int64") * (self.originalPe + 1) # notify controllers that array elements are created and pass home PE of every element self.contribute(charm.myPe(), Reducer.gather, home_pes_future) def start(self): if self.thisIndex == (0,) and self.iteration % 20 == 0: - print('Iteration ' + str(self.iteration)) + print("Iteration " + str(self.iteration)) self.check() - A = numpy.arange(1000, dtype='float64') - work = 1000 * int(round(math.log(charm.myPe() + 1) + 1)) # elements in higher PEs do more work + A = numpy.arange(1000, dtype="float64") + work = 1000 * int( + round(math.log(charm.myPe() + 1) + 1) + ) # elements in higher PEs do more work for i in range(work): A += 1.33 self.iteration += 1 @@ -36,17 +40,22 @@ def start(self): def resumeFromSync(self): self.start() - def check(self): # check that my attributes haven't changed as a result of migrating - assert(self.originalPe == arrayElemHomeMap[self.thisIndex[0]]) - v = numpy.arange(100, dtype='int64') * (self.originalPe + 1) + def check( + self, + ): # check that my attributes haven't changed as a result of migrating + assert self.originalPe == arrayElemHomeMap[self.thisIndex[0]] + v = numpy.arange(100, dtype="int64") * (self.originalPe + 1) numpy.testing.assert_allclose(self.data, v) def main(args): home_pes = Future() array = Array(Test, charm.numPes() * 4, args=[home_pes], useAtSync=True) - charm.thisProxy.updateGlobals({'all_created': True, 'arrayElemHomeMap': home_pes.get()}, - '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + {"all_created": True, "arrayElemHomeMap": home_pes.get()}, + "__main__", + awaitable=True, + ).get() array.start() diff --git a/tests/migration/test_nonmigratables.py b/tests/migration/test_nonmigratables.py index b87b9d74..307f2bf6 100644 --- a/tests/migration/test_nonmigratables.py +++ b/tests/migration/test_nonmigratables.py @@ -1,6 +1,7 @@ from charm4py import charm, Chare, Array import sys -sys.argv += ['+balancer', 'RandCentLB'] + +sys.argv += ["+balancer", "RandCentLB"] MAX_ITER = 100 diff --git a/tests/pool/pool.py b/tests/pool/pool.py index b7f26bc5..c145f332 100644 --- a/tests/pool/pool.py +++ b/tests/pool/pool.py @@ -36,7 +36,7 @@ def main(args): for _ in range(NUM_TRIALS): result = charm.pool.map(func, tasks, chunksize=chunksize) assert result == [func(x) for x in tasks] - print('Elapsed=', time() - t0) + print("Elapsed=", time() - t0) # test charm.pool.submit() funcs = [square, square_coro, add_val, add_val_coro] @@ -48,7 +48,7 @@ def main(args): for _ in range(NUM_TRIALS): result = charm.pool.submit(tasks, chunksize=chunksize) assert result == [f(x) for f, x in tasks] - print('Elapsed=', time() - t0) + print("Elapsed=", time() - t0) exit() diff --git a/tests/pool/pool_ncores.py b/tests/pool/pool_ncores.py index c9539b5c..fee72bff 100644 --- a/tests/pool/pool_ncores.py +++ b/tests/pool/pool_ncores.py @@ -2,7 +2,7 @@ def square(x): - return x ** 2 + return x**2 def add_val(x): diff --git a/tests/qd/qd.py b/tests/qd/qd.py index 4b5ecccc..9485780c 100644 --- a/tests/qd/qd.py +++ b/tests/qd/qd.py @@ -58,7 +58,7 @@ def __init__(self, args): assert charm.numPes() > 1 numChares = charm.numPes() * CHARES_PER_PE self.workers = Array(Worker, numChares, args=[numChares]) - print('WORK_TIME=', WORK_TIME) + print("WORK_TIME=", WORK_TIME) qdGroupReceivers = Group(QDReceiver, args=[self.thisProxy]) qdArrayReceivers = Array(QDReceiver, charm.numPes(), args=[self.thisProxy]) charm.awaitCreation(self.workers, qdGroupReceivers, qdArrayReceivers) @@ -82,9 +82,9 @@ def testQD(self, callback): charm.startQD(callback) if isinstance(callback, threads.Future): callback.get() - print('QD reached') + print("QD reached") else: - self.wait('self.qdReached') + self.wait("self.qdReached") else: charm.waitQD() assert time() - t0 > WORK_TIME @@ -92,7 +92,7 @@ def testQD(self, callback): check_fut.get() def recvQD(self): - print('QD reached') + print("QD reached") self.qdReached = True diff --git a/tests/reductions/allreduce.py b/tests/reductions/allreduce.py index d060fde5..941d7115 100644 --- a/tests/reductions/allreduce.py +++ b/tests/reductions/allreduce.py @@ -41,7 +41,7 @@ def main(args): for done in wait_alldone: done.get() - print('DONE') + print("DONE") exit() diff --git a/tests/reductions/array_reduction.py b/tests/reductions/array_reduction.py index bc2833a2..b4f5b2c1 100644 --- a/tests/reductions/array_reduction.py +++ b/tests/reductions/array_reduction.py @@ -6,7 +6,9 @@ # utility methods for assertions def assert_allclose(actual, desired, tol): assert len(actual) == len(desired) - assert sum([(abs(actual[i] - v) <= tol) for i, v in enumerate(desired)]) == len(actual) + assert sum([(abs(actual[i] - v) <= tol) for i, v in enumerate(desired)]) == len( + actual + ) def assert_almost_equal(actual, desired, tol): @@ -27,53 +29,67 @@ def __init__(self, args): nElements = 1 for x in ARRAY_SIZE: nElements *= x - print('Running reduction example on ' + str(charm.numPes()) + ' processors for ' + str(nElements) + ' elements, array dims=' + str(ARRAY_SIZE)) + print( + "Running reduction example on " + + str(charm.numPes()) + + " processors for " + + str(nElements) + + " elements, array dims=" + + str(ARRAY_SIZE) + ) arrProxy = Array(Test, ARRAY_SIZE) groupProxy = Group(TestGroup) - charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy, 'arrProxy': arrProxy, - 'groupProxy': groupProxy}, '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + { + "mainProxy": self.thisProxy, + "arrProxy": arrProxy, + "groupProxy": groupProxy, + }, + "__main__", + awaitable=True, + ).get() arrProxy.doReduction() def done_int(self, reduction_result): - assert reduction_result == 420, 'Array-to-singleton sum_int reduction failed' - print('[Main] All sum_int contributions done. Test passed') + assert reduction_result == 420, "Array-to-singleton sum_int reduction failed" + print("[Main] All sum_int contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_nop(self): - print('[Main] All nop contributions received. Test passed') + print("[Main] All nop contributions received. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_float(self, reduction_result): assert_allclose(reduction_result, [101.0, 134.0, 45.0], 1e-03) - print('[Main] All sum_float contributions done. Test passed') + print("[Main] All sum_float contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_array_to_array(self): - print('[Main] All array-to-array contributions done. Test passed') + print("[Main] All array-to-array contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_array_to_array_bcast(self): - print('[Main] All array-to-array bcast contributions done. Test passed') + print("[Main] All array-to-array bcast contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_array_to_group(self): - print('[Main] All array-to-group contributions done. Test passed') + print("[Main] All array-to-group contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_array_to_group_bcast(self): - print('[Main] All array-to-group bcast contributions done. Test passed') + print("[Main] All array-to-group bcast contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() @@ -82,25 +98,37 @@ def done_array_to_group_bcast(self): class Test(Chare): def __init__(self): - print('Test ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe())) + print("Test " + str(self.thisIndex) + " created on PE " + str(charm.myPe())) def doReduction(self): - print('Test element ' + str(self.thisIndex) + ' on PE ' + str(charm.myPe()) + ' is starting its contributions.') + print( + "Test element " + + str(self.thisIndex) + + " on PE " + + str(charm.myPe()) + + " is starting its contributions." + ) # test contributing single int back to Main self.contribute(42, Reducer.sum, mainProxy.done_int) # test contributing list of floats back to main num = [10.1, 13.4] - self.contribute(num+[float(self.thisIndex[0])], Reducer.sum, mainProxy.done_float) + self.contribute( + num + [float(self.thisIndex[0])], Reducer.sum, mainProxy.done_float + ) # test nop reduction to main self.contribute(None, Reducer.nop, mainProxy.done_nop) # test contributing to Test[0] self.contribute(4.2, Reducer.sum, self.thisProxy[0].reductionTarget) # test contributing to Test (broadcast) - self.contribute(numpy.array([4.2, 8.4]), Reducer.sum, self.thisProxy.reductionTargetBcast) + self.contribute( + numpy.array([4.2, 8.4]), Reducer.sum, self.thisProxy.reductionTargetBcast + ) # test contributing to TestGroup[0] self.contribute(4, Reducer.sum, groupProxy[0].reduceFromArray) # test contributing to TestGroup (broadcast) - self.contribute(array.array('i', [0, 8, 3]), Reducer.sum, groupProxy.reduceFromArrayBcast) + self.contribute( + array.array("i", [0, 8, 3]), Reducer.sum, groupProxy.reduceFromArrayBcast + ) def reductionTarget(self, reduction_result): assert self.thisIndex[0] == 0 @@ -115,15 +143,21 @@ def reductionTargetBcast(self, reduction_result): class TestGroup(Chare): def __init__(self): - print('TestGroup ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe())) + print( + "TestGroup " + str(self.thisIndex) + " created on PE " + str(charm.myPe()) + ) def reduceFromArray(self, reduction_result): assert self.thisIndex == 0 - assert reduction_result == 40, 'Array-to-group sum_int reduction failed.' + assert reduction_result == 40, "Array-to-group sum_int reduction failed." mainProxy.done_array_to_group() def reduceFromArrayBcast(self, reduction_result): - assert list(reduction_result) == [0, 80, 30], 'Array-to-group bcast sum_int reduction failed.' + assert list(reduction_result) == [ + 0, + 80, + 30, + ], "Array-to-group bcast sum_int reduction failed." self.contribute(None, None, mainProxy.done_array_to_group_bcast) diff --git a/tests/reductions/bench_reductions.py b/tests/reductions/bench_reductions.py index 380fa5af..9f324c80 100644 --- a/tests/reductions/bench_reductions.py +++ b/tests/reductions/bench_reductions.py @@ -19,8 +19,11 @@ def assert_almost_equal(actual, desired, tol): class Main(Chare): def __init__(self, args): - charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy, - 'NUM_CHARES': charm.numPes() * CHARES_PER_PE}, '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + {"mainProxy": self.thisProxy, "NUM_CHARES": charm.numPes() * CHARES_PER_PE}, + "__main__", + awaitable=True, + ).get() self.arrayProxy = Array(Test, NUM_CHARES) self.arrayProxy.run() self.startTime = time.time() @@ -30,7 +33,7 @@ def collectSum(self, result): self.arrayProxy.run() def done(self): - print('Program done in', time.time() - self.startTime) + print("Program done in", time.time() - self.startTime) charm.printStats() exit() @@ -38,7 +41,7 @@ def done(self): class Test(Chare): def __init__(self): - self.data = numpy.arange(DATA_LEN, dtype='float64') + self.data = numpy.arange(DATA_LEN, dtype="float64") self.reductions = 0 def run(self): diff --git a/tests/reductions/custom_reduction.py b/tests/reductions/custom_reduction.py index 51c19ab8..14a55815 100644 --- a/tests/reductions/custom_reduction.py +++ b/tests/reductions/custom_reduction.py @@ -24,37 +24,52 @@ def __init__(self, args): nDims = 1 ARRAY_SIZE = [10] * nDims - lastIdx = tuple([x-1 for x in ARRAY_SIZE]) + lastIdx = tuple([x - 1 for x in ARRAY_SIZE]) self.nElements = 1 for x in ARRAY_SIZE: self.nElements *= x - print('Running reduction example on ' + str(charm.numPes()) + ' processors for ' + str(self.nElements) + ' elements, array dims=' + str(ARRAY_SIZE)) + print( + "Running reduction example on " + + str(charm.numPes()) + + " processors for " + + str(self.nElements) + + " elements, array dims=" + + str(ARRAY_SIZE) + ) arrProxy = Array(Test, ARRAY_SIZE) - charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy, 'arrProxy': arrProxy, - 'lastIdx': lastIdx}, '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + {"mainProxy": self.thisProxy, "arrProxy": arrProxy, "lastIdx": lastIdx}, + "__main__", + awaitable=True, + ).get() arrProxy.doReduction() def done_charm_builtin(self, result): - sum_indices = (self.nElements*(self.nElements-1))/2 - assert list(result) == [10, sum_indices], 'Built-in Charm sum_int reduction failed' - print('[Main] All Charm builtin reductions done. Test passed') + sum_indices = (self.nElements * (self.nElements - 1)) / 2 + assert list(result) == [ + 10, + sum_indices, + ], "Built-in Charm sum_int reduction failed" + print("[Main] All Charm builtin reductions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_python_builtin(self, result): - sum_indices = (self.nElements*(self.nElements-1))/2 + sum_indices = (self.nElements * (self.nElements - 1)) / 2 assert type(result) == MyObject - assert result.value == sum_indices or result.value == 0, 'Built-in Python _sum or _product reduction failed' - print('[Main] All Python builtin reductions done. Test passed') + assert ( + result.value == sum_indices or result.value == 0 + ), "Built-in Python _sum or _product reduction failed" + print("[Main] All Python builtin reductions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_python_custom(self, result): - assert result == [10, lastIdx[0], 0], 'Custom Python myReduce failed' - print('[Main] All Python custom reductions done. Test passed') + assert result == [10, lastIdx[0], 0], "Custom Python myReduce failed" + print("[Main] All Python custom reductions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() @@ -66,10 +81,10 @@ def __init__(self, n): self.value = n def __add__(self, other): - return MyObject(self.value+other.value) + return MyObject(self.value + other.value) def __mul__(self, other): - return MyObject(self.value*other.value) + return MyObject(self.value * other.value) def __radd__(self, other): if other == 0: @@ -81,18 +96,24 @@ def __radd__(self, other): class Test(Chare): def __init__(self): - print('Test ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe())) + print("Test " + str(self.thisIndex) + " created on PE " + str(charm.myPe())) def doReduction(self): # test contributing using built-in Charm reducer - self.contribute([1, self.thisIndex[0]], Reducer.sum, mainProxy.done_charm_builtin) + self.contribute( + [1, self.thisIndex[0]], Reducer.sum, mainProxy.done_charm_builtin + ) a = MyObject(self.thisIndex[0]) # test contributing using built-in Python reducer self.contribute(a, Reducer.sum, mainProxy.done_python_builtin) # test product reducer self.contribute(a, Reducer.product, mainProxy.done_python_builtin) # test contributing using custom Python reducer - self.contribute([1, self.thisIndex[0], self.thisIndex[0]], Reducer.myReducer, mainProxy.done_python_custom) + self.contribute( + [1, self.thisIndex[0], self.thisIndex[0]], + Reducer.myReducer, + mainProxy.done_python_custom, + ) charm.start(Main) diff --git a/tests/reductions/future_reduction.py b/tests/reductions/future_reduction.py index 6f15a729..bf5c4049 100644 --- a/tests/reductions/future_reduction.py +++ b/tests/reductions/future_reduction.py @@ -4,7 +4,7 @@ class Test(Chare): def __init__(self, f): - data = np.arange(10, dtype='float64') + data = np.arange(10, dtype="float64") self.contribute(data, Reducer.sum, f) @@ -13,8 +13,12 @@ def main(args): f2 = Future() Group(Test, args=[f1]) Array(Test, charm.numPes() * 4, args=[f2]) - np.testing.assert_allclose(f1.get(), np.arange(10, dtype='float64') * charm.numPes()) - np.testing.assert_allclose(f2.get(), np.arange(10, dtype='float64') * charm.numPes() * 4) + np.testing.assert_allclose( + f1.get(), np.arange(10, dtype="float64") * charm.numPes() + ) + np.testing.assert_allclose( + f2.get(), np.arange(10, dtype="float64") * charm.numPes() * 4 + ) exit() diff --git a/tests/reductions/group_reduction.py b/tests/reductions/group_reduction.py index 4811eedf..92e6b675 100644 --- a/tests/reductions/group_reduction.py +++ b/tests/reductions/group_reduction.py @@ -4,7 +4,9 @@ # utility methods for assertions def assert_allclose(actual, desired, tol): assert len(actual) == len(desired) - assert sum([(abs(actual[i] - v) <= tol) for i, v in enumerate(desired)]) == len(actual) + assert sum([(abs(actual[i] - v) <= tol) for i, v in enumerate(desired)]) == len( + actual + ) def assert_almost_equal(actual, desired, tol): @@ -25,57 +27,62 @@ def __init__(self, args): nElements = 1 for x in ARRAY_SIZE: nElements *= x - print('Running reduction example on ' + str(charm.numPes()) + ' processors') + print("Running reduction example on " + str(charm.numPes()) + " processors") groupProxy = Group(TestGroup) # create an array to test group-to-array reductions arrayProxy = Array(TestArray, ARRAY_SIZE) - charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy, 'arrayProxy': arrayProxy}, - '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + {"mainProxy": self.thisProxy, "arrayProxy": arrayProxy}, + "__main__", + awaitable=True, + ).get() groupProxy.doReduction() def done_int(self, reduction_result): - assert reduction_result == 42*charm.numPes(), 'Group-to-singleton sum_int reduction failed' - print('[Main] All sum_int contributions done. Test passed') + assert ( + reduction_result == 42 * charm.numPes() + ), "Group-to-singleton sum_int reduction failed" + print("[Main] All sum_int contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_nop(self): - print('[Main] All nop contributions received. Test passed') + print("[Main] All nop contributions received. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_float(self, reduction_result): - expected_result = [x*charm.numPes() for x in [10.1, 13.4]] - indices_sum = (charm.numPes() * (charm.numPes() - 1))/2 + expected_result = [x * charm.numPes() for x in [10.1, 13.4]] + indices_sum = (charm.numPes() * (charm.numPes() - 1)) / 2 expected_result += [float(indices_sum)] assert_allclose(reduction_result, expected_result, 1e-03) - print('[Main] All sum_float contributions done. Test passed') + print("[Main] All sum_float contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_group_to_array(self): - print('[Main] All group-to-array contributions done. Test passed') + print("[Main] All group-to-array contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_group_to_array_bcast(self): - print('[Main] All group-to-array bcast contributions done. Test passed') + print("[Main] All group-to-array bcast contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_group_to_group(self): - print('[Main] All group-to-group contributions done. Test passed') + print("[Main] All group-to-group contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() def done_group_to_group_bcast(self): - print('[Main] All group-to-group bcast contributions done. Test passed') + print("[Main] All group-to-group bcast contributions done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() @@ -84,15 +91,23 @@ def done_group_to_group_bcast(self): class TestGroup(Chare): def __init__(self): - print('TestGroup ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe())) + print( + "TestGroup " + str(self.thisIndex) + " created on PE " + str(charm.myPe()) + ) def doReduction(self): - print('TestGroup element on PE ' + str(charm.myPe()) + ' is starting its contributions.') + print( + "TestGroup element on PE " + + str(charm.myPe()) + + " is starting its contributions." + ) # test contributing single int back to Main self.contribute(42, Reducer.sum, mainProxy.done_int) # test contributing list of floats back to Main num = [10.1, 13.4] - self.contribute(num+[float(self.thisIndex)], Reducer.sum, mainProxy.done_float) + self.contribute( + num + [float(self.thisIndex)], Reducer.sum, mainProxy.done_float + ) # test nop reduction to main self.contribute(None, Reducer.nop, mainProxy.done_nop) # test contributing to TestArray[0] @@ -100,32 +115,42 @@ def doReduction(self): # test contributing to TestArray (broadcast) self.contribute(-4, Reducer.sum, arrayProxy.reduceGroupToArrayBcast) # test contributing to TestGroup[0] - self.contribute([5, 7, -3, 0], Reducer.sum, self.thisProxy[0].reduceGroupToGroup) + self.contribute( + [5, 7, -3, 0], Reducer.sum, self.thisProxy[0].reduceGroupToGroup + ) # test contributing to TestGroup (broadcast) self.contribute(-4.2, Reducer.sum, self.thisProxy.reduceGroupToGroupBcast) def reduceGroupToGroup(self, reduction_result): assert self.thisIndex == 0 - assert list(reduction_result) == [charm.numPes()*x for x in [5, 7, -3, 0]], 'Group-to-group reduction failed.' + assert list(reduction_result) == [ + charm.numPes() * x for x in [5, 7, -3, 0] + ], "Group-to-group reduction failed." mainProxy.done_group_to_group() def reduceGroupToGroupBcast(self, reduction_result): - assert_almost_equal(reduction_result, -4.2*charm.numPes(), 1e-03) + assert_almost_equal(reduction_result, -4.2 * charm.numPes(), 1e-03) self.contribute(None, None, mainProxy.done_group_to_group_bcast) class TestArray(Chare): def __init__(self): - print('TestArray ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe())) + print( + "TestArray " + str(self.thisIndex) + " created on PE " + str(charm.myPe()) + ) def reduceGroupToArray(self, reduction_result): assert self.thisIndex[0] == 0 - assert_allclose(reduction_result, [charm.numPes()*x for x in [4.2, 13.1]], 1e-03) + assert_allclose( + reduction_result, [charm.numPes() * x for x in [4.2, 13.1]], 1e-03 + ) mainProxy.done_group_to_array() def reduceGroupToArrayBcast(self, reduction_result): - assert reduction_result == -4*charm.numPes(), 'Group-to-array bcast reduction failed.' + assert ( + reduction_result == -4 * charm.numPes() + ), "Group-to-array bcast reduction failed." self.contribute(None, None, mainProxy.done_group_to_array_bcast) diff --git a/tests/reductions/section_reduction.py b/tests/reductions/section_reduction.py index a1638ffc..86c2ea3d 100644 --- a/tests/reductions/section_reduction.py +++ b/tests/reductions/section_reduction.py @@ -51,10 +51,10 @@ def test_op(done, op, vector_size, use_numpy=False): assert list(val1) == list(val2) else: assert val1 == val2 - print('[Main] Reduction with Reducer.%s passes.' % get_op_name(op)) + print("[Main] Reduction with Reducer.%s passes." % get_op_name(op)) done(True) except AssertionError: - print('[Main] Reduction with Reducer.%s is not correct.' % get_op_name(op)) + print("[Main] Reduction with Reducer.%s is not correct." % get_op_name(op)) done(False) @@ -64,7 +64,7 @@ def test_op_logical(done, op, vector_size, use_numpy=False): if use_numpy: data = np.random.rand(vector_size) p = 0.1 - data = np.random.choice(a=[False, True], size=(vector_size), p=[p, 1-p]) + data = np.random.choice(a=[False, True], size=(vector_size), p=[p, 1 - p]) else: data = list(map(bool, range(0, vector_size))) else: @@ -82,10 +82,10 @@ def test_op_logical(done, op, vector_size, use_numpy=False): assert list(val1) == list(val2) else: assert val1 == val2 - print('[Main] Reduction with Reducer.%s passes.' % get_op_name(op)) + print("[Main] Reduction with Reducer.%s passes." % get_op_name(op)) done(True) except AssertionError: - print('[Main] Reduction with Reducer.%s is not correct.' % get_op_name(op)) + print("[Main] Reduction with Reducer.%s is not correct." % get_op_name(op)) done(False) @@ -139,10 +139,10 @@ def main(args): passes = sum(map(lambda x: x.get(), test_futures)) if passes == num_tests: - print('All tests passed!') + print("All tests passed!") exit() else: - print('ERROR: Not all tests passed.') + print("ERROR: Not all tests passed.") exit(1) diff --git a/tests/reductions/test_gather.py b/tests/reductions/test_gather.py index f81423db..fa8cd606 100644 --- a/tests/reductions/test_gather.py +++ b/tests/reductions/test_gather.py @@ -15,10 +15,19 @@ def __init__(self, args): self.nElements = 1 for x in ARRAY_SIZE: self.nElements *= x - print('Running gather example on', charm.numPes(), 'processors for', self.nElements, 'elements, array dims=', ARRAY_SIZE) + print( + "Running gather example on", + charm.numPes(), + "processors for", + self.nElements, + "elements, array dims=", + ARRAY_SIZE, + ) arrProxy = Array(Test, ARRAY_SIZE) grpProxy = Group(TestGroup) - charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy}, '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + {"mainProxy": self.thisProxy}, "__main__", awaitable=True + ).get() arrProxy.doGather() grpProxy.doGather() red_future = charm.Future() @@ -28,8 +37,10 @@ def __init__(self, args): def done_gather_single(self, result): gather_arr_indices = list(range(self.nElements)) gather_grp_indices = list(range(charm.numPes())) - assert result == gather_arr_indices or result == gather_grp_indices, 'Gather single elements failed.' - print('[Main] Gather collective for single elements done. Test passed') + assert ( + result == gather_arr_indices or result == gather_grp_indices + ), "Gather single elements failed." + print("[Main] Gather collective for single elements done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() @@ -37,8 +48,10 @@ def done_gather_single(self, result): def done_gather_array(self, result): gather_arr_indices = [tuple([i]) for i in range(self.nElements)] gather_grp_indices = [[i, 42] for i in range(charm.numPes())] - assert result == gather_arr_indices or result == gather_grp_indices, 'Gather arrays failed.' - print('[Main] Gather collective for arrays done. Test passed') + assert ( + result == gather_arr_indices or result == gather_grp_indices + ), "Gather arrays failed." + print("[Main] Gather collective for arrays done. Test passed") self.recvdReductions += 1 if self.recvdReductions >= self.expectedReductions: exit() @@ -47,12 +60,14 @@ def done_gather_array(self, result): class Test(Chare): def __init__(self): - print('Test', self.thisIndex, 'created on PE', charm.myPe()) + print("Test", self.thisIndex, "created on PE", charm.myPe()) def doGather(self, red_future=None): if red_future is None: # gather single elements - self.contribute(self.thisIndex[0], Reducer.gather, mainProxy.done_gather_single) + self.contribute( + self.thisIndex[0], Reducer.gather, mainProxy.done_gather_single + ) # gather arrays self.contribute(self.thisIndex, Reducer.gather, mainProxy.done_gather_array) else: @@ -62,13 +77,15 @@ def doGather(self, red_future=None): class TestGroup(Chare): def __init__(self): - print('TestGroup', self.thisIndex, 'created on PE', charm.myPe()) + print("TestGroup", self.thisIndex, "created on PE", charm.myPe()) def doGather(self): # gather single elements self.contribute(self.thisIndex, Reducer.gather, mainProxy.done_gather_single) # gather arrays - self.contribute([self.thisIndex, 42], Reducer.gather, mainProxy.done_gather_array) + self.contribute( + [self.thisIndex, 42], Reducer.gather, mainProxy.done_gather_array + ) charm.start(Main) diff --git a/tests/sections/callbacks.py b/tests/sections/callbacks.py index b1b10ca7..cff12fe5 100644 --- a/tests/sections/callbacks.py +++ b/tests/sections/callbacks.py @@ -45,7 +45,7 @@ def work1(self, cb, secProxy=None): self.contribute(3, Reducer.sum, cb, secProxy) def work2(self, cb, secProxy=None): - data = numpy.arange(100, dtype='float64') + data = numpy.arange(100, dtype="float64") self.contribute(data, Reducer.sum, cb, secProxy) def work3(self, cb, secProxy=None): @@ -56,7 +56,7 @@ def work4(self, cb, secProxy=None): def work5(self, cb, secProxy=None): if self.idx == 1: - cb('test section callback') + cb("test section callback") def main(args): @@ -89,15 +89,15 @@ def main(args): assert f.get() == (numchares // 2) f = Future() - expected = numpy.arange(100, dtype='float64') + expected = numpy.arange(100, dtype="float64") expected *= numchares collection.setTest(f, expected, awaitable=True).get() collection.work2(secProxy.recvResult) assert f.get() == (numchares // 2) f = Future() - expected = numpy.arange(100, dtype='float64') - expected *= (numchares // 2) + expected = numpy.arange(100, dtype="float64") + expected *= numchares // 2 secProxy.setTest(f, expected, awaitable=True).get() secProxy.work2(secProxy.recvResult, secProxy) assert f.get() == (numchares // 2) @@ -127,7 +127,7 @@ def main(args): assert f.get() == (numchares // 2) f = Future() - expected = 'test section callback' + expected = "test section callback" collection.setTest(f, expected, awaitable=True).get() collection.work5(secProxy.recvResult) assert f.get() == (numchares // 2) diff --git a/tests/sections/constrained_groups.py b/tests/sections/constrained_groups.py index 47a59a2f..401e52d6 100644 --- a/tests/sections/constrained_groups.py +++ b/tests/sections/constrained_groups.py @@ -27,7 +27,7 @@ def main(args): assert charm.numPes() > 1 global section_pes section_pes = random.sample(range(charm.numPes()), charm.numPes() // 2) - charm.thisProxy.updateGlobals({'section_pes': section_pes}, awaitable=True).get() + charm.thisProxy.updateGlobals({"section_pes": section_pes}, awaitable=True).get() g = Group(Test, onPEs=section_pes, args=[4862]) assert g[section_pes[0]].test2(ret=True).get() == 34589 g.test(awaitable=True).get() diff --git a/tests/sections/multirand-split-combine.py b/tests/sections/multirand-split-combine.py index 5be476f8..4ede3b1c 100644 --- a/tests/sections/multirand-split-combine.py +++ b/tests/sections/multirand-split-combine.py @@ -67,8 +67,8 @@ def addElems(self, elems): def verify(self, result): if set(result) != self.elems: - print('self.elems=', self.elems) - print('result=', result) + print("self.elems=", self.elems) + print("result=", result) raise Exception def split(self, N): @@ -94,7 +94,7 @@ def split(self, N): if (cid, idx) not in insections: insections[(cid, idx)] = [] insections[(cid, idx)].append(i) - charm.thisProxy.updateGlobals({'insections': insections}, awaitable=True).get() + charm.thisProxy.updateGlobals({"insections": insections}, awaitable=True).get() assert len(sections) == N for section in sections: assert len(section) > 0 @@ -106,8 +106,7 @@ def split(self, N): def partition(elems, N): num_elems = len(elems) - return [elems[i*num_elems // N: (i+1)*num_elems // N] - for i in range(N)] + return [elems[i * num_elems // N : (i + 1) * num_elems // N] for i in range(N)] def inSections(obj): @@ -161,11 +160,16 @@ def main(args): c = Collection([], proxy) for c_ in cs: c.addElems(c_.elems) - assert hasattr(c.proxy, 'section') and c.proxy.issec + assert hasattr(c.proxy, "section") and c.proxy.issec sections_combined += 1 collections.append(c) - print(len(collections), 'collections created, sections_split=', sections_split, - 'sections_combined=', sections_combined) + print( + len(collections), + "collections created, sections_split=", + sections_split, + "sections_combined=", + sections_combined, + ) if VERBOSE: section_sizes = [] @@ -173,11 +177,11 @@ def main(args): if c.proxy.issec is not None: section_sizes.append(len(c.elems)) section_sizes = numpy.array(section_sizes) - print(len(section_sizes), 'sections, sizes:') - print('min size=', numpy.min(section_sizes)) - print('median size=', numpy.median(section_sizes)) - print('mean size=', numpy.mean(section_sizes)) - print('max size=', numpy.max(section_sizes)) + print(len(section_sizes), "sections, sizes:") + print("min size=", numpy.min(section_sizes)) + print("median size=", numpy.median(section_sizes)) + print("mean size=", numpy.mean(section_sizes)) + print("max size=", numpy.max(section_sizes)) for c in collections: if c.proxy.issec: @@ -187,7 +191,9 @@ def main(args): for _ in range(NUM_ITER): futures = [Future() for _ in range(len(collections))] - charm.thisProxy.updateGlobals({'DATA_VERIFY': random.randint(0, 100000)}, awaitable=True).get() + charm.thisProxy.updateGlobals( + {"DATA_VERIFY": random.randint(0, 100000)}, awaitable=True + ).get() data = DATA_VERIFY for i, c in enumerate(collections): sid = None @@ -198,7 +204,7 @@ def main(args): result = futures[i].get() collections[i].verify(result) - print('DONE') + print("DONE") exit() diff --git a/tests/sections/simple.py b/tests/sections/simple.py index dbaa9b74..e99f6ed3 100644 --- a/tests/sections/simple.py +++ b/tests/sections/simple.py @@ -12,7 +12,7 @@ def member(obj): class Test(Chare): def __init__(self): - self.insection = (member(self) >= 0) + self.insection = member(self) >= 0 def setSecProxy(self, proxy): self.secProxy = proxy @@ -33,7 +33,7 @@ def main(args): array3d = Array(Test, (4, 5, 3)) # for each array, create one section using member function to determine section membership - for array, size in [(array2d, 8*8), (array3d, 4*5*3)]: + for array, size in [(array2d, 8 * 8), (array3d, 4 * 5 * 3)]: secProxy = charm.split(array, 1, member)[0] array.setSecProxy(secProxy, awaitable=True).get() f = Future() @@ -41,7 +41,7 @@ def main(args): assert len(f.get()) < size # for each array, create one section passing a random list of element indexes (half the size of the array) - for array, size in [(array2d, 8*8), (array3d, 4*5*3)]: + for array, size in [(array2d, 8 * 8), (array3d, 4 * 5 * 3)]: elems = array.getElems(ret=True).get() assert len(elems) == size section_elems = random.sample(elems, size // 2) diff --git a/tests/sections/slice.py b/tests/sections/slice.py index ea0b0107..11669dfb 100644 --- a/tests/sections/slice.py +++ b/tests/sections/slice.py @@ -17,14 +17,14 @@ def main(args): elems = list(range(0, charm.numPes(), 2)) assert g[::2].getIdx(ret=True).get() == elems assert g[0::2].getIdx_th(ret=True).get() == elems - assert g[:charm.numPes():2].getIdx(ret=True).get() == elems - assert g[0:charm.numPes()].getIdx_th(ret=True).get() != elems + assert g[: charm.numPes() : 2].getIdx(ret=True).get() == elems + assert g[0 : charm.numPes()].getIdx_th(ret=True).get() != elems a1 = Array(Test, (8, 8)) a2 = Array(Test, 64) indexes = a1[0:8:2, 1:8:2].getIdx(ret=True).get() - assert len(indexes) == 8*8//4 + assert len(indexes) == 8 * 8 // 4 for idx in indexes: assert len(idx) == 2 assert idx[0] % 2 == 0 diff --git a/tests/thread_entry_methods/future_bcast.py b/tests/thread_entry_methods/future_bcast.py index 91ae31ec..0a81fc2b 100644 --- a/tests/thread_entry_methods/future_bcast.py +++ b/tests/thread_entry_methods/future_bcast.py @@ -21,7 +21,7 @@ def main(args): t0 = time.time() a.work(sleepTimes, awaitable=True).get() # wait for broadcast to complete wait_time = time.time() - t0 - assert(wait_time >= max(sleepTimes)) + assert wait_time >= max(sleepTimes) print(wait_time, max(sleepTimes)) g = Group(Test) @@ -30,7 +30,7 @@ def main(args): t0 = time.time() g.work(sleepTimes, awaitable=True).get() # wait for broadcast to complete wait_time = time.time() - t0 - assert(wait_time >= max(sleepTimes)) + assert wait_time >= max(sleepTimes) print(wait_time, max(sleepTimes)) exit() diff --git a/tests/thread_entry_methods/test1.py b/tests/thread_entry_methods/test1.py index 31614ae5..ff4a702b 100644 --- a/tests/thread_entry_methods/test1.py +++ b/tests/thread_entry_methods/test1.py @@ -36,7 +36,7 @@ def done(self): class Test2(Chare): def getVal(self): - return (73 + charm.myPe()) + return 73 + charm.myPe() def main(args): @@ -44,8 +44,9 @@ def main(args): # every chare sends to every other so don't want a ton of chares numChares = min(charm.numPes() * 8, 32) testGroup = Group(Test2) - charm.thisProxy.updateGlobals({'numChares': numChares, 'testGroup': testGroup}, - '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + {"numChares": numChares, "testGroup": testGroup}, "__main__", awaitable=True + ).get() Array(Test, numChares) diff --git a/tests/thread_entry_methods/test1_when.py b/tests/thread_entry_methods/test1_when.py index b9a28064..449d47df 100644 --- a/tests/thread_entry_methods/test1_when.py +++ b/tests/thread_entry_methods/test1_when.py @@ -23,10 +23,15 @@ def start(self, pes): self.contribute(None, None, self.thisProxy[0].done) @coro - @when('self.iteration == iteration') + @when("self.iteration == iteration") def getVal(self, iteration): - result = 53 * testGroup[charm.myPe()].getVal(ret=True).get() * self.thisIndex[0] * self.iteration - #assert result == 53 * (73 + charm.myPe()) * self.thisIndex[0] * self.iteration + result = ( + 53 + * testGroup[charm.myPe()].getVal(ret=True).get() + * self.thisIndex[0] + * self.iteration + ) + # assert result == 53 * (73 + charm.myPe()) * self.thisIndex[0] * self.iteration self.msgsRcvd += 1 if self.msgsRcvd == numChares: self.msgsRcvd = 0 @@ -41,15 +46,16 @@ def done(self): class Test2(Chare): def getVal(self): - return (73 + charm.myPe()) + return 73 + charm.myPe() def main(args): global numChares, testGroup numChares = min(charm.numPes() * 8, 32) testGroup = Group(Test2) - charm.thisProxy.updateGlobals({'numChares': numChares, 'testGroup': testGroup}, - '__main__', awaitable=True).get() + charm.thisProxy.updateGlobals( + {"numChares": numChares, "testGroup": testGroup}, "__main__", awaitable=True + ).get() Array(Test, numChares) diff --git a/tests/thread_entry_methods/test_wait.py b/tests/thread_entry_methods/test_wait.py index 2e654de5..c18b4c88 100644 --- a/tests/thread_entry_methods/test_wait.py +++ b/tests/thread_entry_methods/test_wait.py @@ -30,11 +30,17 @@ def __init__(self, args): self.result = 0 for i in range(NUM_ITER): workers.sendVal() - self.wait("self.num_responses1 == " + str(num_chares//2) + " and 33 == TEST_GLOBAL") - self.wait("self.num_responses2 == " + str(num_chares//2) + " and 47 == ro.X") - assert(self.result == num_chares * 237) - assert(self.num_responses1 == num_chares//2) - assert(self.num_responses2 == num_chares//2) + self.wait( + "self.num_responses1 == " + + str(num_chares // 2) + + " and 33 == TEST_GLOBAL" + ) + self.wait( + "self.num_responses2 == " + str(num_chares // 2) + " and 47 == ro.X" + ) + assert self.result == num_chares * 237 + assert self.num_responses1 == num_chares // 2 + assert self.num_responses2 == num_chares // 2 self.num_responses1 = self.num_responses2 = 0 self.result = 0 charm.printStats() diff --git a/tests/topo/topo_treeAPI.py b/tests/topo/topo_treeAPI.py index 4e4a91be..1d312967 100644 --- a/tests/topo/topo_treeAPI.py +++ b/tests/topo/topo_treeAPI.py @@ -1,7 +1,7 @@ from charm4py import charm -allPes_check = [] +allPes_check = [] evenPes_check = [] @@ -27,20 +27,24 @@ def main(args): print("\nWhole topo tree rooted at PE 0") printWholeTree(0, 0) - assert(len(allPes_check) == charm.numPes() and set(allPes_check) == set(range(charm.numPes()))) + assert len(allPes_check) == charm.numPes() and set(allPes_check) == set( + range(charm.numPes()) + ) allPes_check = [] lastPE = charm.numPes() - 1 if lastPE != 0: print("\nWhole topo tree rooted at", lastPE) printWholeTree(lastPE, lastPE) - assert(len(allPes_check) == charm.numPes() and set(allPes_check) == set(range(charm.numPes()))) + assert len(allPes_check) == charm.numPes() and set(allPes_check) == set( + range(charm.numPes()) + ) allPes_check = [] print("\nEven numbered PE tree, rooted at PE 0") evenPEs = [pe for pe in range(charm.numPes()) if pe % 2 == 0] printEvenNbTree(evenPEs, 0) - assert(len(evenPes_check) == len(evenPEs) and set(evenPes_check) == set(evenPEs)) + assert len(evenPes_check) == len(evenPEs) and set(evenPes_check) == set(evenPEs) evenPes_check = [] newRoot = evenPEs[-1] @@ -48,7 +52,7 @@ def main(args): evenPEs.insert(0, evenPEs.pop()) # move root from back to beginning of list print("\nEven numbered PE tree, rooted at PE", newRoot) printEvenNbTree(evenPEs, newRoot) - assert(len(evenPes_check) == len(evenPEs) and set(evenPes_check) == set(evenPEs)) + assert len(evenPes_check) == len(evenPEs) and set(evenPes_check) == set(evenPEs) evenPes_check = [] exit() diff --git a/tests/when/perf_test.py b/tests/when/perf_test.py index 78033114..dbfce8b7 100644 --- a/tests/when/perf_test.py +++ b/tests/when/perf_test.py @@ -9,15 +9,15 @@ class Worker(Chare): def start(self, done_future): - self.cur_id = 0 + self.cur_id = 0 self.phase_cnt = 0 self.done_future = done_future @when("self.cur_id == id") def recv_id(self, id): - #if self.thisIndex == 0: + # if self.thisIndex == 0: # return self.contribute(None, None, self.done_future) - assert(id == self.cur_id) + assert id == self.cur_id self.phase_cnt += 1 if self.phase_cnt == PHASE_NUM: self.phase_cnt = 0 @@ -33,8 +33,8 @@ def main(args): random.seed(45782) ids = [] for i in range(MAX_VALS): - #for _ in range(PHASE_NUM): - #ids.append(i) + # for _ in range(PHASE_NUM): + # ids.append(i) ids.append(i) random.shuffle(ids) @@ -42,7 +42,7 @@ def main(args): g.start(done, awaitable=True).get() t0 = time.time() for id in ids: - #g.recv_id(id) + # g.recv_id(id) for _ in range(PHASE_NUM): g.recv_id(id) done.get() diff --git a/tests/when/stencil.py b/tests/when/stencil.py index f8eb677b..5bae5c3a 100644 --- a/tests/when/stencil.py +++ b/tests/when/stencil.py @@ -25,7 +25,7 @@ def work(self, done_fut): self.iter_complete.get() self.reduce(done_fut) - @when('self.iteration == iteration') + @when("self.iteration == iteration") def recvData(self, iteration, data): self.msgs_recvd += 1 if self.msgs_recvd == len(self.nbs): diff --git a/tests/when/test_when_syntax.py b/tests/when/test_when_syntax.py index 3be34a3f..7e597a66 100644 --- a/tests/when/test_when_syntax.py +++ b/tests/when/test_when_syntax.py @@ -9,69 +9,70 @@ # NOTE: this is not a parallel program + def parseMethodArgs(s): - arg_names = re.split(', *', s[1:-1]) + arg_names = re.split(", *", s[1:-1]) method_args = {} for i in range(1, len(arg_names)): - method_args[arg_names[i]] = i-1 + method_args[arg_names[i]] = i - 1 return method_args def main(args): - when_cond = 'self.iterations == iter' - method = '(self, iter, x, y)' + when_cond = "self.iterations == iter" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.MsgTagCond) - assert cond.attrib_name == 'iterations' + assert cond.attrib_name == "iterations" assert cond.arg_idx == 0 - when_cond = 'self.x == x' - method = '(self, iter, x, y)' + when_cond = "self.x == x" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.MsgTagCond) - assert cond.attrib_name == 'x' + assert cond.attrib_name == "x" assert cond.arg_idx == 1 - when_cond = 'y == self.x ' - method = '(self, iter, x, y)' + when_cond = "y == self.x " + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.MsgTagCond) - assert cond.attrib_name == 'x' + assert cond.attrib_name == "x" assert cond.arg_idx == 2 - when_cond = 'self.x == x + y' - method = '(self, iter, x, y)' + when_cond = "self.x == x + y" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.ChareStateMsgCond) - when_cond = 'x < y' - method = '(self, iter, x, y)' + when_cond = "x < y" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.ChareStateMsgCond) - when_cond = 'y == y' - method = '(self, iter, x, y)' + when_cond = "y == y" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.ChareStateMsgCond) - when_cond = 'iter' - method = '(self, iter, x, y)' + when_cond = "iter" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.ChareStateMsgCond) - when_cond = 'self.x' - method = '(self, iter, x, y)' + when_cond = "self.x" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.ChareStateCond) - when_cond = 'self.x + self.y == 3' - method = '(self, iter, x, y)' + when_cond = "self.x + self.y == 3" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.ChareStateCond) - when_cond = 'self.x > (self.y + 2/3 + self.z + error)' - method = '(self, iter, x, y)' + when_cond = "self.x > (self.y + 2/3 + self.z + error)" + method = "(self, iter, x, y)" cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method)) assert isinstance(cond, wait.ChareStateCond) diff --git a/tests/when/when_test.py b/tests/when/when_test.py index 9c57b27c..6f850434 100644 --- a/tests/when/when_test.py +++ b/tests/when/when_test.py @@ -7,10 +7,10 @@ class Test(Chare): def __init__(self, numParticipants): self.numParticipants = numParticipants - self.msgsRcvd = 0 # for PE 0 - self.current = 1 # for PE 0 - self.msgsSent = 0 # for PEs != 0 - #print("Group constructed " + str(self.thisIndex)) + self.msgsRcvd = 0 # for PE 0 + self.current = 1 # for PE 0 + self.msgsSent = 0 # for PEs != 0 + # print("Group constructed " + str(self.thisIndex)) @when("self.current == id") def testWhen(self, id, msg): @@ -26,7 +26,7 @@ def testWhen(self, id, msg): def run(self): if charm.myPe() == 0 or charm.myPe() > self.numParticipants: return - #print("Group " + str(self.thisIndex) + " sending msg " + str(self.msgsSent)) + # print("Group " + str(self.thisIndex) + " sending msg " + str(self.msgsSent)) self.thisProxy[0].testWhen(charm.myPe(), "hi") self.msgsSent += 1 if self.msgsSent < GRP_TO_SEND: @@ -36,7 +36,7 @@ def run(self): def main(args): if charm.numPes() < 3: charm.abort("Run program with at least 3 PEs") - numParticipants = min(charm.numPes()-1, 31) + numParticipants = min(charm.numPes() - 1, 31) Group(Test, args=[numParticipants]).run() diff --git a/tests/when/when_test2.py b/tests/when/when_test2.py index d3dd380f..56ebd88f 100644 --- a/tests/when/when_test2.py +++ b/tests/when/when_test2.py @@ -18,20 +18,20 @@ def __init__(self, controller): @when("self.ready and (TEST_WHEN_GLOBAL == 33)") def startWork(self, x, y, z): - assert(self.ready) + assert self.ready self.ready = False self.thisProxy[self.thisIndex].doWork(x, y, z) def doWork(self, x, y, z): - assert(not self.ready) + assert not self.ready result = 0 for _ in range(WORKER_ITERS): - result += (x * y * z) + result += x * y * z self.thisProxy[self.thisIndex].workDone(result) def workDone(self, result): - assert(not self.ready) - assert(result == X*Y*Z*WORKER_ITERS) + assert not self.ready + assert result == X * Y * Z * WORKER_ITERS self.ready = True self.controller.taskDone()