diff --git a/.github/workflows/charm4py.yml b/.github/workflows/charm4py.yml
index a6a08acf..445f6194 100644
--- a/.github/workflows/charm4py.yml
+++ b/.github/workflows/charm4py.yml
@@ -31,7 +31,7 @@ jobs:
           python-version: ${{ matrix.python-version }}
       - name: Install dependencies
         run: |
-          pip install setuptools cython cffi greenlet numpy torch torchvision filelock matplotlib
+          pip install setuptools cython cffi greenlet numpy numba torch torchvision filelock matplotlib
           if [ ${{ matrix.os }} == 'macos-13' ]; then
             # pypi only distributes torch packages w/ numpy v1 for macos-x86_64
             pip install 'numpy<2'
@@ -50,3 +50,28 @@ jobs:
           # needed for param server
           export OBJC_DISABLE_INITIALIZE_FORK_SAFETY=YES
           python auto_test.py
+
+  build:
+    name: Lint
+    runs-on: ubuntu-latest
+
+    permissions:
+      contents: read
+      packages: read
+      # To report GitHub Actions status checks
+      statuses: write
+
+    steps:
+      - name: Checkout code
+        uses: actions/checkout@v4
+        with:
+          # super-linter needs the full git history to get the
+          # list of files that changed across commits
+          fetch-depth: 0
+
+      - name: Super-linter
+        uses: super-linter/super-linter/slim@v7.4.0  # x-release-please-version
+        env:
+          # To report GitHub Actions status checks
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          VALIDATE_PYTHON_BLACK: "true"
diff --git a/auto_test.py b/auto_test.py
index a4860494..04e96d7b 100644
--- a/auto_test.py
+++ b/auto_test.py
@@ -1,6 +1,7 @@
 import time
 import subprocess
 import sys
+
 if sys.version_info[0] < 3:
     print("auto_test requires Python 3")
     exit(1)
@@ -10,21 +11,23 @@
 import json
 
 
-if len(sys.argv) == 2 and sys.argv[1] == '-version_check':
+if len(sys.argv) == 2 and sys.argv[1] == "-version_check":
     exit(sys.version_info[0])
 
 
 def searchForPython(python_implementations):
     py3_exec = None
-    py3_exec = shutil.which('python3')
+    py3_exec = shutil.which("python3")
     if py3_exec is None:
-        exec_str = shutil.which('python')
+        exec_str = shutil.which("python")
         if exec_str is not None:
-            version = subprocess.call([exec_str, 'auto_test.py', '-version_check'])
+            version = subprocess.call([exec_str, "auto_test.py", "-version_check"])
             if version >= 3:
                 py3_exec = exec_str
     if py3_exec is None:
-        print("WARNING: Python 3 executable not found for auto_test. If desired, set manually")
+        print(
+            "WARNING: Python 3 executable not found for auto_test. If desired, set manually"
+        )
     else:
         python_implementations.add((3, py3_exec))
 
@@ -33,76 +36,88 @@ def searchForPython(python_implementations):
 TIMEOUT = 120  # timeout for each test (in seconds)
 CHARM_QUIET_AFTER_NUM_TESTS = 5
 
-commonArgs = ['++local']
-default_num_processes = int(os.environ.get('CHARM4PY_TEST_NUM_PROCESSES', 4))
+commonArgs = ["++local"]
+default_num_processes = int(os.environ.get("CHARM4PY_TEST_NUM_PROCESSES", 4))
 
 try:
-    import numba
+    import numba  # noqa: F401  # availability probe only; do not remove
     numbaInstalled = True
-except:
+except Exception:
     numbaInstalled = False
 
 # search for python executables
-python_implementations = set()   # python implementations can also be added here manually
+python_implementations = set()  # python implementations can also be added here manually
 searchForPython(python_implementations)
 
-interfaces = ['cython']
+interfaces = ["cython"]
 
-with open('test_config.json', 'r') as infile:
+with open("test_config.json", "r") as infile:
     tests = json.load(infile)
 
 num_tests = 0
 durations = defaultdict(dict)
 for test in tests:
-    if 'condition' in test:
-        if test['condition'] == 'numbaInstalled' and not numbaInstalled:
+    if "condition" in test:
+        if test["condition"] == "numbaInstalled" and not numbaInstalled:
             continue
-        if test['condition'] == 'not numbaInstalled' and numbaInstalled:
+        if test["condition"] == "not numbaInstalled" and numbaInstalled:
             continue
-    if 'timeout_override' in test:
-        TIMEOUT = test['timeout_override']
+    if "timeout_override" in test:
+        TIMEOUT = test["timeout_override"]
     else:
         TIMEOUT = 120
-    num_processes = max(test.get('force_min_processes', default_num_processes), default_num_processes)
+    num_processes = max(
+        test.get("force_min_processes", default_num_processes), default_num_processes
+    )
     for interface in interfaces:
-        durations[interface][test['path']] = []
+        durations[interface][test["path"]] = []
         for version, python in sorted(python_implementations):
-            if version < test.get('requires_py_version', -1):
+            if version < test.get("requires_py_version", -1):
                 continue
             additionalArgs = []
-            if num_tests >= CHARM_QUIET_AFTER_NUM_TESTS and '++quiet' not in commonArgs:
-                additionalArgs.append('++quiet')
-            cmd = ['charmrun/charmrun']
-            if test.get('prefix'):
-                cmd += [test['prefix']]
-            if not test.get('interactive', False):
-                cmd += [python] + [test['path']]
+            if num_tests >= CHARM_QUIET_AFTER_NUM_TESTS and "++quiet" not in commonArgs:
+                additionalArgs.append("++quiet")
+            cmd = ["charmrun/charmrun"]
+            if test.get("prefix"):
+                cmd += [test["prefix"]]
+            if not test.get("interactive", False):
+                cmd += [python] + [test["path"]]
             else:
-                cmd += [python] + ['-m', 'charm4py.interactive']
-            if 'args' in test:
-                cmd += test['args'].split(' ')
+                cmd += [python] + ["-m", "charm4py.interactive"]
+            if "args" in test:
+                cmd += test["args"].split(" ")
             cmd += commonArgs
-            cmd += ['+p' + str(num_processes), '+libcharm_interface', interface]
+            cmd += ["+p" + str(num_processes), "+libcharm_interface", interface]
             cmd += additionalArgs
-            print('Test command is ' + ' '.join(cmd))
+            print("Test command is " + " ".join(cmd))
             startTime = time.time()
             stdin = None
-            if test.get('interactive', False):
-                stdin = open(test['path'])
+            if test.get("interactive", False):
+                stdin = open(test["path"])
             p = subprocess.Popen(cmd, stdin=stdin)
             try:
                 rc = p.wait(TIMEOUT)
             except subprocess.TimeoutExpired:
-                print("Timeout (" + str(TIMEOUT) + " secs) expired when running " + test['path'] + ", Killing process")
+                print(
+                    "Timeout ("
+                    + str(TIMEOUT)
+                    + " secs) expired when running "
+                    + test["path"]
+                    + ", Killing process"
+                )
                 p.kill()
                 rc = -1
             if rc != 0:
-                print("ERROR running test " + test['path'] + " with " + python)
+                print("ERROR running test " + test["path"] + " with " + python)
                 exit(1)
             else:
                 elapsed = round(time.time() - startTime, 3)
-                durations[interface][test['path']].append(elapsed)
-                print("\n\n--------------------- TEST PASSED (in " + str(elapsed) + " secs) ---------------------\n\n")
+                durations[interface][test["path"]].append(elapsed)
+                print(
+                    "\n\n--------------------- TEST PASSED (in "
+                    + str(elapsed)
+                    + " secs) ---------------------\n\n"
+                )
                 num_tests += 1
 
 
diff --git a/charm4py/__init__.py b/charm4py/__init__.py
index 441a3011..47b5f066 100644
--- a/charm4py/__init__.py
+++ b/charm4py/__init__.py
@@ -1,28 +1,35 @@
 import sys
+
 if sys.version_info < (3, 8, 0):
-    raise RuntimeError('Charm4py requires Python 3.8 or higher')
+    raise RuntimeError("Charm4py requires Python 3.8 or higher")
 import atexit
 import os
+
 try:
     import greenlet
 except ImportError:
-    print('Charm4py requires the greenlet package. It can be installed via pip')
+    print("Charm4py requires the greenlet package. It can be installed via pip")
     exit(-1)
 
 
-charm4py_version = 'unknown'
+charm4py_version = "unknown"
 try:
     from ._version import version as charm4py_version
 except:
     try:
         import subprocess
-        charm4py_version = subprocess.check_output(['git', 'describe'],
-                                 cwd=os.path.dirname(__file__)).rstrip().decode()
+
+        charm4py_version = (
+            subprocess.check_output(["git", "describe"], cwd=os.path.dirname(__file__))
+            .rstrip()
+            .decode()
+        )
     except:
         pass
 
-if os.environ.get('CHARM_NOLOAD', '0') == '0':
+if os.environ.get("CHARM_NOLOAD", "0") == "0":
     from .charm import register, charm, readonlies, Options
+
     Reducer = charm.reducers
     Future = charm.createFuture
 
@@ -34,7 +41,9 @@
 
     def checkCharmStarted():
         if not charm.started:
-            print('Program is exiting but charm was not started: charm.start() was not '
-                  'called or error happened before start')
+            print(
+                "Program is exiting but charm was not started: charm.start() was not "
+                "called or error happened before start"
+            )
 
     atexit.register(checkCharmStarted)
diff --git a/charm4py/channel.py b/charm4py/channel.py
index aaf058ec..1809518f 100644
--- a/charm4py/channel.py
+++ b/charm4py/channel.py
@@ -4,7 +4,7 @@
 class Channel(object):
 
     def __new__(cls, chare, remote, local=None):
-        if not hasattr(chare, '__channels__'):
+        if not hasattr(chare, "__channels__"):
             chare.__initchannelattrs__()
         ch = chare.__findPendingChannel__(remote, False)
         if ch is None:
@@ -16,7 +16,7 @@ def __new__(cls, chare, remote, local=None):
             ch.setEstablished()
         if local is None:
             # if local is None, we assume local endpoint is the individual chare
-            if hasattr(chare, 'thisIndex'):
+            if hasattr(chare, "thisIndex"):
                 local = chare.thisProxy[chare.thisIndex]
             else:
                 local = chare.thisProxy
@@ -26,6 +26,7 @@ def __new__(cls, chare, remote, local=None):
 
 CHAN_BUF_SIZE = 40000
 
+
 class _Channel(object):
 
     def __init__(self, port, remote, locally_initiated):
@@ -36,7 +37,9 @@ def __init__(self, port, remote, locally_initiated):
         self.recv_seqno = 0
         self.data = {}
         self.recv_fut = None  # this future is used to block on self.recv()
-        self.wait_ready = None  # this future is used to block on ready (by charm.iwait())
+        # this future is used to block on ready
+        # (by charm.iwait())
+        self.wait_ready = None
         self.established = False
         self.established_fut = None
         self.locally_initiated = locally_initiated
diff --git a/charm4py/chare.py b/charm4py/chare.py
index bc64f4ea..b75187ec 100644
--- a/charm4py/chare.py
+++ b/charm4py/chare.py
@@ -1,5 +1,4 @@
 from . import wait
-from charm4py import ray
 import sys
 from greenlet import getcurrent
 from collections import defaultdict
@@ -12,9 +11,7 @@
 CHARM_TYPES = (MAINCHARE, GROUP, ARRAY)
 
 # Constants to detect type of contributors for reduction. Order should match enum extContributorType
-(CONTRIBUTOR_TYPE_ARRAY,
- CONTRIBUTOR_TYPE_GROUP,
- CONTRIBUTOR_TYPE_NODEGROUP) = range(3)
+(CONTRIBUTOR_TYPE_ARRAY, CONTRIBUTOR_TYPE_GROUP, CONTRIBUTOR_TYPE_NODEGROUP) = range(3)
 
 
 class Chare(object):
@@ -27,13 +24,13 @@ def __new__(cls, chare_type=None, args=[], onPE=-1):
             arr.ckInsert(0, args, onPE, single=True)
             arr.ckDoneInserting()
             proxy = arr[0]
-            if hasattr(arr, 'creation_future'):
+            if hasattr(arr, "creation_future"):
                 proxy.creation_future = arr.creation_future
             return proxy
         return object.__new__(cls)
 
     def __init__(self):
-        if hasattr(self, '_local'):
+        if hasattr(self, "_local"):
             return
         # messages to this chare from chares in the same PE are stored here without copying
         # or pickling. _local is a fixed size array that implements a mem pool, where msgs
@@ -52,7 +49,7 @@ def __init__(self):
 
     def __addLocal__(self, msg):
         if self._local_free_head is None:
-            raise Charm4PyError('Local msg buffer full. Increase LOCAL_MSG_BUF_SIZE')
+            raise Charm4PyError("Local msg buffer full. Increase LOCAL_MSG_BUF_SIZE")
         h = self._local_free_head
         self._local_free_head = self._local[self._local_free_head]
         self._local[h] = msg
@@ -118,7 +115,7 @@ def contribute(self, data, reducer, callback, section=None):
         charm.contribute(data, reducer, callback, self, section)
 
     def reduce(self, callback, data=None, reducer=None, section=None):
-        assert callable(callback), 'First argument to reduce must be a callback'
+        assert callable(callback), "First argument to reduce must be a callback"
         charm.contribute(data, reducer, callback, self, section)
 
     def allreduce(self, data=None, reducer=None, section=None):
@@ -148,7 +145,9 @@ def allreduce(self, data=None, reducer=None, section=None):
 
     def AtSync(self):
         # NOTE this will fail if called from a chare that is not in an array (as it should be)
-        charm.CkArraySend(self.thisProxy.aid, self.thisIndex, self.thisProxy.AtSync.ep, (b'', []))
+        charm.CkArraySend(
+            self.thisProxy.aid, self.thisIndex, self.thisProxy.AtSync.ep, (b"", [])
+        )
 
     def migrate(self, toPe):
         charm.lib.CkMigrate(self.thisProxy.aid, self.thisIndex, toPe)
@@ -167,7 +166,7 @@ def _coll_future_deposit_result(self, fid, result=None):
 
     def __getRedNo__(self):
         proxy = self.thisProxy
-        if hasattr(proxy, 'aid'):
+        if hasattr(proxy, "aid"):
             return charm.lib.getArrayElementRedNo(proxy.aid, self.thisIndex)
         else:
             return charm.lib.getGroupRedNo(proxy.gid)
@@ -176,7 +175,9 @@ def __addThreadEventSubscriber__(self, target, args):
         self._thread_notify_target = target
         self._thread_notify_data = args
 
-    def _getSectionLocations_(self, sid0, numsections, member_func, slicing, section_elems, f, proxy):
+    def _getSectionLocations_(
+        self, sid0, numsections, member_func, slicing, section_elems, f, proxy
+    ):
         # list of sections in which this element participates (sections
         # numbered from 0 to numsections - 1)
         sections = []
@@ -203,9 +204,12 @@ def _getSectionLocations_(self, sid0, numsections, member_func, slicing, section
             for sec_num, elems in enumerate(section_elems):
                 if self.thisIndex in elems:
                     sections.append(sec_num)
-        assert len(sections) <= numsections, 'Element ' + str(self.thisIndex) + \
-                                             ' participates in more sections than were specified'
-        if len(sections) > 0 and not hasattr(self, '_scookies'):
+        assert len(sections) <= numsections, (
+            "Element "
+            + str(self.thisIndex)
+            + " participates in more sections than were specified"
+        )
+        if len(sections) > 0 and not hasattr(self, "_scookies"):
             # chares that participate in sections need this dict to store their
             # reduction numbers for each section
             self._scookies = defaultdict(int)
@@ -226,7 +230,9 @@ def _getSectionLocations_(self, sid0, numsections, member_func, slicing, section
 
     def __initchannelattrs__(self):
         self.__channels__ = []  # port -> channel._Channel object
-        self.__pendingChannels__ = []  # channels that have not finished establishing connections
+        # channels that have not finished
+        # establishing connections
+        self.__pendingChannels__ = []
 
     def __findPendingChannel__(self, remote, started_locally):
         for i, ch in enumerate(self.__pendingChannels__):
@@ -236,7 +242,7 @@ def __findPendingChannel__(self, remote, started_locally):
         return None
 
     def _channelConnect__(self, remote_proxy, remote_port):  # entry method
-        if not hasattr(self, '__channels__'):
+        if not hasattr(self, "__channels__"):
             self.__initchannelattrs__()
         ch = self.__findPendingChannel__(remote_proxy, True)
         if ch is not None:
@@ -248,6 +254,7 @@ def _channelConnect__(self, remote_proxy, remote_port):  # entry method
                 ch.setEstablished()
         else:
             from .channel import _Channel
+
             local_port = len(self.__channels__)
             ch = _Channel(local_port, remote_proxy, False)
             self.__channels__.append(ch)
@@ -267,58 +274,88 @@ def _channelRecv__(self, port, seqno, *msg):  # entry method
         elif ch.recv_fut is not None and seqno == ch.recv_seqno:
             ch.recv_fut.send(msg)
         else:
-            assert seqno not in ch.data, 'Channel buffer is full'
+            assert seqno not in ch.data, "Channel buffer is full"
             ch.data[seqno] = msg
 
 
 method_restrictions = {
     # reserved methods are those that can't be redefined in user subclass
-    'reserved': {'__addLocal__', '__removeLocal__', '__flush_wait_queues__',
-                 '__waitEnqueue__', 'wait', 'contribute', 'reduce', 'allreduce',
-                 'AtSync', 'migrate', 'setMigratable',
-                 '_coll_future_deposit_result', '__getRedNo__',
-                 '__addThreadEventSubscriber__', '_getSectionLocations_',
-                 '__initchannelattrs__', '__findPendingChannel__',
-                 '_channelConnect__', '_channelRecv__'},
-
+    "reserved": {
+        "__addLocal__",
+        "__removeLocal__",
+        "__flush_wait_queues__",
+        "__waitEnqueue__",
+        "wait",
+        "contribute",
+        "reduce",
+        "allreduce",
+        "AtSync",
+        "migrate",
+        "setMigratable",
+        "_coll_future_deposit_result",
+        "__getRedNo__",
+        "__addThreadEventSubscriber__",
+        "_getSectionLocations_",
+        "__initchannelattrs__",
+        "__findPendingChannel__",
+        "_channelConnect__",
+        "_channelRecv__",
+    },
     # these methods of Chare cannot be entry methods. NOTE that any methods starting
     # and ending with '__' are automatically excluded from being entry methods
-    'non_entry_method': {'wait', 'contribute', 'reduce', 'allreduce',
-                         'AtSync', 'migrated'}
+    "non_entry_method": {
+        "wait",
+        "contribute",
+        "reduce",
+        "allreduce",
+        "AtSync",
+        "migrated",
+    },
 }
 
 
 def getEntryMethodInfo(cls, method_name):
     func = getattr(cls, method_name)
     argcount = func.__code__.co_argcount - 1  # - 1 to disregard "self" argument
-    argnames = tuple(func.__code__.co_varnames[1:argcount + 1])
-    assert 'ret' not in argnames, '"ret" keyword for entry method parameters is reserved'
+    argnames = tuple(func.__code__.co_varnames[1 : argcount + 1])
+    assert (
+        "ret" not in argnames
+    ), '"ret" keyword for entry method parameters is reserved'
     defaults = func.__defaults__
     if defaults is None:
         defaults = ()
     return argcount, argnames, defaults
 
+
 # ----------------- Mainchare and Proxy -----------------
 
+
 def mainchare_proxy_ctor(proxy, cid):
     proxy.cid = cid
 
+
 def mainchare_proxy__getstate__(proxy):
     return proxy.cid
 
+
 def mainchare_proxy__setstate__(proxy, state):
     proxy.cid = state
 
+
 def mainchare_proxy__eq__(proxy, other):
     if isinstance(other, proxy.__class__):
         return proxy.cid == other.cid
     else:
         return False
 
+
 def mainchare_proxy__hash__(proxy):
     return hash(proxy.cid)
 
-def mainchare_proxy_method_gen(ep, argcount, argnames, defaults):  # decorator, generates proxy entry methods
+
+# decorator, generates proxy entry methods
+# (the returned function carries the entry point index as its `.ep` attribute)
+def mainchare_proxy_method_gen(ep, argcount, argnames, defaults):
     def proxy_entry_method(proxy, *args, **kwargs):
         num_args = len(args)
         if num_args < argcount and len(kwargs) > 0:
@@ -331,23 +368,31 @@ def proxy_entry_method(proxy, *args, **kwargs):
                 else:
                     # if not there, see if there is a default value
                     def_idx = i - argcount + len(defaults)
-                    assert def_idx >= 0, 'Value not found for parameter \'' + argname + '\' of entry method'
+                    assert def_idx >= 0, (
+                        "Value not found for parameter '"
+                        + argname
+                        + "' of entry method"
+                    )
                     args.append(defaults[def_idx])
 
         header = {}
         blockFuture = None
         cid = proxy.cid  # chare ID
-        if ('ret' in kwargs and kwargs['ret']) or ('awaitable' in kwargs and kwargs['awaitable']):
-            header[b'block'] = blockFuture = charm.Future()
+        if ("ret" in kwargs and kwargs["ret"]) or (
+            "awaitable" in kwargs and kwargs["awaitable"]
+        ):
+            header[b"block"] = blockFuture = charm.Future()
         destObj = None
         if Options.local_msg_optim and (cid in charm.chares) and (len(args) > 0):
             destObj = charm.chares[cid]
         msg = charm.packMsg(destObj, args, header)
         charm.CkChareSend(cid, ep, msg)
         return blockFuture
+
     proxy_entry_method.ep = ep
     return proxy_entry_method
 
+
 def mainchare_proxy_contribute(proxy, contributeInfo):
     charm.CkContributeToChare(contributeInfo, proxy.cid)
 
@@ -362,32 +407,36 @@ def initMember(cls, obj, cid):
 
     @classmethod
     def __baseEntryMethods__(cls):
-        return ['__init__']
+        return ["__init__"]
 
     @classmethod
     def __getProxyClass__(C, cls):
         # print("Creating mainchare proxy class for class " + cls.__name__)
-        proxyClassName = cls.__name__ + 'Proxy'
+        proxyClassName = cls.__name__ + "Proxy"
         M = dict()  # proxy methods
         for m in charm.classEntryMethods[MAINCHARE][cls]:
             if m.epIdx == -1:
-                raise Charm4PyError('Unregistered entry method')
-            if m.name == '__init__':
+                raise Charm4PyError("Unregistered entry method")
+            if m.name == "__init__":
                 continue
             argcount, argnames, defaults = getEntryMethodInfo(m.C, m.name)
             if Options.profiling:
-                f = profile_send_function(mainchare_proxy_method_gen(m.epIdx, argcount, argnames, defaults))
+                f = profile_send_function(
+                    mainchare_proxy_method_gen(m.epIdx, argcount, argnames, defaults)
+                )
             else:
                 f = mainchare_proxy_method_gen(m.epIdx, argcount, argnames, defaults)
-            f.__qualname__ = proxyClassName + '.' + m.name
+            f.__qualname__ = proxyClassName + "." + m.name
             f.__name__ = m.name
             M[m.name] = f
-        M['__init__'] = mainchare_proxy_ctor
-        M['ckContribute'] = mainchare_proxy_contribute  # function called when target proxy is Mainchare
-        M['__getstate__'] = mainchare_proxy__getstate__
-        M['__setstate__'] = mainchare_proxy__setstate__
-        M['__eq__'] = mainchare_proxy__eq__
-        M['__hash__'] = mainchare_proxy__hash__
+        M["__init__"] = mainchare_proxy_ctor
+        # function called when target proxy
+        # is Mainchare
+        M["ckContribute"] = mainchare_proxy_contribute
+        M["__getstate__"] = mainchare_proxy__getstate__
+        M["__setstate__"] = mainchare_proxy__setstate__
+        M["__eq__"] = mainchare_proxy__eq__
+        M["__hash__"] = mainchare_proxy__hash__
         return type(proxyClassName, (), M)  # create and return proxy class
 
 
@@ -398,19 +447,23 @@ def __init__(self, args):
 
 # ------------------ Group and Proxy  ------------------
 
+
 def group_proxy_ctor(proxy, gid):
     proxy.gid = gid
     proxy.elemIdx = -1  # entry method calls will be to elemIdx PE (broadcast if -1)
 
+
 def group_proxy__getstate__(proxy):
     return (proxy.gid, proxy.elemIdx)
 
+
 def group_proxy__setstate__(proxy, state):
     proxy.gid, proxy.elemIdx = state
 
+
 def group_proxy__eq__(proxy, other):
     if proxy.issec:
-        if hasattr(other, 'issec'):
+        if hasattr(other, "issec"):
             return proxy.section == other.section
         else:
             return False
@@ -419,12 +472,14 @@ def group_proxy__eq__(proxy, other):
     else:
         return False
 
+
 def group_proxy__hash__(proxy):
     if proxy.issec:
         return hash(proxy.section)
     else:
         return hash((proxy.gid, proxy.elemIdx))
 
+
 def group_getsecproxy(proxy, sinfo):
     if proxy.issec:
         secproxy = proxy.__class__(proxy.gid)
@@ -433,12 +488,15 @@ def group_getsecproxy(proxy, sinfo):
     secproxy.section = sinfo
     return secproxy
 
+
 def groupsecproxy__getstate__(proxy):
     return (proxy.gid, proxy.elemIdx, proxy.section)
 
+
 def groupsecproxy__setstate__(proxy, state):
     proxy.gid, proxy.elemIdx, proxy.section = state
 
+
 def group_proxy_elem(proxy, pe):  # group proxy [] overload method
     if not isinstance(pe, slice):
         proxy_clone = proxy.__class__(proxy.gid)
@@ -454,7 +512,10 @@ def group_proxy_elem(proxy, pe):  # group proxy [] overload method
             step = 1
         return charm.split(proxy, 1, elems=[list(range(start, stop, step))])[0]
 
-def group_proxy_method_gen(ep, argcount, argnames, defaults):  # decorator, generates proxy entry methods
+
+# decorator, generates proxy entry methods
+# (the returned function carries the entry point index as its `.ep` attribute)
+def group_proxy_method_gen(ep, argcount, argnames, defaults):
     def proxy_entry_method(proxy, *args, **kwargs):
         num_args = len(args)
         if num_args < argcount and len(kwargs) > 0:
@@ -467,20 +528,24 @@ def proxy_entry_method(proxy, *args, **kwargs):
                 else:
                     # if not there, see if there is a default value
                     def_idx = i - argcount + len(defaults)
-                    assert def_idx >= 0, 'Value not found for parameter \'' + argname + '\' of entry method'
+                    assert def_idx >= 0, (
+                        "Value not found for parameter '"
+                        + argname
+                        + "' of entry method"
+                    )
                     args.append(defaults[def_idx])
 
         header = {}
         blockFuture = None
         elemIdx = proxy.elemIdx
-        if 'ret' in kwargs and kwargs['ret']:
-            header[b'block'] = blockFuture = charm.Future()
+        if "ret" in kwargs and kwargs["ret"]:
+            header[b"block"] = blockFuture = charm.Future()
             if elemIdx == -1:
-                header[b'bcast'] = header[b'bcastret'] = True
-        elif 'awaitable' in kwargs and kwargs['awaitable']:
-            header[b'block'] = blockFuture = charm.Future()
+                header[b"bcast"] = header[b"bcastret"] = True
+        elif "awaitable" in kwargs and kwargs["awaitable"]:
+            header[b"block"] = blockFuture = charm.Future()
             if elemIdx == -1:
-                header[b'bcast'] = True
+                header[b"bcast"] = True
         if not proxy.issec or elemIdx != -1:
             destObj = None
             gid = proxy.gid
@@ -490,15 +555,19 @@ def proxy_entry_method(proxy, *args, **kwargs):
             charm.CkGroupSend(gid, elemIdx, ep, msg)
         else:
             root, sid = proxy.section
-            header[b'sid'] = sid
+            header[b"sid"] = sid
             if Options.local_msg_optim and root == charm._myPe:
-                charm.sectionMgr.thisProxy[root].sendToSectionLocal(sid, ep, header, *args)
+                charm.sectionMgr.thisProxy[root].sendToSectionLocal(
+                    sid, ep, header, *args
+                )
             else:
                 charm.sectionMgr.thisProxy[root].sendToSection(sid, ep, header, *args)
         return blockFuture
+
     proxy_entry_method.ep = ep
     return proxy_entry_method
 
+
 def update_globals_proxy_method_gen(ep):
     def proxy_entry_method(proxy, *args, **kwargs):
         new_args = []
@@ -507,22 +576,22 @@ def proxy_entry_method(proxy, *args, **kwargs):
             new_args.append(var)
         if len(args) >= 2:
             new_args.append(args[1])
-        elif 'module_name' in kwargs:
-            new_args.append(kwargs['module_name'])
+        elif "module_name" in kwargs:
+            new_args.append(kwargs["module_name"])
         else:
-            new_args.append('__main__')  # default value for 'module_name' parameter
+            new_args.append("__main__")  # default value for 'module_name' parameter
         args = new_args
         header = {}
         blockFuture = None
         elemIdx = proxy.elemIdx
-        if 'ret' in kwargs and kwargs['ret']:
-            header[b'block'] = blockFuture = charm.Future()
+        if "ret" in kwargs and kwargs["ret"]:
+            header[b"block"] = blockFuture = charm.Future()
             if elemIdx == -1:
-                header[b'bcast'] = header[b'bcastret'] = True
-        elif 'awaitable' in kwargs and kwargs['awaitable']:
-            header[b'block'] = blockFuture = charm.Future()
+                header[b"bcast"] = header[b"bcastret"] = True
+        elif "awaitable" in kwargs and kwargs["awaitable"]:
+            header[b"block"] = blockFuture = charm.Future()
             if elemIdx == -1:
-                header[b'bcast'] = True
+                header[b"bcast"] = True
         if not proxy.issec or elemIdx != -1:
             destObj = None
             gid = proxy.gid
@@ -532,88 +601,101 @@ def proxy_entry_method(proxy, *args, **kwargs):
             charm.CkGroupSend(gid, elemIdx, ep, msg)
         else:
             root, sid = proxy.section
-            header[b'sid'] = sid
+            header[b"sid"] = sid
             if Options.local_msg_optim and root == charm._myPe:
-                charm.sectionMgr.thisProxy[root].sendToSectionLocal(sid, ep, header, *args)
+                charm.sectionMgr.thisProxy[root].sendToSectionLocal(
+                    sid, ep, header, *args
+                )
             else:
                 charm.sectionMgr.thisProxy[root].sendToSection(sid, ep, header, *args)
         return blockFuture
+
     proxy_entry_method.ep = ep
     return proxy_entry_method
 
+
 def group_ckNew_gen(C, epIdx):
-    @classmethod    # make ckNew a class (not instance) method of proxy
+    @classmethod  # make ckNew a class (not instance) method of proxy
     def group_ckNew(cls, args, onPEs):
         # print("GROUP calling ckNew for class " + C.__name__ + " cIdx=", C.idx[GROUP], "epIdx=", epIdx)
         header = {}
         creation_future = None
         if not charm.threadMgr.isMainThread() and ArrayMap not in C.mro():
             creation_future = charm.Future()
-            header[b'block'] = creation_future
-            header[b'bcast'] = True
-            header[b'creation'] = True
+            header[b"block"] = creation_future
+            header[b"bcast"] = True
+            header[b"creation"] = True
         if onPEs is None:
             msg = charm.packMsg(None, args, header)
             gid = charm.lib.CkCreateGroup(C.idx[GROUP], epIdx, msg)
             proxy = cls(gid)
         else:
             # send empty msg for Charm++ group creation (on every PE)
-            msg = charm.packMsg(None, [], {b'constrained': True})
+            msg = charm.packMsg(None, [], {b"constrained": True})
             gid = charm.lib.CkCreateGroup(C.idx[GROUP], epIdx, msg)
             proxy = cls(gid)
             # real msg goes only to section elements
-            proxy = charm.split(proxy, 1, elems=[onPEs], cons=[-1, epIdx, header, args])[0]
+            proxy = charm.split(
+                proxy, 1, elems=[onPEs], cons=[-1, epIdx, header, args]
+            )[0]
         if creation_future is not None:
             proxy.creation_future = creation_future
         return proxy
+
     return group_ckNew
 
+
 def group_proxy_contribute(proxy, contributeInfo):
     charm.CkContributeToGroup(contributeInfo, proxy.gid, proxy.elemIdx)
 
+
 def groupsecproxy_contribute(proxy, contributeInfo):
     charm.CkContributeToSection(contributeInfo, proxy.section[1], proxy.section[0])
 
+
 def group_proxy_localbranch(proxy):
     return charm.groups[proxy.gid]
 
+
 class Group(object):
 
     type_id = GROUP
 
     def __new__(cls, C, args=[], onPEs=None):
-        if (not hasattr(C, 'mro')) or (Chare not in C.mro()):
-            raise Charm4PyError('Only subclasses of Chare can be member of Group')
+        if (not hasattr(C, "mro")) or (Chare not in C.mro()):
+            raise Charm4PyError("Only subclasses of Chare can be member of Group")
         if C not in charm.proxyClasses[GROUP]:
-            raise Charm4PyError(str(C) + ' not registered for use in Groups')
+            raise Charm4PyError(str(C) + " not registered for use in Groups")
         return charm.proxyClasses[GROUP][C].ckNew(args, onPEs)
 
     @classmethod
     def initMember(cls, obj, gid):
         obj.thisIndex = charm.myPe()
         obj.thisProxy = charm.proxyClasses[GROUP][obj.__class__](gid)
-        obj._contributeInfo = charm.lib.initContributeInfo(gid, obj.thisIndex, CONTRIBUTOR_TYPE_GROUP)
+        obj._contributeInfo = charm.lib.initContributeInfo(
+            gid, obj.thisIndex, CONTRIBUTOR_TYPE_GROUP
+        )
         obj._scookies = defaultdict(int)
 
     @classmethod
     def __baseEntryMethods__(cls):
-        return ['__init__']
+        return ["__init__"]
 
     @classmethod
     def __getProxyClass__(C, cls, sectionProxy=False):
         # print("Creating group proxy class for class " + cls.__name__)
         if not sectionProxy:
-            proxyClassName = cls.__name__ + 'GroupProxy'
+            proxyClassName = cls.__name__ + "GroupProxy"
         else:
-            proxyClassName = cls.__name__ + 'GroupSecProxy'
+            proxyClassName = cls.__name__ + "GroupSecProxy"
         M = dict()  # proxy methods
         entryMethods = charm.classEntryMethods[GROUP][cls]
         for m in entryMethods:
             if m.epIdx == -1:
-                raise Charm4PyError('Unregistered entry method')
-            if m.name == '__init__':
+                raise Charm4PyError("Unregistered entry method")
+            if m.name == "__init__":
                 continue
-            if m.name == 'updateGlobals' and cls == CharmRemote:
+            if m.name == "updateGlobals" and cls == CharmRemote:
                 if Options.profiling:
                     f = profile_send_function(update_globals_proxy_method_gen(m.epIdx))
                 else:
@@ -621,30 +703,36 @@ def __getProxyClass__(C, cls, sectionProxy=False):
             else:
                 argcount, argnames, defaults = getEntryMethodInfo(m.C, m.name)
                 if Options.profiling:
-                    f = profile_send_function(group_proxy_method_gen(m.epIdx, argcount, argnames, defaults))
+                    f = profile_send_function(
+                        group_proxy_method_gen(m.epIdx, argcount, argnames, defaults)
+                    )
                 else:
                     f = group_proxy_method_gen(m.epIdx, argcount, argnames, defaults)
-            f.__qualname__ = proxyClassName + '.' + m.name
+            f.__qualname__ = proxyClassName + "." + m.name
             f.__name__ = m.name
             M[m.name] = f
         if cls == CharmRemote and sys.version_info >= (3, 0, 0):
             # TODO remove this and change rexec to exec when Python 2 support is dropped
-            M['exec'] = M['rexec']
-        M['__init__'] = group_proxy_ctor
-        M['__getitem__'] = group_proxy_elem
-        M['__eq__'] = group_proxy__eq__
-        M['__hash__'] = group_proxy__hash__
-        M['ckNew'] = group_ckNew_gen(cls, entryMethods[0].epIdx)
-        M['ckLocalBranch'] = group_proxy_localbranch
-        M['__getsecproxy__'] = group_getsecproxy
+            M["exec"] = M["rexec"]
+        M["__init__"] = group_proxy_ctor
+        M["__getitem__"] = group_proxy_elem
+        M["__eq__"] = group_proxy__eq__
+        M["__hash__"] = group_proxy__hash__
+        M["ckNew"] = group_ckNew_gen(cls, entryMethods[0].epIdx)
+        M["ckLocalBranch"] = group_proxy_localbranch
+        M["__getsecproxy__"] = group_getsecproxy
         if not sectionProxy:
-            M['ckContribute'] = group_proxy_contribute  # function called when target proxy is Group
-            M['__getstate__'] = group_proxy__getstate__
-            M['__setstate__'] = group_proxy__setstate__
+            M["ckContribute"] = (
+                group_proxy_contribute  # function called when target proxy is Group
+            )
+            M["__getstate__"] = group_proxy__getstate__
+            M["__setstate__"] = group_proxy__setstate__
         else:
-            M['ckContribute'] = groupsecproxy_contribute  # function called when target proxy is Group
-            M['__getstate__'] = groupsecproxy__getstate__
-            M['__setstate__'] = groupsecproxy__setstate__
+            M["ckContribute"] = (
+                groupsecproxy_contribute  # function called when target proxy is Group
+            )
+            M["__getstate__"] = groupsecproxy__getstate__
+            M["__setstate__"] = groupsecproxy__setstate__
         proxyCls = type(proxyClassName, (), M)  # create and return proxy class
         proxyCls.issec = sectionProxy
         return proxyCls
@@ -657,20 +745,24 @@ def __init__(self):
 
 # -------------------- Array and Proxy --------------------
 
+
 def array_proxy_ctor(proxy, aid, ndims):
     proxy.aid = aid
     proxy.ndims = ndims
     proxy.elemIdx = ()  # entry method calls will be to elemIdx array element (broadcast if empty tuple)
 
+
 def array_proxy__getstate__(proxy):
     return (proxy.aid, proxy.ndims, proxy.elemIdx)
 
+
 def array_proxy__setstate__(proxy, state):
     proxy.aid, proxy.ndims, proxy.elemIdx = state
 
+
 def array_proxy__eq__(proxy, other):
     if proxy.issec:
-        if hasattr(other, 'issec'):
+        if hasattr(other, "issec"):
             return proxy.section == other.section
         else:
             return False
@@ -679,12 +771,14 @@ def array_proxy__eq__(proxy, other):
     else:
         return False
 
+
 def array_proxy__hash__(proxy):
     if proxy.issec:
         return hash(proxy.section)
     else:
         return hash((proxy.aid, proxy.elemIdx))
 
+
 def array_getsecproxy(proxy, sinfo):
     if proxy.issec:
         secproxy = proxy.__class__(proxy.aid, proxy.ndims)
@@ -693,12 +787,15 @@ def array_getsecproxy(proxy, sinfo):
     secproxy.section = sinfo
     return secproxy
 
+
 def arraysecproxy__getstate__(proxy):
     return (proxy.aid, proxy.ndims, proxy.elemIdx, proxy.section)
 
+
 def arraysecproxy__setstate__(proxy, state):
     proxy.aid, proxy.ndims, proxy.elemIdx, proxy.section = state
 
+
 def array_proxy_elem(proxy, idx):  # array proxy [] overload method
     ndims = proxy.ndims
     isslice = True
@@ -708,17 +805,24 @@ def array_proxy_elem(proxy, idx):  # array proxy [] overload method
         isslice = False
     elif idxtype == slice:
         idx = (idx,)
-    assert len(idx) == ndims, "Dimensions of index " + str(idx) + " don't match array dimensions"
+    assert len(idx) == ndims, (
+        "Dimensions of index " + str(idx) + " don't match array dimensions"
+    )
     if not isslice or not isinstance(idx[0], slice):
         proxy_clone = proxy.__class__(proxy.aid, ndims)
         proxy_clone.elemIdx = tuple(idx)
         return proxy_clone
     else:
         for _slice in idx:
-            assert _slice.start is not None and _slice.stop is not None, 'Must specify start and stop indexes for array slicing'
+            assert (
+                _slice.start is not None and _slice.stop is not None
+            ), "Must specify start and stop indexes for array slicing"
         return charm.split(proxy, 1, slicing=idx)[0]
 
-def array_proxy_method_gen(ep, argcount, argnames, defaults):  # decorator, generates proxy entry methods
+
+def array_proxy_method_gen(
+    ep, argcount, argnames, defaults
+):  # decorator, generates proxy entry methods
     def proxy_entry_method(proxy, *args, **kwargs):
         num_args = len(args)
         if num_args < argcount and len(kwargs) > 0:
@@ -731,22 +835,26 @@ def proxy_entry_method(proxy, *args, **kwargs):
                 else:
                     # if not there, see if there is a default value
                     def_idx = i - argcount + len(defaults)
-                    assert def_idx >= 0, 'Value not found for parameter \'' + argname + '\' of entry method'
+                    assert def_idx >= 0, (
+                        "Value not found for parameter '"
+                        + argname
+                        + "' of entry method"
+                    )
                     args.append(defaults[def_idx])
 
         header = {}
-        is_ray = kwargs.pop('is_ray', False)
-        header['is_ray'] = is_ray
+        is_ray = kwargs.pop("is_ray", False)
+        header["is_ray"] = is_ray
         blockFuture = None
         elemIdx = proxy.elemIdx
-        if 'ret' in kwargs and kwargs['ret']:
-            header[b'block'] = blockFuture = charm.Future()
+        if "ret" in kwargs and kwargs["ret"]:
+            header[b"block"] = blockFuture = charm.Future()
             if elemIdx == ():
-                header[b'bcast'] = header[b'bcastret'] = True
-        elif 'awaitable' in kwargs and kwargs['awaitable']:
-            header[b'block'] = blockFuture = charm.Future()
+                header[b"bcast"] = header[b"bcastret"] = True
+        elif "awaitable" in kwargs and kwargs["awaitable"]:
+            header[b"block"] = blockFuture = charm.Future()
             if elemIdx == ():
-                header[b'bcast'] = True
+                header[b"bcast"] = True
         if not proxy.issec or elemIdx != ():
             destObj = None
             aid = proxy.aid
@@ -763,31 +871,42 @@ def proxy_entry_method(proxy, *args, **kwargs):
             charm.CkArraySend(aid, elemIdx, ep, msg)
         else:
             root, sid = proxy.section
-            header[b'sid'] = sid
+            header[b"sid"] = sid
             if Options.local_msg_optim and root == charm._myPe:
-                charm.sectionMgr.thisProxy[root].sendToSectionLocal(sid, ep, header, *args)
+                charm.sectionMgr.thisProxy[root].sendToSectionLocal(
+                    sid, ep, header, *args
+                )
             else:
                 charm.sectionMgr.thisProxy[root].sendToSection(sid, ep, header, *args)
         return blockFuture
+
     proxy_entry_method.ep = ep
     return proxy_entry_method
 
+
 def array_ckNew_gen(C, epIdx):
-    @classmethod    # make ckNew a class (not instance) method of proxy
-    def array_ckNew(cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, is_ray=False):
+    @classmethod  # make ckNew a class (not instance) method of proxy
+    def array_ckNew(
+        cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, is_ray=False
+    ):
         # if charm.myPe() == 0: print("calling array ckNew for class " + C.__name__ + " cIdx=" + str(C.idx[ARRAY]))
-        if type(dims) == int: dims = (dims,)
+        if type(dims) == int:
+            dims = (dims,)
 
         if dims is None and ndims == -1:
-            raise Charm4PyError('Bounds and number of dimensions for array cannot be empty in ckNew')
+            raise Charm4PyError(
+                "Bounds and number of dimensions for array cannot be empty in ckNew"
+            )
         elif dims is not None and ndims != -1 and ndims != len(dims):
-            raise Charm4PyError('Number of bounds should match number of dimensions')
+            raise Charm4PyError("Number of bounds should match number of dimensions")
         elif dims is None and ndims != -1:  # create an empty array
             dims = (0,) * ndims
 
         # this is a restriction in Charm++. Charm++ won't tell you unless
         # error checking is enabled, resulting in obscure errors otherwise
-        assert charm._myPe == 0, 'Cannot create arrays from PE != 0. Use charm.thisProxy[0].createArray() instead'
+        assert (
+            charm._myPe == 0
+        ), "Cannot create arrays from PE != 0. Use charm.thisProxy[0].createArray() instead"
 
         map_gid = -1
         if map is not None:
@@ -796,42 +915,53 @@ def array_ckNew(cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, is
         header, creation_future = {}, None
         if sum(dims) > 0 and not charm.threadMgr.isMainThread():
             creation_future = charm.Future()
-            header[b'block'] = creation_future
-            header[b'bcast'] = True
-            header[b'creation'] = True
-            header[b'is_ray'] = is_ray
+            header[b"block"] = creation_future
+            header[b"bcast"] = True
+            header[b"creation"] = True
+            header[b"is_ray"] = is_ray
 
         msg = charm.packMsg(None, args, header)
-        aid = charm.lib.CkCreateArray(C.idx[ARRAY], dims, epIdx, msg, map_gid, useAtSync)
+        aid = charm.lib.CkCreateArray(
+            C.idx[ARRAY], dims, epIdx, msg, map_gid, useAtSync
+        )
         proxy = cls(aid, len(dims))
         if creation_future is not None:
             proxy.creation_future = creation_future
         return proxy
+
     return array_ckNew
 
+
 def array_ckInsert_gen(epIdx):
-    def array_ckInsert(proxy, index, args=[], onPE=-1, useAtSync=False, single=False, is_ray=False):
-        if type(index) == int: index = (index,)
-        assert len(index) == proxy.ndims, 'Invalid index dimensions passed to ckInsert'
+    def array_ckInsert(
+        proxy, index, args=[], onPE=-1, useAtSync=False, single=False, is_ray=False
+    ):
+        if type(index) == int:
+            index = (index,)
+        assert len(index) == proxy.ndims, "Invalid index dimensions passed to ckInsert"
         header = {}
         if single:
-            header[b'single'] = True
+            header[b"single"] = True
             if not charm.threadMgr.isMainThread():
                 proxy.creation_future = charm.Future()
-                header[b'block'] = proxy.creation_future
-                header[b'bcast'] = True
-                header[b'creation'] = True
-                header[b'is_ray'] = is_ray
+                header[b"block"] = proxy.creation_future
+                header[b"bcast"] = True
+                header[b"creation"] = True
+                header[b"is_ray"] = is_ray
         msg = charm.packMsg(None, args, header)
         charm.lib.CkInsert(proxy.aid, index, epIdx, onPE, msg, useAtSync)
+
     return array_ckInsert
 
+
 def array_proxy_contribute(proxy, contributeInfo):
     charm.CkContributeToArray(contributeInfo, proxy.aid, proxy.elemIdx)
 
+
 def arraysecproxy_contribute(proxy, contributeInfo):
     charm.CkContributeToSection(contributeInfo, proxy.section[1], proxy.section[0])
 
+
 def array_proxy_doneInserting(proxy):
     charm.lib.CkDoneInserting(proxy.aid)
 
@@ -841,10 +971,10 @@ class Array(object):
     type_id = ARRAY
 
     def __new__(cls, C, dims=None, ndims=-1, args=[], map=None, useAtSync=False):
-        if (not hasattr(C, 'mro')) or (Chare not in C.mro()):
-            raise Charm4PyError('Only subclasses of Chare can be member of Array')
+        if (not hasattr(C, "mro")) or (Chare not in C.mro()):
+            raise Charm4PyError("Only subclasses of Chare can be member of Array")
         if C not in charm.proxyClasses[ARRAY]:
-            raise Charm4PyError(str(C) + ' not registered for use in Arrays')
+            raise Charm4PyError(str(C) + " not registered for use in Arrays")
         return charm.proxyClasses[ARRAY][C].ckNew(dims, ndims, args, map, useAtSync)
 
     @classmethod
@@ -854,8 +984,12 @@ def initMember(cls, obj, aid, index, single=False):
             proxy = charm.proxyClasses[ARRAY][obj.__class__](aid, len(obj.thisIndex))
             obj.thisProxy = proxy[index]
         else:
-            obj.thisProxy = charm.proxyClasses[ARRAY][obj.__class__](aid, len(obj.thisIndex))
-        obj._contributeInfo = charm.lib.initContributeInfo(aid, obj.thisIndex, CONTRIBUTOR_TYPE_ARRAY)
+            obj.thisProxy = charm.proxyClasses[ARRAY][obj.__class__](
+                aid, len(obj.thisIndex)
+            )
+        obj._contributeInfo = charm.lib.initContributeInfo(
+            aid, obj.thisIndex, CONTRIBUTOR_TYPE_ARRAY
+        )
         obj.migratable = True
 
     @classmethod
@@ -864,49 +998,56 @@ def __baseEntryMethods__(cls):
         # - to register the migration constructor on Charm++ side (note that this migration constructor does nothing)
         # - Chare.migrated() is called whenever a chare has completed migration.
         #   The EntryMethod object with this name is used to profile Chare.migrated() calls.
-        return ['__init__', 'migrated', 'AtSync']
+        return ["__init__", "migrated", "AtSync"]
 
     @classmethod
     def __getProxyClass__(C, cls, sectionProxy=False):
         if not sectionProxy:
-            proxyClassName = cls.__name__ + 'ArrayProxy'
+            proxyClassName = cls.__name__ + "ArrayProxy"
         else:
-            proxyClassName = cls.__name__ + 'ArraySecProxy'
+            proxyClassName = cls.__name__ + "ArraySecProxy"
         M = dict()  # proxy methods
         entryMethods = charm.classEntryMethods[ARRAY][cls]
         for m in entryMethods:
             if m.epIdx == -1:
-                raise Charm4PyError('Unregistered entry method')
-            if m.name in {'__init__', 'migrated'}:
+                raise Charm4PyError("Unregistered entry method")
+            if m.name in {"__init__", "migrated"}:
                 continue
             argcount, argnames, defaults = getEntryMethodInfo(m.C, m.name)
             if Options.profiling:
-                f = profile_send_function(array_proxy_method_gen(m.epIdx, argcount, argnames, defaults))
+                f = profile_send_function(
+                    array_proxy_method_gen(m.epIdx, argcount, argnames, defaults)
+                )
             else:
                 f = array_proxy_method_gen(m.epIdx, argcount, argnames, defaults)
-            f.__qualname__ = proxyClassName + '.' + m.name
+            f.__qualname__ = proxyClassName + "." + m.name
             f.__name__ = m.name
             M[m.name] = f
-        M['__init__'] = array_proxy_ctor
-        M['__getitem__'] = array_proxy_elem
-        M['__eq__'] = array_proxy__eq__
-        M['__hash__'] = array_proxy__hash__
-        M['ckNew'] = array_ckNew_gen(cls, entryMethods[0].epIdx)
-        M['__getsecproxy__'] = array_getsecproxy
-        M['ckInsert'] = array_ckInsert_gen(entryMethods[0].epIdx)
-        M['ckDoneInserting'] = array_proxy_doneInserting
+        M["__init__"] = array_proxy_ctor
+        M["__getitem__"] = array_proxy_elem
+        M["__eq__"] = array_proxy__eq__
+        M["__hash__"] = array_proxy__hash__
+        M["ckNew"] = array_ckNew_gen(cls, entryMethods[0].epIdx)
+        M["__getsecproxy__"] = array_getsecproxy
+        M["ckInsert"] = array_ckInsert_gen(entryMethods[0].epIdx)
+        M["ckDoneInserting"] = array_proxy_doneInserting
         if not sectionProxy:
-            M['ckContribute'] = array_proxy_contribute  # function called when target proxy is Array
-            M['__getstate__'] = array_proxy__getstate__
-            M['__setstate__'] = array_proxy__setstate__
+            M["ckContribute"] = (
+                array_proxy_contribute  # function called when target proxy is Array
+            )
+            M["__getstate__"] = array_proxy__getstate__
+            M["__setstate__"] = array_proxy__setstate__
         else:
-            M['ckContribute'] = arraysecproxy_contribute  # function called when target proxy is Array
-            M['__getstate__'] = arraysecproxy__getstate__
-            M['__setstate__'] = arraysecproxy__setstate__
+            M["ckContribute"] = (
+                arraysecproxy_contribute  # function called when target proxy is Array
+            )
+            M["__getstate__"] = arraysecproxy__getstate__
+            M["__setstate__"] = arraysecproxy__setstate__
         proxyCls = type(proxyClassName, (), M)  # create and return proxy class
         proxyCls.issec = sectionProxy
         return proxyCls
 
+
 # ---------------------------------------------------
 
 charm_type_id_to_class = [None] * len(CHARM_TYPES)
@@ -922,5 +1063,6 @@ def __getProxyClass__(C, cls, sectionProxy=False):
 def charmStarting():
     global charm, Options, Reducer, Charm4PyError, CharmRemote, profile_send_function
     from .charm import charm, Charm4PyError, CharmRemote, profile_send_function
+
     Options = charm.options
     Reducer = charm.reducers
diff --git a/charm4py/charm.py b/charm4py/charm.py
index c343eb62..17196230 100644
--- a/charm4py/charm.py
+++ b/charm4py/charm.py
@@ -7,6 +7,7 @@
 #
 import sys
 import os
+
 if sys.version_info < (3, 0, 0):
     import cPickle
     from cStringIO import StringIO
@@ -21,7 +22,7 @@
 import traceback
 from . import chare
 from .chare import MAINCHARE, GROUP, ARRAY, CHARM_TYPES
-from .chare import CONTRIBUTOR_TYPE_GROUP, CONTRIBUTOR_TYPE_ARRAY
+from .chare import CONTRIBUTOR_TYPE_ARRAY
 from .chare import Chare, Mainchare, Group, ArrayMap, Array
 from . import entry_method
 from . import threads
@@ -29,14 +30,15 @@
 from . import reduction
 from . import wait
 from charm4py.c_object_store import MessageBuffer
-from . import ray
 import array
+
 try:
     import numpy
 except ImportError:
     # this is to avoid numpy dependency
     class NumpyDummy:
         ndarray = None
+
     numpy = NumpyDummy()
 
 
@@ -57,19 +59,25 @@ def register(C):
 class Options(object):
 
     def __str__(self):
-        output = ''
+        output = ""
         for varname in dir(self):
             var = getattr(self, varname)
-            if isinstance(var, Options) or varname.startswith('__') or callable(var):
+            if isinstance(var, Options) or varname.startswith("__") or callable(var):
                 continue
-            output += varname + ': ' + str(var) + '\n'
+            output += varname + ": " + str(var) + "\n"
         return output
 
     def check_deprecated(self):
-        old_options = {'PROFILING', 'PICKLE_PROTOCOL', 'LOCAL_MSG_OPTIM',
-                       'LOCAL_MSG_BUF_SIZE', 'AUTO_FLUSH_WAIT_QUEUES', 'QUIET'}
+        old_options = {
+            "PROFILING",
+            "PICKLE_PROTOCOL",
+            "LOCAL_MSG_OPTIM",
+            "LOCAL_MSG_BUF_SIZE",
+            "AUTO_FLUSH_WAIT_QUEUES",
+            "QUIET",
+        }
         if len(old_options.intersection(set(dir(self.__class__)))) != 0:
-            raise Charm4PyError('Options API has changed. Use charm.options instead')
+            raise Charm4PyError("Options API has changed. Use charm.options instead")
 
 
 class Charm4PyError(Exception):
@@ -82,10 +90,12 @@ def __init__(self, msg):
 # per process)
 class Charm(object):
 
-    if os.name == 'nt':
+    if os.name == "nt":
+
         class PrintStream(object):
             def write(self, msg):
                 charm.lib.CkPrintf(msg.encode())
+
             def flush(self):
                 pass
 
@@ -93,15 +103,27 @@ def __init__(self):
         self.started = False
         self._myPe = -1
         self._numPes = -1
-        self.registered = {}      # class -> set of Charm types (Mainchare, Group, Array) for which this class is registered
-        self.register_order = []  # list of classes in registration order (all processes must use same order)
+        self.registered = (
+            {}
+        )  # class -> set of Charm types (Mainchare, Group, Array) for which this class is registered
+        self.register_order = (
+            []
+        )  # list of classes in registration order (all processes must use same order)
         self.chares = {}
-        self.groups = {}          # group ID -> group instance on this PE
-        self.arrays = defaultdict(dict)  # aid -> idx -> array element instance with index idx on this PE
-        self.entryMethods = {}    # ep_idx -> EntryMethod object
-        self.classEntryMethods = [{} for _ in CHARM_TYPES]  # charm_type_id -> class -> list of EntryMethod objects
-        self.proxyClasses      = [{} for _ in CHARM_TYPES]  # charm_type_id -> class -> proxy class
-        self.groupMsgBuf = defaultdict(list)  # gid -> list of msgs received for constrained groups that haven't been created yet
+        self.groups = {}  # group ID -> group instance on this PE
+        self.arrays = defaultdict(
+            dict
+        )  # aid -> idx -> array element instance with index idx on this PE
+        self.entryMethods = {}  # ep_idx -> EntryMethod object
+        self.classEntryMethods = [
+            {} for _ in CHARM_TYPES
+        ]  # charm_type_id -> class -> list of EntryMethod objects
+        self.proxyClasses = [
+            {} for _ in CHARM_TYPES
+        ]  # charm_type_id -> class -> proxy class
+        self.groupMsgBuf = defaultdict(
+            list
+        )  # gid -> list of msgs received for constrained groups that haven't been created yet
         self.section_counter = 0
         self.rebuildFuncs = (rebuildByteArray, rebuildArray, rebuildNumpyArray)
         self.sched_tagpool = set(range(1, 128))  # pool of tags for scheduling callables
@@ -133,7 +155,7 @@ def __init__(self):
         self.mainchareRegistered = False
         # entry point to Charm program. can be used in place of defining a Mainchare
         self.entry_func = None
-        if self.lib.name == 'cython':
+        if self.lib.name == "cython":
             # replace these methods with the fast Cython versions
             self.packMsg = self.lib.packMsg
             self.unpackMsg = self.lib.unpackMsg
@@ -141,7 +163,7 @@ def __init__(self):
         self.last_exception_timestamp = time.time()
         # store chare types defined after program start and other objects created
         # in interactive mode
-        self.dynamic_register = sys.modules['__main__'].__dict__
+        self.dynamic_register = sys.modules["__main__"].__dict__
         self.lb_requested = False
         self.threadMgr = threads.EntryMethodThreadManager(self)
         self.createFuture = self.Future = self.threadMgr.createFuture
@@ -154,7 +176,7 @@ def __init__(self):
         # TODO: maybe implement this buffer in c++
         self.future_get_buffer = {}
 
-        #registered methods for ccs
+        # registered methods for ccs
         self.ccs_methods = {}
 
     def __init_profiling__(self):
@@ -170,13 +192,12 @@ def __init_profiling__(self):
         # chares created on this PE
         self.activeChares = set()
 
-    
     def print_dbg(self, *args, **kwargs):
         print("PE", self.myPe(), ":", *args, **kwargs)
-    
+
     @entry_method.coro
     def get_future_value(self, fut):
-        #self.print_dbg("Getting data for object", fut.id)
+        # self.print_dbg("Getting data for object", fut.id)
         obj = fut.lookup_object()
         if obj is None:
             local_f = LocalFuture()
@@ -186,7 +207,7 @@ def get_future_value(self, fut):
             return fut.lookup_object()
         else:
             return obj
-        
+
     @entry_method.coro
     def getany_future_value(self, futs, num_returns):
         ready_count = 0
@@ -209,7 +230,7 @@ def getany_future_value(self, futs, num_returns):
             for f in not_local:
                 self.future_get_buffer.pop(f.store_id, None)
             return ready_list + result
-        
+
     def check_futures_buffer(self, obj_id):
         if obj_id in self.future_get_buffer:
             local_f, fut = self.future_get_buffer.pop(obj_id)
@@ -219,7 +240,7 @@ def check_send_buffer(self, obj_id):
         completed = self.send_buffer.check(obj_id)
 
     def check_receive_buffer(self, obj_id):
-        #print("Received result for", obj_id, "on pe", self._myPe)
+        # print("Received result for", obj_id, "on pe", self._myPe)
         completed = self.receive_buffer.check(obj_id)
         for args in completed:
             args = list(args)
@@ -232,39 +253,49 @@ def check_receive_buffer(self, obj_id):
     def handleGeneralError(self):
         errorType, error, stacktrace = sys.exc_info()
         if not self.interactive:
-            if hasattr(error, 'remote_stacktrace'):
+            if hasattr(error, "remote_stacktrace"):
                 origin, stacktrace = error.remote_stacktrace
-                print('----------------- Python Stack Traceback PE ' + str(origin) + ' -----------------')
+                print(
+                    "----------------- Python Stack Traceback PE "
+                    + str(origin)
+                    + " -----------------"
+                )
                 print(stacktrace)
             else:
-                print('----------------- Python Stack Traceback PE ' + str(self.myPe()) + ' -----------------')
+                print(
+                    "----------------- Python Stack Traceback PE "
+                    + str(self.myPe())
+                    + " -----------------"
+                )
                 traceback.print_tb(stacktrace, limit=None)
-            self.abort(errorType.__name__ + ': ' + str(error))
+            self.abort(errorType.__name__ + ": " + str(error))
         else:
-            self.thisProxy[self.myPe()].propagateException(self.prepareExceptionForSend(error))
+            self.thisProxy[self.myPe()].propagateException(
+                self.prepareExceptionForSend(error)
+            )
 
     def prepareExceptionForSend(self, e):
-        if not hasattr(e, 'remote_stacktrace'):
+        if not hasattr(e, "remote_stacktrace"):
             f = StringIO()
             traceback.print_tb(sys.exc_info()[2], limit=None, file=f)
             e.remote_stacktrace = (self.myPe(), f.getvalue())
         return e
 
     def process_em_exc(self, e, obj, header):
-        if b'block' not in header:
+        if b"block" not in header:
             raise e
         # remote is expecting a response via a future, send exception to the future
-        blockFuture = header[b'block']
+        blockFuture = header[b"block"]
         sid = None
-        if b'sid' in header:
-            sid = header[b'sid']
-        if b'creation' in header:
+        if b"sid" in header:
+            sid = header[b"sid"]
+        if b"creation" in header:
             # don't send anything in this case (future is not guaranteed to be used)
             obj.contribute(None, None, blockFuture, sid)
             raise e
         self.prepareExceptionForSend(e)
-        if b'bcast' in header:
-            if b'bcastret' in header:
+        if b"bcast" in header:
+            if b"bcastret" in header:
                 obj.contribute(e, self.reducers.gather, blockFuture, sid)
             else:
                 # NOTE: it will work if some elements contribute with an exception (here)
@@ -276,8 +307,9 @@ def process_em_exc(self, e, obj, header):
     def recvReadOnly(self, msg):
         roData = cPickle.loads(msg)
         for name, obj in roData.items():
-            if name == 'charm_pool_proxy__h':
+            if name == "charm_pool_proxy__h":
                 from .pool import Pool
+
                 self.pool = Pool(obj)
             else:
                 setattr(readonlies, name, obj)
@@ -286,9 +318,11 @@ def recvReadOnly(self, msg):
     def buildMainchare(self, onPe, objPtr, ep, args):
         cid = (onPe, objPtr)  # chare ID (objPtr should be a Python int)
         assert onPe == self.myPe()
-        assert cid not in self.chares, 'Chare ' + str(cid) + ' already instantiated'
+        assert cid not in self.chares, "Chare " + str(cid) + " already instantiated"
         em = self.entryMethods[ep]
-        assert em.name == '__init__', 'Specified mainchare entry method is not constructor'
+        assert (
+            em.name == "__init__"
+        ), "Specified mainchare entry method is not constructor"
         self._createInternalChares()
         obj = object.__new__(em.C)  # create object but don't call __init__
         Mainchare.initMember(obj, cid)
@@ -304,11 +338,11 @@ def buildMainchare(self, onPe, objPtr, ep, args):
         if self.myPe() == 0:  # broadcast readonlies
             roData = {}
             for attr in dir(readonlies):  # attr is string
-                if not attr.startswith('_') and not attr.endswith('_'):
+                if not attr.startswith("_") and not attr.endswith("_"):
                     roData[attr] = getattr(readonlies, attr)
             msg = cPickle.dumps(roData, self.options.pickle_protocol)
             # print("Registering readonly data of size " + str(len(msg)))
-            self.lib.CkRegisterReadonly(b'charm4py_ro', b'charm4py_ro', msg)
+            self.lib.CkRegisterReadonly(b"charm4py_ro", b"charm4py_ro", msg)
         gc.collect()
 
     def invokeEntryMethod(self, obj, ep, header, args, ret_fut=False):
@@ -329,18 +363,18 @@ def recvGroupMsg(self, gid, ep, msg, dcopy_start):
         if gid in self.groups:
             obj = self.groups[gid]
             header, args = self.unpackMsg(msg, dcopy_start, obj)
-            self.invokeEntryMethod(obj, ep, header, args, ret_fut=False)        
+            self.invokeEntryMethod(obj, ep, header, args, ret_fut=False)
         else:
             em = self.entryMethods[ep]
             header, args = self.unpackMsg(msg, dcopy_start, None)
-            if em.name != '__init__':
+            if em.name != "__init__":
                 # this is not a constructor msg and the group hasn't been
                 # created yet. this should only happen for constrained groups
                 # (buffering of msgs for regular groups that haven't
                 # been created yet is done inside Charm++)
                 self.groupMsgBuf[gid].append((ep, header, args))
                 return
-            if b'constrained' in header:
+            if b"constrained" in header:
                 # constrained group instances are created by SectionManager
                 return
             assert gid not in self.groupMsgBuf
@@ -361,7 +395,7 @@ def recvArrayMsg(self, aid, index, ep, msg, dcopy_start):
             obj = self.arrays[aid][index]
             header, args = self.unpackMsg(msg, dcopy_start, obj)
             dep_ids = []
-            is_ray = 'is_ray' in header and header['is_ray']
+            is_ray = "is_ray" in header and header["is_ray"]
             if is_ray:
                 for i, arg in enumerate(args[:-1]):
                     if isinstance(arg, Future):
@@ -377,19 +411,23 @@ def recvArrayMsg(self, aid, index, ep, msg, dcopy_start):
                 self.invokeEntryMethod(obj, ep, header, args, ret_fut=is_ray)
         else:
             em = self.entryMethods[ep]
-            assert em.name == '__init__', 'Specified array entry method not constructor'
+            assert em.name == "__init__", "Specified array entry method not constructor"
             header, args = self.unpackMsg(msg, dcopy_start, None)
             if self.options.profiling:
                 self.activeChares.add((em.C, Array))
             if isinstance(args, Chare):  # obj migrating in
-                em = self.entryMethods[ep + 1]  # get 'migrated' EntryMethod object instead of __init__
+                em = self.entryMethods[
+                    ep + 1
+                ]  # get 'migrated' EntryMethod object instead of __init__
                 obj = args
-                obj._contributeInfo = self.lib.initContributeInfo(aid, index, CONTRIBUTOR_TYPE_ARRAY)
+                obj._contributeInfo = self.lib.initContributeInfo(
+                    aid, index, CONTRIBUTOR_TYPE_ARRAY
+                )
                 self.arrays[aid][index] = obj
                 em.run(obj, {}, ())
             else:
-                obj = object.__new__(em.C)   # create object but don't call __init__
-                if b'single' in header:
+                obj = object.__new__(em.C)  # create object but don't call __init__
+                if b"single" in header:
                     Array.initMember(obj, aid, index, single=True)
                 else:
                     Array.initMember(obj, aid, index)
@@ -404,19 +442,19 @@ def recvArrayBcast(self, aid, indexes, ep, msg, dcopy_start):
             self.invokeEntryMethod(array[index], ep, header, args)
 
     def unpackMsg(self, msg, dcopy_start, dest_obj):
-        if msg[:7] == b'_local:':
+        if msg[:7] == b"_local:":
             header, args = dest_obj.__removeLocal__(int(msg[7:]))
         else:
             header, args = cPickle.loads(msg)
-            if b'dcopy' in header:
+            if b"dcopy" in header:
                 rel_offset = dcopy_start
                 buf = memoryview(msg)
-                for arg_pos, typeId, rebuildArgs, size in header[b'dcopy']:
-                    arg_buf = buf[rel_offset:rel_offset + size]
+                for arg_pos, typeId, rebuildArgs, size in header[b"dcopy"]:
+                    arg_buf = buf[rel_offset : rel_offset + size]
                     args[arg_pos] = self.rebuildFuncs[typeId](arg_buf, *rebuildArgs)
                     rel_offset += size
-            elif b'custom_reducer' in header:
-                reducer = getattr(self.reducers, header[b'custom_reducer'])
+            elif b"custom_reducer" in header:
+                reducer = getattr(self.reducers, header[b"custom_reducer"])
                 # reduction result won't always be in position 0, but will always be last
                 # (e.g. if reduction target is a future, the reduction result will be 2nd argument)
                 if reducer.hasPostprocess:
@@ -427,31 +465,31 @@ def unpackMsg(self, msg, dcopy_start, dest_obj):
     def packMsg(self, destObj, msgArgs, header):
         """Prepares a message for sending, given arguments to an entry method invocation.
 
-          The message is the result of pickling `(header,args)` where header is a dict,
-          and args the list of arguments. If direct-copy is enabled, arguments supporting
-          the buffer interface will bypass pickling and their place in 'args' will be
-          made empty. Instead, metadata to reconstruct these args at the destination will be
-          put in the header, and this method will return a list of buffers for
-          direct-copying of these args into a CkMessage at Charm side.
+        The message is the result of pickling `(header,args)` where header is a dict,
+        and args the list of arguments. If direct-copy is enabled, arguments supporting
+        the buffer interface will bypass pickling and their place in 'args' will be
+        made empty. Instead, metadata to reconstruct these args at the destination will be
+        put in the header, and this method will return a list of buffers for
+        direct-copying of these args into a CkMessage at Charm side.
 
-          If destination object exists on same PE as source, the args will be stored in
-          '_local' buffer of destination obj (without copying), and the msg will be a
-          small integer tag to retrieve the args from '_local' when the msg is delivered.
+        If destination object exists on same PE as source, the args will be stored in
+        '_local' buffer of destination obj (without copying), and the msg will be a
+        small integer tag to retrieve the args from '_local' when the msg is delivered.
 
-          Args:
-              destObj: destination object if it exists on the same PE as source, otherwise None
-              msgArgs: arguments to entry method
-              header: msg header
+        Args:
+            destObj: destination object if it exists on the same PE as source, otherwise None
+            msgArgs: arguments to entry method
+            header: msg header
 
-          Returns:
-              2-tuple containing msg and list of direct-copy buffers
+        Returns:
+            2-tuple containing msg and list of direct-copy buffers
 
         """
         direct_copy_buffers = []
         dcopy_size = 0
         if destObj is not None:  # if dest obj is local
             localTag = destObj.__addLocal__((header, msgArgs))
-            msg = ('_local:' + str(localTag)).encode()
+            msg = ("_local:" + str(localTag)).encode()
         else:
             direct_copy_hdr = []  # goes to msg header
             args = list(msgArgs)
@@ -474,15 +512,20 @@ def packMsg(self, destObj, msgArgs, header):
                         # C-contiguous", which seems to be a CPython error (not cffi related)
                         nbytes = arg.nbytes
                         if arg.dtype.isbuiltin:
-                            direct_copy_hdr.append((i, 2, (arg.shape, arg.dtype.char), nbytes))
+                            direct_copy_hdr.append(
+                                (i, 2, (arg.shape, arg.dtype.char), nbytes)
+                            )
                         else:
-                            direct_copy_hdr.append((i, 2, (arg.shape, arg.dtype.name), nbytes))
+                            direct_copy_hdr.append(
+                                (i, 2, (arg.shape, arg.dtype.name), nbytes)
+                            )
                     else:
                         continue
                     args[i] = None  # will direct-copy this arg so remove from args list
                     direct_copy_buffers.append(memoryview(arg))
                     dcopy_size += nbytes
-                if len(direct_copy_hdr) > 0: header[b'dcopy'] = direct_copy_hdr
+                if len(direct_copy_hdr) > 0:
+                    header[b"dcopy"] = direct_copy_hdr
             msg = (header, args)
             msg = cPickle.dumps(msg, self.options.pickle_protocol)
         if self.options.profiling:
@@ -494,7 +537,9 @@ def registerInCharmAs(self, C, charm_type, libRegisterFunc):
         charm_type_id = charm_type.type_id
         entryMethods = self.classEntryMethods[charm_type_id][C]
         entryNames = [method.name for method in entryMethods]
-        C.idx[charm_type_id], startEpIdx = libRegisterFunc(C.__name__ + str(charm_type_id), entryNames, len(entryMethods))
+        C.idx[charm_type_id], startEpIdx = libRegisterFunc(
+            C.__name__ + str(charm_type_id), entryNames, len(entryMethods)
+        )
         for i, em in enumerate(entryMethods):
             em.epIdx = startEpIdx + i
             self.entryMethods[em.epIdx] = em
@@ -527,41 +572,54 @@ def registerInCharm(self, C):
     # first callback from Charm++ shared library
     # this method registers classes with the shared library
     def registerMainModule(self):
-        self._myPe   = self.lib.CkMyPe()
+        self._myPe = self.lib.CkMyPe()
         self._numPes = self.lib.CkNumPes()
 
         # Charm++ library captures stdout/stderr. here we reset the streams with a buffering
         # policy that ensures that messages reach Charm++ in a timely fashion
-        if os.name == 'nt':
+        if os.name == "nt":
             sys.stdout = Charm.PrintStream()
         else:
-            sys.stdout = os.fdopen(1, 'wt', 1)
-            sys.stderr = os.fdopen(2, 'wt', 1)
+            sys.stdout = os.fdopen(1, "wt", 1)
+            sys.stderr = os.fdopen(2, "wt", 1)
         if self.myPe() != 0:
-            self.lib.CkRegisterReadonly(b'python_null', b'python_null', None)
+            self.lib.CkRegisterReadonly(b"python_null", b"python_null", None)
 
         if (self.myPe() == 0) and (not self.options.quiet):
             import platform
             from . import charm4py_version
+
             py_impl = platform.python_implementation()
-            print("Charm4py> Running Charm4py version " + charm4py_version +
-                       " on Python " + str(platform.python_version()) + " (" +
-                       py_impl + "). Using '" +
-                       self.lib.name + "' interface to access Charm++")
-            if py_impl != 'CPython':
-                raise Charm4PyError('PyPy is no longer supported. Use CPython instead')
-            if sys.version_info < (3,8,0):
-                raise Charm4PyError('Python 2 is no longer supported. Use Python 3.8 or above instead')
+            print(
+                "Charm4py> Running Charm4py version "
+                + charm4py_version
+                + " on Python "
+                + str(platform.python_version())
+                + " ("
+                + py_impl
+                + "). Using '"
+                + self.lib.name
+                + "' interface to access Charm++"
+            )
+            if py_impl != "CPython":
+                raise Charm4PyError("PyPy is no longer supported. Use CPython instead")
+            if sys.version_info < (3, 8, 0):
+                raise Charm4PyError(
+                    "Python 2 is no longer supported. Use Python 3.8 or above instead"
+                )
             if self.options.profiling:
-                print('Charm4py> Profiling is ON (this affects performance)')
+                print("Charm4py> Profiling is ON (this affects performance)")
 
         for C in self.register_order:
             self.registerInCharm(C)
 
     def registerAs(self, C, charm_type_id):
         from .sections import SectionManager
+
         if charm_type_id == MAINCHARE:
-            assert not self.mainchareRegistered, 'More than one entry point has been specified'
+            assert (
+                not self.mainchareRegistered
+            ), "More than one entry point has been specified"
             self.mainchareRegistered = True
             # make mainchare constructor always a coroutine
             if sys.version_info < (3, 0, 0):
@@ -571,25 +629,30 @@ def registerAs(self, C, charm_type_id):
         charm_type = chare.charm_type_id_to_class[charm_type_id]
         # print("charm4py: Registering class " + C.__name__, "as", charm_type.__name__, "type_id=", charm_type_id, charm_type)
         profilingOn = self.options.profiling
-        ems = [entry_method.EntryMethod(C, m, profilingOn) for m in charm_type.__baseEntryMethods__()]
+        ems = [
+            entry_method.EntryMethod(C, m, profilingOn)
+            for m in charm_type.__baseEntryMethods__()
+        ]
 
         members = dir(C)
         if C == SectionManager:
-            ems.append(entry_method.EntryMethod(C, 'sendToSection', profilingOn))
-            members.remove('sendToSection')
+            ems.append(entry_method.EntryMethod(C, "sendToSection", profilingOn))
+            members.remove("sendToSection")
         self.classEntryMethods[charm_type_id][C] = ems
 
         for m in members:
             m_obj = getattr(C, m)
             if not callable(m_obj) or inspect.isclass(m_obj):
                 continue
-            if m in chare.method_restrictions['reserved'] and m_obj != getattr(Chare, m):
-                raise Charm4PyError(str(C) + " redefines reserved method '"  + m + "'")
-            if m.startswith('__') and m.endswith('__'):
+            if m in chare.method_restrictions["reserved"] and m_obj != getattr(
+                Chare, m
+            ):
+                raise Charm4PyError(str(C) + " redefines reserved method '" + m + "'")
+            if m.startswith("__") and m.endswith("__"):
                 continue  # filter out non-user methods
-            if m in chare.method_restrictions['non_entry_method']:
+            if m in chare.method_restrictions["non_entry_method"]:
                 continue
-            if charm_type_id != ARRAY and m in {'migrate', 'setMigratable'}:
+            if charm_type_id != ARRAY and m in {"migrate", "setMigratable"}:
                 continue
             # print(m)
             em = entry_method.EntryMethod(C, m, profilingOn)
@@ -601,8 +664,8 @@ def registerAs(self, C, charm_type_id):
     def register(self, C, collections=(GROUP, ARRAY)):
         if C in self.registered:
             return
-        if (not hasattr(C, 'mro')) or (Chare not in C.mro()):
-            raise Charm4PyError('Only subclasses of Chare can be registered')
+        if (not hasattr(C, "mro")) or (Chare not in C.mro()):
+            raise Charm4PyError("Only subclasses of Chare can be registered")
 
         # cache of template condition objects for `chare.wait(cond_str)` calls
         # maps cond_str to condition object. the condition object stores the lambda function associated with cond_str
@@ -616,11 +679,13 @@ def register(self, C, collections=(GROUP, ARRAY)):
     def _registerInternalChares(self):
         global SectionManager
         from .sections import SectionManager
+
         self.register(SectionManager, (GROUP,))
 
         self.register(CharmRemote, (GROUP,))
 
         from .pool import PoolScheduler, Worker
+
         if self.interactive:
             if sys.version_info < (3, 0, 0):
                 entry_method.coro(PoolScheduler.start.im_func)
@@ -632,14 +697,16 @@ def _registerInternalChares(self):
         self.register(Worker, (GROUP,))
 
         if self.options.profiling:
-            self.internalChareTypes.update({SectionManager, CharmRemote,
-                                            PoolScheduler, Worker})
+            self.internalChareTypes.update(
+                {SectionManager, CharmRemote, PoolScheduler, Worker}
+            )
 
     def _createInternalChares(self):
         Group(CharmRemote)
         Group(SectionManager)
 
         from .pool import Pool, PoolScheduler
+
         pool_proxy = Chare(PoolScheduler, onPE=0)
         self.pool = Pool(pool_proxy)
         readonlies.charm_pool_proxy__h = pool_proxy
@@ -667,40 +734,53 @@ def start(self, entry=None, classes=[], modules=[], interactive=False):
         if interactive:
             from .interactive import InteractiveConsole as entry
             from .channel import Channel
+
             self.options.remote_exec = True
             self.origStdinFd = os.dup(0)
             self.origStoutFd = os.dup(1)
             self.interactive = True
-            self.dynamic_register.update({'charm': charm, 'Chare': Chare, 'Group': Group,
-                                          'Array': Array, 'Reducer': self.reducers,
-                                          'threaded': entry_method.coro, 'coro': entry_method.coro,
-                                          'Channel': Channel})
+            self.dynamic_register.update(
+                {
+                    "charm": charm,
+                    "Chare": Chare,
+                    "Group": Group,
+                    "Array": Array,
+                    "Reducer": self.reducers,
+                    "threaded": entry_method.coro,
+                    "coro": entry_method.coro,
+                    "Channel": Channel,
+                }
+            )
 
         if self.started:
-            raise Charm4PyError('charm.start() can only be called once')
+            raise Charm4PyError("charm.start() can only be called once")
         self.started = True
 
         if self.options.profiling:
             self.__init_profiling__()
             self.contribute = profile_send_function(self.contribute)
-            self.triggerCallableEM = entry_method.EntryMethod(self.__class__,
-                                                              'triggerCallable',
-                                                              True)
-        if self.options.quiet and '++quiet' not in sys.argv:
-            sys.argv += ['++quiet']
-        elif '++quiet' in sys.argv:
+            self.triggerCallableEM = entry_method.EntryMethod(
+                self.__class__, "triggerCallable", True
+            )
+        if self.options.quiet and "++quiet" not in sys.argv:
+            sys.argv += ["++quiet"]
+        elif "++quiet" in sys.argv:
             self.options.quiet = True
 
         self._registerInternalChares()
 
-        if hasattr(entry, 'mro') and Chare in entry.mro():
+        if hasattr(entry, "mro") and Chare in entry.mro():
             if entry.__init__.__code__.co_argcount != 2:
-                raise Charm4PyError('Mainchare constructor must take one (and only one) parameter')
+                raise Charm4PyError(
+                    "Mainchare constructor must take one (and only one) parameter"
+                )
             self.register(entry, (MAINCHARE,))
         else:
-            assert callable(entry), 'Given entry point is not a function or Chare'
+            assert callable(entry), "Given entry point is not a function or Chare"
             if entry.__code__.co_argcount != 1:
-                raise Charm4PyError('Main function must have one (and only one) parameter')
+                raise Charm4PyError(
+                    "Main function must have one (and only one) parameter"
+                )
             self.entry_func = entry
             self.register(chare.DefaultMainchare, (MAINCHARE,))
 
@@ -713,37 +793,46 @@ def start(self, entry=None, classes=[], modules=[], interactive=False):
                 raise Charm4PyError("Class", C, "is not a Chare (can't register)")
 
         import importlib
+
         M = list(modules)
-        if '__main__' not in M:
-            M.append('__main__')
+        if "__main__" not in M:
+            M.append("__main__")
         for module_name in M:
             if module_name not in sys.modules:
                 importlib.import_module(module_name)
-            for C_name, C in inspect.getmembers(sys.modules[module_name], inspect.isclass):
-                if C.__module__ != chare.__name__ and hasattr(C, 'mro'):
+            for C_name, C in inspect.getmembers(
+                sys.modules[module_name], inspect.isclass
+            ):
+                if C.__module__ != chare.__name__ and hasattr(C, "mro"):
                     if ArrayMap in C.mro():
                         self.register(C, (GROUP,))  # register ArrayMap only as Group
                     elif Chare in C.mro():
                         self.register(C)
                     elif Group in C.mro() or Array in C.mro() or Mainchare in C.mro():
-                        raise Charm4PyError('Chares must not inherit from Group, Array or'
-                                            ' Mainchare. Refer to new API')
+                        raise Charm4PyError(
+                            "Chares must not inherit from Group, Array or"
+                            " Mainchare. Refer to new API"
+                        )
 
         for module in (chare, entry_method, wait):
             module.charmStarting()
         self.threadMgr.start()
 
-        self.lb_requested = '+balancer' in sys.argv
+        self.lb_requested = "+balancer" in sys.argv
         self.lib.start()
 
     def arrayElemLeave(self, aid, index):
         obj = self.arrays[aid].pop(index)
-        if hasattr(obj, '_scookies'):
-            charm.abort('Cannot migrate elements that are part of a section '
-                        '(this will be supported in a future version)')
+        if hasattr(obj, "_scookies"):
+            charm.abort(
+                "Cannot migrate elements that are part of a section "
+                "(this will be supported in a future version)"
+            )
         self.threadMgr.objMigrating(obj)
-        if hasattr(obj, '__channels__'):
-            assert len(obj.__pendingChannels__) == 0, 'Cannot migrate chares that did not complete channel establishment'
+        if hasattr(obj, "__channels__"):
+            assert (
+                len(obj.__pendingChannels__) == 0
+            ), "Cannot migrate chares that did not complete channel establishment"
         del obj._contributeInfo  # don't want to pickle this
         pickled_chare = cPickle.dumps(({}, obj), self.options.pickle_protocol)
         # facilitate garbage collection (especially by removing cyclical references)
@@ -763,7 +852,9 @@ def contribute(self, data, reducer, target, chare, section=None):
             if isinstance(target, Future):
                 fid = target.fid
                 target = target.getTargetProxyEntryMethod()
-            contributeInfo = self.lib.getContributeInfo(target.ep, fid, contribution, chare)
+            contributeInfo = self.lib.getContributeInfo(
+                target.ep, fid, contribution, chare
+            )
             if self.options.profiling:
                 self.recordSend(contributeInfo.getDataSize())
             target.__self__.ckContribute(contributeInfo)
@@ -783,7 +874,9 @@ def contribute(self, data, reducer, target, chare, section=None):
             try:
                 redno = chare._scookies[sid]
             except:
-                raise Charm4PyError('Chare doing section reduction but is not part of a section')
+                raise Charm4PyError(
+                    "Chare doing section reduction but is not part of a section"
+                )
             self.sectionMgr.contrib(sid, redno, data, reducer, target)
             chare._scookies[sid] += 1
 
@@ -796,7 +889,9 @@ def combine(self, *proxies):
             secproxy = None
             if proxy.issec:
                 secproxy = proxy
-            proxy._getSectionLocations_(sid, 1, SECTION_ALL, None, None, futures[i], secproxy)
+            proxy._getSectionLocations_(
+                sid, 1, SECTION_ALL, None, None, futures[i], secproxy
+            )
         for f in futures:
             pes.update(f.get()[0])
         assert len(pes) > 0
@@ -804,8 +899,12 @@ def combine(self, *proxies):
         self.sectionMgr.thisProxy[root].createSectionDown(sid, pes, None)
         return proxies[0].__getsecproxy__((root, sid))
 
-    def split(self, proxy, numsections, section_func=None, elems=None, slicing=None, cons=None):
-        assert (hasattr(proxy, 'gid') and proxy.elemIdx == -1) or (hasattr(proxy, 'aid') and proxy.elemIdx == ())
+    def split(
+        self, proxy, numsections, section_func=None, elems=None, slicing=None, cons=None
+    ):
+        assert (hasattr(proxy, "gid") and proxy.elemIdx == -1) or (
+            hasattr(proxy, "aid") and proxy.elemIdx == ()
+        )
         sid0 = (self._myPe, self.section_counter)
         self.section_counter += numsections
         secproxy = None
@@ -813,22 +912,30 @@ def split(self, proxy, numsections, section_func=None, elems=None, slicing=None,
             secproxy = proxy
         if elems is None:
             f = self.Future()
-            proxy._getSectionLocations_(sid0, numsections, section_func, slicing, None, f, secproxy)
+            proxy._getSectionLocations_(
+                sid0, numsections, section_func, slicing, None, f, secproxy
+            )
             section_pes = f.get()
         else:
-            if numsections == 1 and not isinstance(elems[0], list) and not isinstance(elems[0], set):
+            if (
+                numsections == 1
+                and not isinstance(elems[0], list)
+                and not isinstance(elems[0], set)
+            ):
                 elems = [elems]
             try:
                 assert len(elems) == numsections
             except AssertionError:
                 print(len(elems), numsections)
-            if hasattr(proxy, 'gid') and not proxy.issec:
+            if hasattr(proxy, "gid") and not proxy.issec:
                 # in this case the elements are guaranteed to be PEs, so I don't
                 # have to collect locations
                 section_pes = elems
             else:
                 f = self.Future()
-                proxy._getSectionLocations_(sid0, numsections, None, None, elems, f, secproxy)
+                proxy._getSectionLocations_(
+                    sid0, numsections, None, None, elems, f, secproxy
+                )
                 section_pes = f.get()
         secProxies = []
         # TODO if there are many many sections, should do a stateless multicast to the roots with the section info
@@ -841,8 +948,10 @@ def split(self, proxy, numsections, section_func=None, elems=None, slicing=None,
                 pes = set(pes)
             assert len(pes) > 0
             root = min(pes)
-            if not proxy.issec and hasattr(proxy, 'gid'):
-                self.sectionMgr.thisProxy[root].createGroupSectionDown(sid, proxy.gid, pes, None, cons)
+            if not proxy.issec and hasattr(proxy, "gid"):
+                self.sectionMgr.thisProxy[root].createGroupSectionDown(
+                    sid, proxy.gid, pes, None, cons
+                )
             else:
                 self.sectionMgr.thisProxy[root].createSectionDown(sid, pes, None)
             secProxies.append(proxy.__getsecproxy__((root, sid)))
@@ -854,12 +963,18 @@ def startQD(self, callback):
             fid = callback.fid
             callback = callback.getTargetProxyEntryMethod()
         cb_proxy = callback.__self__
-        if hasattr(cb_proxy, 'section'):
-            self.lib.CkStartQD_SectionCallback(cb_proxy.section[1], cb_proxy.section[0], callback.ep)
-        elif hasattr(cb_proxy, 'gid'):
-            self.lib.CkStartQD_GroupCallback(cb_proxy.gid, cb_proxy.elemIdx, callback.ep, fid)
-        elif hasattr(cb_proxy, 'aid'):
-            self.lib.CkStartQD_ArrayCallback(cb_proxy.aid, cb_proxy.elemIdx, callback.ep, fid)
+        if hasattr(cb_proxy, "section"):
+            self.lib.CkStartQD_SectionCallback(
+                cb_proxy.section[1], cb_proxy.section[0], callback.ep
+            )
+        elif hasattr(cb_proxy, "gid"):
+            self.lib.CkStartQD_GroupCallback(
+                cb_proxy.gid, cb_proxy.elemIdx, callback.ep, fid
+            )
+        elif hasattr(cb_proxy, "aid"):
+            self.lib.CkStartQD_ArrayCallback(
+                cb_proxy.aid, cb_proxy.elemIdx, callback.ep, fid
+            )
         else:
             self.lib.CkStartQD_ChareCallback(cb_proxy.cid, callback.ep, fid)
 
@@ -880,10 +995,14 @@ def sleep(self, secs):
 
     def awaitCreation(self, *proxies):
         for proxy in proxies:
-            if not hasattr(proxy, 'creation_future'):
-                if not proxy.__class__.__name__.endswith('Proxy'):
-                    raise Charm4PyError('Did not pass a proxy to awaitCreation? ' + str(type(proxy)))
-                raise Charm4PyError('awaitCreation can only be used if creation triggered from a coroutine entry method')
+            if not hasattr(proxy, "creation_future"):
+                if not proxy.__class__.__name__.endswith("Proxy"):
+                    raise Charm4PyError(
+                        "Did not pass a proxy to awaitCreation? " + str(type(proxy))
+                    )
+                raise Charm4PyError(
+                    "awaitCreation can only be used if creation triggered from a coroutine entry method"
+                )
             proxy.creation_future.get()
             del proxy.creation_future
 
@@ -933,7 +1052,7 @@ def recordSendRecv(self, stats, size):
         stats[2] = max(size, stats[2])
         stats[3] += size
         stats[4] = size
-        
+
     # deposit value of one of the futures that was created on this PE
     def _future_deposit_result(self, fid, result=None):
         self.threadMgr.depositFuture(fid, result)
@@ -941,13 +1060,20 @@ def _future_deposit_result(self, fid, result=None):
     def __printTable__(self, table, sep):
         col_width = [max(len(x) for x in col) for col in zip(*table)]
         for j, line in enumerate(table):
-            if j in sep: print(sep[j])
-            print("| " + " | ".join("{:{}}".format(x, col_width[i]) for i, x in enumerate(line)) + " |")
+            if j in sep:
+                print(sep[j])
+            print(
+                "| "
+                + " | ".join(
+                    "{:{}}".format(x, col_width[i]) for i, x in enumerate(line)
+                )
+                + " |"
+            )
 
     def printStats(self):
-        assert self.started, 'charm was not started'
+        assert self.started, "charm was not started"
         if not self.options.profiling:
-            print('NOTE: called charm.printStats() but profiling is disabled')
+            print("NOTE: called charm.printStats() but profiling is disabled")
             return
 
         em = self.runningEntryMethod
@@ -957,29 +1083,41 @@ def printStats(self):
             em.stopMeasuringTime()
             em.startMeasuringTime()
 
-        print('Timings for PE', self.myPe(), ':')
-        table = [['', 'em', 'send', 'recv', 'total']]
+        print("Timings for PE", self.myPe(), ":")
+        table = [["", "em", "send", "recv", "total"]]
         lineNb = 1
         sep = {}
         row_totals = [0.0] * 4
-        chares_sorted = sorted([(C.__module__, C.__name__,
-                                 charm_type.type_id, C, charm_type)
-                                 for C, charm_type in self.activeChares])
+        chares_sorted = sorted(
+            [
+                (C.__module__, C.__name__, charm_type.type_id, C, charm_type)
+                for C, charm_type in self.activeChares
+            ]
+        )
         for _, _, _, C, charm_type in chares_sorted:
             if C in self.internalChareTypes:
                 totaltime = 0.0
                 for em in self.classEntryMethods[charm_type.type_id][C]:
-                    if em.name == '__init__':
+                    if em.name == "__init__":
                         continue
                     totaltime += sum(em.times)
                 if totaltime < 0.001:
                     continue
-            sep[lineNb] = '------ ' + str(C) + ' as ' + charm_type.__name__ + ' ------'
+            sep[lineNb] = "------ " + str(C) + " as " + charm_type.__name__ + " ------"
             for em in self.classEntryMethods[charm_type.type_id][C]:
-                if not hasattr(em, 'times'):
+                if not hasattr(em, "times"):
                     continue
-                if C == chare.DefaultMainchare and self.entry_func is not None and em.name == '__init__':
-                    em_name = self.entry_func.__module__ + '.' + self.entry_func.__name__ + ' (main function)'
+                if (
+                    C == chare.DefaultMainchare
+                    and self.entry_func is not None
+                    and em.name == "__init__"
+                ):
+                    em_name = (
+                        self.entry_func.__module__
+                        + "."
+                        + self.entry_func.__name__
+                        + " (main function)"
+                    )
                 else:
                     em_name = em.name
                 vals = em.times + [sum(em.times)]
@@ -987,14 +1125,16 @@ def printStats(self):
                     row_totals[i] += vals[i]
                 table.append([em_name] + [str(round(v, 3)) for v in vals])
                 lineNb += 1
-        sep[lineNb] = '-----------------------------------------------------------'
-        table.append([''] + [str(round(v, 3)) for v in row_totals])
+        sep[lineNb] = "-----------------------------------------------------------"
+        table.append([""] + [str(round(v, 3)) for v in row_totals])
         lineNb += 1
-        sep[lineNb] = '-----------------------------------------------------------'
+        sep[lineNb] = "-----------------------------------------------------------"
         misc_overheads = [str(round(v, 3)) for v in self.lib.times]
-        table.append(['reductions', ' ', ' ', misc_overheads[0], misc_overheads[0]])
-        table.append(['custom reductions',   ' ', ' ', misc_overheads[1], misc_overheads[1]])
-        table.append(['migrating out',  ' ', ' ', misc_overheads[2], misc_overheads[2]])
+        table.append(["reductions", " ", " ", misc_overheads[0], misc_overheads[0]])
+        table.append(
+            ["custom reductions", " ", " ", misc_overheads[1], misc_overheads[1]]
+        )
+        table.append(["migrating out", " ", " ", misc_overheads[2], misc_overheads[2]])
         lineNb += 3
         triggerCallableTotalTime = sum(self.triggerCallableEM.times)
         if triggerCallableTotalTime > 0:
@@ -1002,12 +1142,12 @@ def printStats(self):
             for i, v in enumerate(vals):
                 row_totals[i] += v
             times = [str(round(v, 3)) for v in vals]
-            table.append(['triggerCallable'] + times)
+            table.append(["triggerCallable"] + times)
             lineNb += 1
-        sep[lineNb] = '-----------------------------------------------------------'
+        sep[lineNb] = "-----------------------------------------------------------"
         row_totals[2] += sum(self.lib.times)
         row_totals[3] += sum(self.lib.times)
-        table.append([''] + [str(round(v, 3)) for v in row_totals])
+        table.append([""] + [str(round(v, 3)) for v in row_totals])
         lineNb += 1
         self.__printTable__(table, sep)
 
@@ -1016,45 +1156,62 @@ def printStats(self):
                 num_msgs = self.msg_send_stats[0]
                 min_msgsize, max_msgsize, sum_msgsize = self.msg_send_stats[1:4]
                 avg_msgsize = sum_msgsize / num_msgs
-                print('\nMessages sent: ' + str(num_msgs))
+                print("\nMessages sent: " + str(num_msgs))
             else:
                 num_msgs = self.msg_recv_stats[0]
                 min_msgsize, max_msgsize, sum_msgsize = self.msg_recv_stats[1:4]
                 avg_msgsize = sum_msgsize / num_msgs
-                print('\nMessages received: ' + str(num_msgs))
+                print("\nMessages received: " + str(num_msgs))
             msgSizeStats = [min_msgsize, avg_msgsize, max_msgsize]
             msgSizeStats = [round(val, 3) for val in msgSizeStats]
-            print('Message size in bytes (min / mean / max): ' + ' / '.join([str(v) for v in msgSizeStats]))
-            print('Total bytes = ' + str(round(sum_msgsize / 1024.0 / 1024.0, 3)) + ' MB')
-        print('')
+            print(
+                "Message size in bytes (min / mean / max): "
+                + " / ".join([str(v) for v in msgSizeStats])
+            )
+            print(
+                "Total bytes = " + str(round(sum_msgsize / 1024.0 / 1024.0, 3)) + " MB"
+            )
+        print("")
 
     def lib_version_check(self, commit_id_str):
-        req_version = tuple([int(n) for n in open(os.path.dirname(__file__) + '/libcharm_version', 'r').read().split('.')])
-        version = [int(n) for n in commit_id_str.split('-')[0][1:].split('.')]
+        """Raise Charm4PyError if `commit_id_str` predates the required version."""
+        # The minimum version ships in a file next to this module; read it via a
+        # context manager so the handle is closed instead of being leaked.
+        version_path = os.path.dirname(__file__) + "/libcharm_version"
+        with open(version_path, "r") as version_file:
+            req_fields = version_file.read().split(".")
+        req_version = tuple([int(n) for n in req_fields])
+        version = [int(n) for n in commit_id_str.split("-")[0][1:].split(".")]
         try:
-            version = tuple(version + [int(commit_id_str.split('-')[1])])
+            version = tuple(version + [int(commit_id_str.split("-")[1])])
-        except:
+        except (IndexError, ValueError):
+            # missing or non-numeric "-<n>" suffix: count it as 0
             version = tuple(version + [0])
         if version < req_version:
-            req_str = '.'.join([str(n) for n in req_version])
-            cur_str = '.'.join([str(n) for n in version])
-            raise Charm4PyError('Charm++ version >= ' + req_str + ' required. ' +
-                                'Existing version is ' + cur_str)
+            req_str = ".".join([str(n) for n in req_version])
+            cur_str = ".".join([str(n) for n in version])
+            raise Charm4PyError(
+                "Charm++ version >= "
+                + req_str
+                + " required. "
+                + "Existing version is "
+                + cur_str
+            )
 
     def getTopoTreeEdges(self, pe, root_pe, pes=None, bfactor=4):
-        """ Returns (parent, children) of 'pe' in a tree spanning the given 'pes',
-            or all PEs if 'pes' is None
-            If 'pes' is specified, 'root_pe' must be in the first position of 'pes',
-            and 'pe' must be a member of 'pes' """
+        """Returns (parent, children) of 'pe' in a tree spanning the given 'pes',
+        or all PEs if 'pes' is None
+        If 'pes' is specified, 'root_pe' must be in the first position of 'pes',
+        and 'pe' must be a member of 'pes'"""
         return self.lib.getTopoTreeEdges(pe, root_pe, pes, bfactor)
 
     def getTopoSubtrees(self, root_pe, pes, bfactor=4):
-        """ Returns a list of subtrees of root_pe in a spanning tree containing
-            all given pes. Subtrees are returned as lists of pes in the
-            subtree: the first PE in the list is the root of the subtree, but
-            otherwise the list doesn't specify the structure of the subtree
-            (the subtree structure can be extracted by recursively calling this
-            method). """
+        """Returns a list of subtrees of root_pe in a spanning tree containing
+        all given pes. Subtrees are returned as lists of pes in the
+        subtree: the first PE in the list is the root of the subtree, but
+        otherwise the list doesn't specify the structure of the subtree
+        (the subtree structure can be extracted by recursively calling this
+        method)."""
         return self.lib.getTopoSubtrees(root_pe, pes, bfactor)
 
     def myPe(self):
@@ -1099,20 +1256,20 @@ def LBTurnInstrumentOn(self):
     def LBTurnInstrumentOff(self):
         self.lib.LBTurnInstrumentOff()
 
-    #functions for ccs 
+    # functions for ccs
     def CcsRegisterHandler(self, handlername, handler):
         self.ccs_methods[handlername] = handler
         self.lib.CcsRegisterHandler(handlername, handler)
 
     def CcsIsRemoteRequest(self):
-        self.lib.isRemoteRequest()
+        return self.lib.isRemoteRequest()  # hand the flag back to the caller
-    
+
     def CcsSendReply(self, message):
         self.lib.CcsSendReply(message)
 
     def CcsDelayReply(self):
         return self.lib.CcsDelayReply()
-    
+
     def CcsSendDelayedReply(self, d, message):
         self.lib.CcsSendDelayedReply(d, message)
 
@@ -1120,8 +1277,7 @@ def callHandler(self, handlername, data):
         if handlername in self.ccs_methods:
             self.ccs_methods[handlername](data)
         else:
-            raise Charm4PyError('Handler ' + handlername + ' not registered')
-
+            raise Charm4PyError("Handler " + handlername + " not registered")
 
 
 class CharmRemote(Chare):
@@ -1153,25 +1309,33 @@ def updateGlobals(self, *args):
 
         # TODO remove this warning and related code when the new lb framework is merged
         if charm.myPe() == 0 and charm.lb_requested:
-            print('WARNING> updateGlobals with load balancing enabled can lead to unexpected behavior '
-                  'due to a bug in Charm++ load balancing. This will be fixed in an upcoming release.')
+            print(
+                "WARNING> updateGlobals with load balancing enabled can lead to unexpected behavior "
+                "due to a bug in Charm++ load balancing. This will be fixed in an upcoming release."
+            )
             charm.lb_requested = False
         sys.modules[module_name].__dict__.update(global_dict)
 
-    def createArray(self, cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, cb=None):
+    def createArray(
+        self, cls, dims=None, ndims=-1, args=[], map=None, useAtSync=False, cb=None
+    ):
         proxy = Array(cls, dims, ndims, args, map, useAtSync)
         if cb is not None:
             cb(proxy)
         return proxy
 
-    def rexec(self, code, module_name='__main__'):
+    def rexec(self, code, module_name="__main__"):
         if charm.options.remote_exec is not True:
-            raise Charm4PyError('Remote code execution is disabled. Set charm.options.remote_exec to True')
+            raise Charm4PyError(
+                "Remote code execution is disabled. Set charm.options.remote_exec to True"
+            )
         exec(code, sys.modules[module_name].__dict__)
 
-    def eval(self, expression, module_name='__main__'):
+    def eval(self, expression, module_name="__main__"):
         if charm.options.remote_exec is not True:
-            raise Charm4PyError('Remote code execution is disabled. Set charm.options.remote_exec to True')
+            raise Charm4PyError(
+                "Remote code execution is disabled. Set charm.options.remote_exec to True"
+            )
         return eval(expression, sys.modules[module_name].__dict__)
 
     # deposit value of one of the futures that was created on this PE
@@ -1185,9 +1349,11 @@ def notify_future_deletion(self, store_id, depth):
             # if yes, remove it
             fut = charm.threadMgr.borrowed_futures[(store_id, depth)]
             refcount = ctypes.c_long.from_address(id(fut)).value
-            #print(store_id, "on pe", charm.myPe(), "depth", depth, "ref count =", refcount)
-            if (fut.parent == None and refcount == 3) or (fut.parent != None and refcount == 2):
-                #print("Real deletion of", store_id, "from", charm.myPe())
+            # print(store_id, "on pe", charm.myPe(), "depth", depth, "ref count =", refcount)
+            if (fut.parent == None and refcount == 3) or (
+                fut.parent != None and refcount == 2
+            ):
+                # print("Real deletion of", store_id, "from", charm.myPe())
                 if fut.parent == None:
                     charm.threadMgr.futures.pop(fut.fid)
                 charm.threadMgr.borrowed_futures.pop((store_id, depth))
@@ -1197,17 +1363,24 @@ def propagateException(self, error):
             charm.last_exception_timestamp = time.time()
             if charm.myPe() == 0:
                 origin, remote_stacktrace = error.remote_stacktrace
-                print('----------------- Python Stack Traceback from PE', origin, '-----------------\n', remote_stacktrace)
-                print(type(error).__name__ + ':', error, '(PE ' + str(origin) + ')')
+                print(
+                    "----------------- Python Stack Traceback from PE",
+                    origin,
+                    "-----------------\n",
+                    remote_stacktrace,
+                )
+                print(type(error).__name__ + ":", error, "(PE " + str(origin) + ")")
             else:
-                self.thisProxy[(charm.myPe()-1) // 2].propagateException(error)
+                self.thisProxy[(charm.myPe() - 1) // 2].propagateException(error)
 
     def printStats(self):
         charm.printStats()
 
     def registerNewChareType(self, name, source):
         if charm.options.remote_exec is not True:
-            raise Charm4PyError('Remote code execution is disabled. Set charm.options.remote_exec to True')
+            raise Charm4PyError(
+                "Remote code execution is disabled. Set charm.options.remote_exec to True"
+            )
         exec(source, charm.dynamic_register)
         chare_type = charm.dynamic_register[name]
         charm.register(chare_type)
@@ -1221,24 +1394,24 @@ def registerNewChareTypes(self, classes):
 
 def load_charm_library(charm):
     args = sys.argv
-    libcharm_path = os.path.join(os.path.dirname(__file__), '.libs')
-    if os.name == 'nt':
-        os.environ['PATH'] += ';' + libcharm_path
-    if '+libcharm_interface' in args:
-        arg_idx = args.index('+libcharm_interface')
+    libcharm_path = os.path.join(os.path.dirname(__file__), ".libs")
+    if os.name == "nt":
+        os.environ["PATH"] += ";" + libcharm_path
+    if "+libcharm_interface" in args:
+        arg_idx = args.index("+libcharm_interface")
         interface = args.pop(arg_idx + 1)
         args.pop(arg_idx)
-        if interface == 'cython':
+        if interface == "cython":
             from .charmlib.charmlib_cython import CharmLib
         else:
-            raise Charm4PyError('Unrecognized interface ' + interface)
+            raise Charm4PyError("Unrecognized interface " + interface)
     else:
         # pick best available interface
         import platform
+
         py_impl = platform.python_implementation()
         from .charmlib.charmlib_cython import CharmLib
-        
-      
+
     return CharmLib(charm, charm.options, libcharm_path)
 
 
@@ -1252,7 +1425,8 @@ def func_with_profiling(*args, **kwargs):
         else:
             ret = func(*args, **kwargs)
         return ret
-    if hasattr(func, 'ep'):
+
+    if hasattr(func, "ep"):
         func_with_profiling.ep = func.ep
     return func_with_profiling
 
@@ -1266,7 +1440,7 @@ def rebuildByteArray(data):
 
 
 def rebuildArray(data, typecode):
-    #a = array.array('d', data.cast(typecode))  # this is slow
+    # a = array.array('d', data.cast(typecode))  # this is slow
     a = array.array(typecode)
     a.frombytes(data)
     return a
diff --git a/charm4py/entry_method.py b/charm4py/entry_method.py
index 7cf9a623..cac140d1 100644
--- a/charm4py/entry_method.py
+++ b/charm4py/entry_method.py
@@ -16,12 +16,14 @@ def __init__(self, C, name, profile=False):
             self.running = False
 
         method = getattr(C, name)
-        if hasattr(method, '_ck_coro'):
+        if hasattr(method, "_ck_coro"):
             if not profile:
                 self.run = self._run_th
             else:
                 self.run = self._run_th_prof
-            self.thread_notify = hasattr(method, '_ck_coro_notify') and method._ck_coro_notify
+            self.thread_notify = (
+                hasattr(method, "_ck_coro_notify") and method._ck_coro_notify
+            )
         else:
             if not profile:
                 self.run = self._run
@@ -29,25 +31,25 @@ def __init__(self, C, name, profile=False):
                 self.run = self._run_prof
 
         self.when_cond = None
-        if hasattr(method, 'when_cond'):
+        if hasattr(method, "when_cond"):
             # template object specifying the 'when' condition clause
             # for this entry method
-            self.when_cond = getattr(method, 'when_cond')
+            self.when_cond = getattr(method, "when_cond")
             if isinstance(self.when_cond, wait.ChareStateMsgCond):
                 self.when_cond_func = self.when_cond.cond_func
 
     def _run(self, obj, header, args, ret_fut=False):
-        """ run entry method of the given object in the current thread """
+        """run entry method of the given object in the current thread"""
         # set last entry method executed (note that 'last_em_exec' won't
         # necessarily always coincide with the currently running entry method)
         charm.last_em_exec = self
         try:
-            #print(args)
+            # print(args)
             if ret_fut:
                 fut = args[-1]
                 args = args[:-1]
             ret = getattr(obj, self.name)(*args)
-            if ret_fut and not (ret is None):
+            if ret_fut and ret is not None:
                 fut.create_object(ret)
         except SystemExit:
             exit_code = sys.exc_info()[1].code
@@ -60,13 +62,13 @@ def _run(self, obj, header, args, ret_fut=False):
         except Exception as e:
             charm.process_em_exc(e, obj, header)
             return
-        if b'block' in header:
-            blockFuture = header[b'block']
-            if b'bcast' in header:
+        if b"block" in header:
+            blockFuture = header[b"block"]
+            if b"bcast" in header:
                 sid = None
-                if b'sid' in header:
-                    sid = header[b'sid']
-                if b'bcastret' in header:
+                if b"sid" in header:
+                    sid = header[b"sid"]
+                if b"bcastret" in header:
                     charm.contribute(ret, charm.reducers.gather, blockFuture, obj, sid)
                 else:
                     charm.contribute(None, None, blockFuture, obj, sid)
@@ -172,9 +174,10 @@ def when(cond_str):
     def _when(func):
         method_args = {}
         for i in range(1, func.__code__.co_argcount):
-            method_args[func.__code__.co_varnames[i]] = i-1
+            method_args[func.__code__.co_varnames[i]] = i - 1
         func.when_cond = wait.parse_cond_str(cond_str, func.__module__, method_args)
         return func
+
     return _when
 
 
@@ -188,6 +191,7 @@ def _coro(func):
         func._ck_coro = True
         func._ck_coro_notify = event_notify
         return func
+
     return _coro
 
 
diff --git a/charm4py/interactive.py b/charm4py/interactive.py
index 0ddade16..f5421be7 100644
--- a/charm4py/interactive.py
+++ b/charm4py/interactive.py
@@ -13,7 +13,7 @@
 
 def future_():
     f = Future()
-    charm.dynamic_register['_f'] = f
+    charm.dynamic_register["_f"] = f
     return f
 
 
@@ -22,36 +22,41 @@ class InteractiveConsole(Chare, InteractiveInterpreter):
     def __init__(self, args):
+        """Restore the tty, set up the interpreter state and start the console."""
         global Charm4PyError
         from .charm import Charm4PyError
+
         # restore original tty stdin and stdout (else readline won't work correctly)
         os.dup2(charm.origStdinFd, 0)
         os.dup2(charm.origStoutFd, 1)
-        charm.dynamic_register['future'] = future_
-        charm.dynamic_register['self'] = self
+        charm.dynamic_register["future"] = future_
+        charm.dynamic_register["self"] = self
         InteractiveInterpreter.__init__(self, locals=charm.dynamic_register)
-        self.filename = '<console>'
+        self.filename = "<console>"
         self.resetbuffer()
         # regexp to detect when user defines a new chare type
-        self.regexpChareDefine = re.compile('class\s*(\S+)\s*\(.*Chare.*\)\s*:')
+        self.regexpChareDefine = re.compile(r"class\s*(\S+)\s*\(.*Chare.*\)\s*:")
         # regexps to detect import statements
-        self.regexpImport1 = re.compile('\s*from\s*(\S+) import')
-        self.regexpImport2 = re.compile('import\s*(\S+)')
+        self.regexpImport1 = re.compile(r"\s*from\s*(\S+) import")
+        self.regexpImport2 = re.compile(r"import\s*(\S+)")
         self.options = charm.options.interactive
 
         try:
             import readline
-            import rlcompleter
-            readline.parse_and_bind('tab: complete')
+            # Imported only for its side effect: on import, rlcompleter installs
+            # the Python completer that "tab: complete" binds to.
+            import rlcompleter  # noqa: F401
+
+            readline.parse_and_bind("tab: complete")
         except:
             pass
 
         try:
             sys.ps1
         except AttributeError:
-            sys.ps1 = '>>> '
+            sys.ps1 = ">>> "
         try:
             sys.ps2
         except AttributeError:
-            sys.ps2 = '... '
+            sys.ps2 = "... "
         self.thisProxy.start()
 
     def resetbuffer(self):
@@ -69,8 +70,10 @@ def write(self, data, sched=True):
 
     @coro
     def start(self):
-        self.write('\nCharm4py interactive shell (beta)\n')
-        self.write('charm.options.interactive.verbose = ' + str(self.options.verbose) + '\n')
+        self.write("\nCharm4py interactive shell (beta)\n")
+        self.write(
+            "charm.options.interactive.verbose = " + str(self.options.verbose) + "\n"
+        )
 
         charm.scheduleCallableAfter(self.thisProxy.hang_check_phase1, HANG_CHECK_FREQ)
         self.monitorFutures = []
@@ -89,18 +92,18 @@ def start(self):
                         line = self.raw_input(prompt)
                         tick = time.time()
                     except EOFError:
-                        self.write('\n')
+                        self.write("\n")
                         break
                     else:
                         more = self.push(line)
                 except KeyboardInterrupt:
-                    self.write('\nKeyboardInterrupt\n')
+                    self.write("\nKeyboardInterrupt\n")
                     self.resetbuffer()
                     more = 0
 
     def push(self, line):
         self.buffer.append(line)
-        source = '\n'.join(self.buffer)
+        source = "\n".join(self.buffer)
         more = self.runsource(source, self.filename)
         if not more:
             self.resetbuffer()
@@ -113,15 +116,17 @@ def runcode(self, code):
                 m = self.regexpChareDefine.search(line)
                 if m is not None:
                     newChareTypeName = m.group(1)
-                    source = '\n'.join(self.buffer)
-                    charm.thisProxy.registerNewChareType(newChareTypeName, source, awaitable=True).get()
+                    source = "\n".join(self.buffer)
+                    charm.thisProxy.registerNewChareType(
+                        newChareTypeName, source, awaitable=True
+                    ).get()
                     if self.options.verbose > 0:
-                        self.write('Charm4py> Broadcasted Chare definition\n')
+                        self.write("Charm4py> Broadcasted Chare definition\n")
                     return
 
             line = self.buffer[0]
             module_name = None
-            if 'import' in line:
+            if "import" in line:
                 m = self.regexpImport1.search(line)
                 if m is not None:
                     module_name = m.group(1)
@@ -135,34 +140,50 @@ def runcode(self, code):
                 if module_name not in sys.modules:  # error importing the module
                     return
                 if self.options.broadcast_imports:
-                    charm.thisProxy.rexec('\n'.join(self.buffer), awaitable=True).get()
+                    charm.thisProxy.rexec("\n".join(self.buffer), awaitable=True).get()
                     if self.options.verbose > 0:
-                        self.write('Charm4py> Broadcasted import statement\n')
+                        self.write("Charm4py> Broadcasted import statement\n")
 
                 new_modules = set(sys.modules.keys()) - prev_modules
                 chare_types = []
                 for module_name in new_modules:
                     try:
-                        members = inspect.getmembers(sys.modules[module_name], inspect.isclass)
+                        members = inspect.getmembers(
+                            sys.modules[module_name], inspect.isclass
+                        )
                     except:
                         # some modules can throw exceptions with inspect.getmembers, ignoring them for now
                         continue
                     for C_name, C in members:
-                        if C.__module__ != chare.__name__ and hasattr(C, 'mro'):
+                        if C.__module__ != chare.__name__ and hasattr(C, "mro"):
                             if chare.ArrayMap in C.mro():
                                 chare_types.append(C)
                             elif Chare in C.mro():
                                 chare_types.append(C)
-                            elif chare.Group in C.mro() or chare.Array in C.mro() or chare.Mainchare in C.mro():
-                                raise Charm4PyError('Chares must not inherit from Group, Array or'
-                                                    ' Mainchare. Refer to new API')
+                            elif (
+                                chare.Group in C.mro()
+                                or chare.Array in C.mro()
+                                or chare.Mainchare in C.mro()
+                            ):
+                                raise Charm4PyError(
+                                    "Chares must not inherit from Group, Array or"
+                                    " Mainchare. Refer to new API"
+                                )
                 if len(chare_types) > 0:
                     if self.options.broadcast_imports:
-                        charm.thisProxy.registerNewChareTypes(chare_types, awaitable=True).get()
+                        charm.thisProxy.registerNewChareTypes(
+                            chare_types, awaitable=True
+                        ).get()
                         if self.options.verbose > 0:
-                            self.write('Broadcasted the following chare definitions: ' + str([str(C) for C in chare_types]) + '\n')
+                            self.write(
+                                "Broadcasted the following chare definitions: "
+                                + str([str(C) for C in chare_types])
+                                + "\n"
+                            )
                     else:
-                        self.write('Charm4py> ERROR: import module(s) contain Chare definitions but the import was not broadcasted\n')
+                        self.write(
+                            "Charm4py> ERROR: import module(s) contain Chare definitions but the import was not broadcasted\n"
+                        )
                 return
         except:
             self.showtraceback()
@@ -171,14 +192,18 @@ def runcode(self, code):
         InteractiveInterpreter.runcode(self, code)
         self.interactive_running = False
 
-    def raw_input(self, prompt=''):
+    def raw_input(self, prompt=""):
         return input(prompt)
 
     def hang_check_phase1(self):
         self.monitorFutures = [f for f in self.monitorFutures if f.blocked]
         if self.interactive_running:
             for f in charm.threadMgr.futures.values():
-                if f.blocked and not hasattr(f, 'ignorehang') and not hasattr(f, 'timestamp'):
+                if (
+                    f.blocked
+                    and not hasattr(f, "ignorehang")
+                    and not hasattr(f, "timestamp")
+                ):
                     f.timestamp = time.time()
                     self.monitorFutures.append(f)
             for f in self.monitorFutures:
@@ -193,19 +218,27 @@ def hang_check_phase2(self):
         charm.scheduleCallableAfter(self.thisProxy.hang_check_phase1, HANG_CHECK_FREQ)
         for f in monitor_futures:
             if f.blocked:
-                self.write('\nError: system is idle, canceling block on future\n', sched=False)
+                self.write(
+                    "\nError: system is idle, canceling block on future\n", sched=False
+                )
                 charm.threadMgr.cancelFuture(f)
 
     def showtraceback(self):
         error_type, error, tb = sys.exc_info()
-        if hasattr(error, 'remote_stacktrace'):
+        if hasattr(error, "remote_stacktrace"):
             origin, stacktrace = error.remote_stacktrace
-            self.write('----------------- Python Stack Traceback from PE ' + str(origin) + ' -----------------\n')
-            self.write(stacktrace + '\n')
-            self.write(error_type.__name__ + ': ' + str(error) + ' (PE ' + str(origin) + ')\n')
+            self.write(
+                "----------------- Python Stack Traceback from PE "
+                + str(origin)
+                + " -----------------\n"
+            )
+            self.write(stacktrace + "\n")
+            self.write(
+                error_type.__name__ + ": " + str(error) + " (PE " + str(origin) + ")\n"
+            )
         else:
             super(InteractiveConsole, self).showtraceback()
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     charm.start(interactive=True)
diff --git a/charm4py/liveviz.py b/charm4py/liveviz.py
index 0ebe7c19..b675edc8 100644
--- a/charm4py/liveviz.py
+++ b/charm4py/liveviz.py
@@ -3,228 +3,272 @@
 from collections import deque
 import struct
 from itertools import chain
+
 Reducer = charm.reducers
 
 group = None
 
+
 def viz_gather(contribs):
     return list(chain(*contribs))
 
+
 def viz_gather_preprocess(data, contributor):
     return [data]
 
+
 Reducer.addReducer(viz_gather, pre=viz_gather_preprocess)
 
+
 @dataclass
 class Config:
-  version: int = 1
-  isColor: bool = True
-  isPush: bool = True
-  is3d: bool = False
-  min: tuple = field(default_factory=lambda: (0.0, 0.0, 0.0))
-  max: tuple = field(default_factory=lambda: (1.0, 1.0, 1.0))
-  
-  def to_binary(self):
-    # Format: int, int, int, int, [double, double, double, double, double, double]
-    binary_data = struct.pack(">iiii", 
-                            self.version,
-                            1 if self.isColor else 0,
-                            1 if self.isPush else 0,
-                            1 if self.is3d else 0)
-    if self.is3d:
-      binary_data += struct.pack(">dddddd",
-                              self.min[0], self.min[1], self.min[2],
-                              self.max[0], self.max[1], self.max[2])
-    return binary_data
-    
+    version: int = 1
+    isColor: bool = True
+    isPush: bool = True
+    is3d: bool = False
+    min: tuple = field(default_factory=lambda: (0.0, 0.0, 0.0))
+    max: tuple = field(default_factory=lambda: (1.0, 1.0, 1.0))
+
+    def to_binary(self):
+        """Serialize this configuration with big-endian ``struct`` packing.
+
+        Layout: four ints (version, isColor, isPush, is3d; each flag packed
+        as 1 or 0). When ``is3d`` is truthy, six doubles follow: the first
+        three entries of ``min``, then the first three entries of ``max``.
+
+        Returns:
+            bytes: The packed configuration.
+        """
+        flags = [
+            int(bool(value)) for value in (self.isColor, self.isPush, self.is3d)
+        ]
+        header = struct.pack(">iiii", self.version, *flags)
+        if not self.is3d:
+            return header
+        # 3D mode: append the min triple followed by the max triple.
+        lo, hi = self.min, self.max
+        bounds = struct.pack(">dddddd", lo[0], lo[1], lo[2], hi[0], hi[1], hi[2])
+        return header + bounds
+
+
 class Vector3d:
-  def __init__(self, x=0.0, y=0.0, z=0.0):
-    self.x = x
-    self.y = y
-    self.z = z
-  
-  @classmethod
-  def from_bytes(cls, data, offset=0):
-    # Read 3 doubles from the data starting at offset
-    x, y, z = struct.unpack_from(">ddd", data, offset)
-    return cls(x, y, z), offset + 24  # 24 = 3 * 8 bytes (double)
-  
+    def __init__(self, x=0.0, y=0.0, z=0.0):
+        """Store the x, y and z components (each defaults to 0.0)."""
+        # Single tuple assignment; no validation or conversion is performed.
+        self.x, self.y, self.z = x, y, z
+
+    @classmethod
+    def from_bytes(cls, data, offset=0):
+        """Decode three big-endian doubles; return (vector, offset past them)."""
+        fmt = ">ddd"
+        return cls(*struct.unpack_from(fmt, data, offset)), offset + struct.calcsize(fmt)
+
+
 class ImageRequest:
-  def __init__(self, version, request_type, width, height, 
-              x=None, y=None, z=None, o=None, minZ=0.0, maxZ=0.0):
-    self.version = version
-    self.request_type = request_type
-    self.width = width
-    self.height = height
-    self.x = x
-    self.y = y
-    self.z = z
-    self.o = o
-    self.minZ = minZ
-    self.maxZ = maxZ
-  
-  @classmethod
-  def from_bytes(cls, data):
-    if len(data) < 16:  # At least 4 ints
-      raise ValueError("Not enough data to decode ImageRequest")
-    
-    version, request_type, width, height = struct.unpack_from(">iiii", data, 0)
-    
-    # If there's more data, we have the optional fields
-    if len(data) > 16:
-      offset = 16
-      x, offset = Vector3d.from_bytes(data, offset)
-      y, offset = Vector3d.from_bytes(data, offset)
-      z, offset = Vector3d.from_bytes(data, offset)
-      o, offset = Vector3d.from_bytes(data, offset)
-      minZ, maxZ = struct.unpack_from(">dd", data, offset)
-      
-      return cls(version, request_type, width, height, x, y, z, o, minZ, maxZ)
-    else:
-      return cls(version, request_type, width, height)
-  
+    def __init__(
+        self, version, request_type, width, height,
+        x=None, y=None, z=None, o=None, minZ=0.0, maxZ=0.0,
+    ):
+        """Hold the fields of one image request as same-named attributes.
+
+        Values are stored exactly as given; nothing is validated.
+
+        Args:
+            version: First int of the header decoded by ``from_bytes``.
+            request_type: Second header int.
+            width: Third header int; the requested image width.
+            height: Fourth header int; the requested image height.
+            x, y, z, o: Optional ``Vector3d`` values; ``None`` when the
+                request carried only the four header ints.
+            minZ, maxZ: Optional trailing doubles; 0.0 when absent.
+        """
+        # Mandatory header fields.
+        self.version, self.request_type = version, request_type
+        self.width, self.height = width, height
+        # Optional extended fields (left at their defaults for short requests).
+        self.x, self.y, self.z, self.o = x, y, z, o
+        self.minZ, self.maxZ = minZ, maxZ
+
+    @classmethod
+    def from_bytes(cls, data):
+        """Decode a big-endian request: 4 ints, optionally 4 vectors + 2 doubles.
+
+        Raises:
+            ValueError: If ``data`` is shorter than the 16-byte header.
+        """
+        header_size = 16  # four 4-byte ints
+        if len(data) < header_size:
+            raise ValueError("Not enough data to decode ImageRequest")
+        header = struct.unpack_from(">iiii", data, 0)
+        if len(data) <= header_size:
+            return cls(*header)
+        # Anything past the header is read as x, y, z, o and then minZ, maxZ.
+        cursor, vectors = header_size, []
+        for _ in range(4):
+            vec, cursor = Vector3d.from_bytes(data, cursor)
+            vectors.append(vec)
+        return cls(*header, *vectors, *struct.unpack_from(">dd", data, cursor))
+
+
 @register
 class LiveVizGroup(Chare):
-  
-  def __init__(self, cb, poll):
-    self.callback = cb
-    self.poll = poll
-    charm.CcsRegisterHandler("lvImage", self.image_handler)
-    if poll:
-      self.requests = deque()
-      self.images = deque()
-
-  def send(self, result):
-    image = ByteImage.from_contributions(result, LiveViz.cfg.isColor)
-    if self.poll:
-      if len(self.requests) > 0:
-        req, delayed = self.requests.popleft()
-        output = ByteImage.with_image_in_corner(image, req.width, req.height)
-        charm.CcsSendDelayedReply(delayed, output.to_binary())
-      else:
-        print("sent")
-        self.images.append(image)
-    else:
-      output = ByteImage.with_image_in_corner(image, self.wid, self.ht)
-      charm.CcsSendDelayedReply(self.reply, output.to_binary())
-
-  def image_handler(self, msg):
-    request = ImageRequest.from_bytes(msg)
-    if self.poll:
-      if len(self.images) > 0:
-        output = ByteImage.with_image_in_corner(self.images.popleft(), request.width, request.height)
-        charm.CcsSendReply(output.to_binary())
-      else:
-        self.requests.append((request, charm.CcsDelayReply()))
-    else:
-      self.ht = request.height
-      self.wid = request.width
-      self.callback(request)
-      self.reply = charm.CcsDelayReply()
-  
+
+    def __init__(self, cb, poll):
+        """Register the "lvImage" CCS handler; polling mode also gets queues."""
+        self.callback, self.poll = cb, poll
+        charm.CcsRegisterHandler("lvImage", self.image_handler)
+        if self.poll:
+            # Requests waiting for an image / images waiting for a request.
+            self.requests, self.images = deque(), deque()
+
+    def send(self, result):
+        """Build the image from the gathered contributions and reply with it."""
+        image = ByteImage.from_contributions(result, LiveViz.cfg.isColor)
+        if self.poll:
+            if not self.requests:
+                self.images.append(image)  # nobody waiting: keep for image_handler
+                return
+            req, reply = self.requests.popleft()
+            width, height = req.width, req.height
+        else:  # wid, ht and reply were stored by image_handler
+            reply, width, height = self.reply, self.wid, self.ht
+        output = ByteImage.with_image_in_corner(image, width, height)
+        charm.CcsSendDelayedReply(reply, output.to_binary())
+
+    def image_handler(self, msg):
+        """CCS handler for "lvImage": decode the request, then serve or defer it."""
+        request = ImageRequest.from_bytes(msg)
+        if not self.poll:
+            self.ht, self.wid = request.height, request.width
+            self.callback(request)
+            self.reply = charm.CcsDelayReply()  # used later by send()
+            return
+        if not self.images:
+            # Nothing buffered yet: park the request with its delayed reply.
+            self.requests.append((request, charm.CcsDelayReply()))
+            return
+        image = self.images.popleft()
+        output = ByteImage.with_image_in_corner(image, request.width, request.height)
+        charm.CcsSendReply(output.to_binary())
+
+
 class ByteImage:
-  def __init__(self, data=None, width=0, height=0, is_color=True):
-    """
-    Initialize a byte image
-    
-    Args:
-        data (bytes, optional): Raw image data as bytes, or None to create empty image
-        width (int): Image width in pixels
-        height (int): Image height in pixels 
-        is_color (bool): Whether the image is in color (True) or grayscale (False)
-    """
-    self.width = width
-    self.height = height
-    self.is_color = is_color
-    self.bytes_per_pixel = 3 if is_color else 1
-    
-    if data is not None:
-      self.data = data
-    else:
-      self.data = bytes(width * height * self.bytes_per_pixel)
-  
-  @classmethod
-  def from_contributions(cls, contribs, is_color=True):
-    """
-    Create a ByteImage from multiple contributions, positioning each
-    contribution at the right location.
-    
-    Args:
-        contribs (list): List of tuples with format 
-            (bytes_data, startx, starty, local_height, local_width, total_height, total_width)
-        is_color (bool): Whether the image is in color
-    
-    Returns:
-        ByteImage: A composite image with all contributions in the right positions
-    """        
-    _, _, _, _, _, total_height, total_width = contribs[0]
-    bytes_per_pixel = 3 if is_color else 1
-    
-    buffer = bytearray(total_width * total_height * bytes_per_pixel)
-    
-    for data, startx, starty, local_height, local_width, _, _ in contribs:
-      for y in range(local_height):
-        for x in range(local_width):
-          src_pos = (y * local_width + x) * bytes_per_pixel
-          dst_pos = ((starty + y) * total_width + (startx + x)) * bytes_per_pixel
-          
-          if src_pos + bytes_per_pixel <= len(data):
-            buffer[dst_pos:dst_pos + bytes_per_pixel] = (buffer[dst_pos:dst_pos + bytes_per_pixel] + data[src_pos:src_pos + bytes_per_pixel]) % 256
-    
-    return cls(bytes(buffer), total_width, total_height, is_color)
-  
-  def to_binary(self):
-    return self.data
-
-  @classmethod
-  def with_image_in_corner(cls, src_image, new_width, new_height):
-    """
-    Create a new image with specified dimensions and place the source image
-    in the top left corner.
-    
-    Args:
-        src_image (ByteImage): Source image to place in the corner
-        new_width (int): Width of the new image
-        new_height (int): Height of the new image
-        
-    Returns:
-        ByteImage: A new image with the source image in the top left corner
-    """
-    dest_image = cls(None, new_width, new_height, src_image.is_color)
-    bytes_per_pixel = dest_image.bytes_per_pixel
-    
-    buffer = bytearray(new_width * new_height * bytes_per_pixel)
-    
-    # Calculate dimensions to copy
-    copy_width = min(new_width, src_image.width)
-    copy_height = min(new_height, src_image.height)
-    
-    for y in range(copy_height):
-      for x in range(copy_width):
-        src_pos = (y * src_image.width + x) * bytes_per_pixel
-        
-        dst_pos = (y * new_width + x) * bytes_per_pixel
-        
-        if src_pos + bytes_per_pixel <= len(src_image.data):
-          buffer[dst_pos:dst_pos + bytes_per_pixel] = src_image.data[src_pos:src_pos + bytes_per_pixel]
-    
-    return cls(bytes(buffer), new_width, new_height, src_image.is_color)
+    def __init__(self, data=None, width=0, height=0, is_color=True):
+        """
+        Wrap raw pixel bytes together with their dimensions.
+
+        Args:
+            data (bytes, optional): Pixel buffer to store as-is. When None, a
+                zero-filled buffer of width * height * bytes_per_pixel bytes
+                is allocated instead.
+            width (int): Image width in pixels
+            height (int): Image height in pixels
+            is_color (bool): True for 3 bytes per pixel, False for 1
+        """
+        self.width, self.height, self.is_color = width, height, is_color
+        self.bytes_per_pixel = 3 if is_color else 1
+
+        if data is None:
+            # bytes(n) yields n zero bytes, i.e. a blank image.
+            data = bytes(width * height * self.bytes_per_pixel)
+        self.data = data
+
+    @classmethod
+    def from_contributions(cls, contribs, is_color=True):
+        """
+        Create a ByteImage from multiple contributions, positioning each
+        contribution at the right location. Where contributions overlap,
+        their byte values are added modulo 256.
+
+        Args:
+            contribs (list): Non-empty list of tuples with format
+                (bytes_data, startx, starty, local_height, local_width, total_height, total_width)
+                The image size is taken from the first tuple.
+            is_color (bool): Whether the image is in color
+
+        Returns:
+            ByteImage: A composite image with all contributions in the right positions
+        """
+        _, _, _, _, _, total_height, total_width = contribs[0]
+        bpp = 3 if is_color else 1  # bytes per pixel
+        buffer = bytearray(total_width * total_height * bpp)
+
+        for data, startx, starty, local_height, local_width, _, _ in contribs:
+            for y in range(local_height):
+                for x in range(local_width):
+                    src_pos = (y * local_width + x) * bpp
+                    dst_pos = ((starty + y) * total_width + startx + x) * bpp
+                    if src_pos + bpp > len(data) or dst_pos + bpp > len(buffer):
+                        continue  # pixel missing from data or past the image end
+                    # Add per channel: "+" on byte strings would concatenate, not sum.
+                    for k in range(bpp):
+                        buffer[dst_pos + k] = (
+                            buffer[dst_pos + k] + int(data[src_pos + k])
+                        ) % 256
+
+        return cls(bytes(buffer), total_width, total_height, is_color)
+
+    def to_binary(self):
+        return self.data  # the pixel buffer exactly as stored
+
+    @classmethod
+    def with_image_in_corner(cls, src_image, new_width, new_height):
+        """
+        Create a new image with specified dimensions and place the source image
+        in the top left corner. The source is cropped if it is larger than the
+        new image; any area it does not cover is left as zero bytes.
+
+        Args:
+            src_image (ByteImage): Source image to place in the corner
+            new_width (int): Width of the new image
+            new_height (int): Height of the new image
+
+        Returns:
+            ByteImage: A new image with the source image in the top left corner
+        """
+        # Derive the pixel size directly instead of allocating a full-size
+        # throwaway image just to read its bytes_per_pixel.
+        bytes_per_pixel = 3 if src_image.is_color else 1
+        buffer = bytearray(new_width * new_height * bytes_per_pixel)
+
+        # Calculate dimensions to copy
+        copy_width = min(new_width, src_image.width)
+        copy_height = min(new_height, src_image.height)
+        row_bytes = copy_width * bytes_per_pixel
+
+        for y in range(copy_height):
+            # Copy one row per slice assignment rather than pixel by pixel.
+            src_pos = y * src_image.width * bytes_per_pixel
+            dst_pos = y * new_width * bytes_per_pixel
+            row = src_image.data[src_pos : src_pos + row_bytes]
+            # Keep only whole pixels if the source data is shorter than declared.
+            usable = len(row) - len(row) % bytes_per_pixel
+            buffer[dst_pos : dst_pos + usable] = row[:usable]
+
+        return cls(bytes(buffer), new_width, new_height, src_image.is_color)
+
 
 class LiveViz:
-  cfg = None
-  
-  @classmethod
-  def config_handler(cls, msg):
-    charm.CcsSendReply(cls.cfg.to_binary())
-  
-  @classmethod
-  def deposit(cls, buffer, elem, x, y, ht, wid, g_ht, g_wid):
-    elem.reduce(group.send, data=(buffer,x,y,ht,wid,g_ht,g_wid), reducer=Reducer.viz_gather)
-  
-  @classmethod
-  def init(cls, cfg, cb, poll=False):
-    global group
-    cls.cfg = cfg
-    grp = Chare(LiveVizGroup, args=[cb, poll], onPE=0)
-    charm.thisProxy.updateGlobals({'group': grp}, awaitable=True, module_name='charm4py.liveviz').get()
-    charm.CcsRegisterHandler("lvConfig", cls.config_handler)
+    cfg = None
+
+    @classmethod
+    def config_handler(cls, msg):
+        charm.CcsSendReply(cls.cfg.to_binary())  # msg is unused; replies with cls.cfg packed
+
+    @classmethod
+    def deposit(cls, buffer, elem, x, y, ht, wid, g_ht, g_wid):
+        """Contribute elem's image piece to the viz_gather reduction."""
+        # Tuple order must match what ByteImage.from_contributions unpacks:
+        # (data, startx, starty, local_height, local_width, total_h, total_w).
+        piece = (buffer, x, y, ht, wid, g_ht, g_wid)
+        elem.reduce(group.send, data=piece, reducer=Reducer.viz_gather)
+
+    @classmethod
+    def init(cls, cfg, cb, poll=False):
+        """Store cfg, create LiveVizGroup on PE 0 and register "lvConfig"."""
+        global group
+        cls.cfg = cfg
+        shared = {"group": Chare(LiveVizGroup, args=[cb, poll], onPE=0)}
+        module = "charm4py.liveviz"
+        charm.thisProxy.updateGlobals(shared, awaitable=True, module_name=module).get()
+        charm.CcsRegisterHandler("lvConfig", cls.config_handler)
diff --git a/charm4py/object_store.py b/charm4py/object_store.py
index 4462e7ef..78fc33bc 100644
--- a/charm4py/object_store.py
+++ b/charm4py/object_store.py
@@ -1,6 +1,11 @@
-from charm4py import charm, Chare, Group, Array, Future, coro, Channel, Reducer, register
+from charm4py import (
+    charm,
+    Chare,
+    register,
+)
 from charm4py.c_object_store import CObjectStore
 
+
 @register
 class ObjectStore(Chare):
     def __init__(self):
@@ -13,27 +18,25 @@ def delete_remote_objects(self, obj_id):
         self._object_store.delete_remote_objects(obj_id)
 
     def delete_object(self, obj_id):
-        """Delete this object from the local object store
-        """
+        """Delete this object from the local object store"""
         self._object_store.delete_object(obj_id)
 
     def lookup_object(self, obj_id):
-        """ Lookup object in local object map
-        """
+        """Lookup object in local object map"""
         return self._object_store.lookup_object(obj_id)
-    
+
     def lookup_location(self, obj_id):
-        """ Lookup location in local location map
+        """Lookup location in local location map
         If not found in local map, send a message to home PE to get the location
         back on this PE
         """
         return self._object_store.lookup_location(obj_id)
-    
+
     def update_location(self, obj_id, pe):
-        """ Update location in local map
+        """Update location in local map
         Check buffers for location requests and object requests
         Also check send buffer to see if any message is buffered to send. This is
-        currently not implemented, currently messages are only buffered at the 
+        currently not implemented, currently messages are only buffered at the
         receiving PE
         """
         self._object_store.update_location(obj_id, pe)
@@ -41,9 +44,9 @@ def update_location(self, obj_id, pe):
 
     def insert_object_small(self, obj_id, obj):
         self._object_store.insert_object_small(obj_id, obj)
-    
+
     def receive_remote_object(self, obj_id, obj):
-        """ Add object to local object map
+        """Add object to local object map
         Then check receive buffer to see if any messages are buffered
         on the receiving end on this PE
         """
@@ -52,19 +55,19 @@ def receive_remote_object(self, obj_id, obj):
         charm.check_futures_buffer(obj_id)
 
     def request_object(self, obj_id, requesting_pe):
-        """ If obj_id is found in the local object map, then send it back to the
+        """If obj_id is found in the local object map, then send it back to the
         requesting PE. Else buffer the request
         """
         self._object_store.request_object(obj_id, requesting_pe)
 
     def request_location(self, obj_id, requesting_pe):
-        """ If location for obj_id is in the local map, then send the location back to the
+        """If location for obj_id is in the local map, then send the location back to the
         requesting PE. Else buffer the request
         """
         self._object_store.request_location(obj_id, requesting_pe)
 
     def request_location_object(self, obj_id, requesting_pe):
-        """ If location for obj_id is in the local map, send a request_location call to
+        """If location for obj_id is in the local map, send a request_location call to
         the location of obj_id and add the requesting PE to the local location map. Else
         buffer the request
         """
@@ -77,7 +80,7 @@ def bulk_send_location(self, obj_id, requesting_pes):
         self._object_store.bulk_send_location(obj_id, requesting_pes)
 
     def create_object(self, obj_id, obj):
-        """ Add the object to the local object map and send an update_location
+        """Add the object to the local object map and send an update_location
         call to the home PE of obj_id
         """
         self._object_store.create_object(obj_id, obj)
diff --git a/charm4py/pool.py b/charm4py/pool.py
index 3ec0e025..0eb404eb 100644
--- a/charm4py/pool.py
+++ b/charm4py/pool.py
@@ -1,4 +1,4 @@
-from . import charm, Chare, Group, Array, coro_ext, threads, Future, register, ray
+from . import charm, Chare, Array, coro_ext, threads, Future, register
 from .charm import Charm4PyError
 from .threads import NotThreadedError
 from collections import defaultdict
@@ -34,7 +34,9 @@ def __init__(self, id, func, tasks, result, ncores, chunksize, is_ray=False):
         self.id = id
         self.max_cores = ncores
         self.n_avail = ncores
-        self.func = func  # if func is not None, function is the same for all tasks in the job
+        self.func = (
+            func  # if func is not None, function is the same for all tasks in the job
+        )
         self.workers = []  # ID of workers who have executed tasks from this job
         self.chunked = chunksize > 1
         self.threaded = False
@@ -43,22 +45,28 @@ def __init__(self, id, func, tasks, result, ncores, chunksize, is_ray=False):
         self.is_ray = is_ray
         assert chunksize > 0
         if func is not None:
-            self.threaded = hasattr(func, '_ck_coro')
+            self.threaded = hasattr(func, "_ck_coro")
         else:
             # this is not efficient, especially considering that we iterate over
             # the tasks again below. This case is only needed for submit(). Might
             # just want to consider removing submit() to simplify code?
             for func_, args in tasks:
-                if hasattr(func_, '_ck_coro'):
+                if hasattr(func_, "_ck_coro"):
                     self.threaded = True
                     break
         if self.chunked:
             if result is None or isinstance(result, threads.Future):
                 self.results = [None] * len(tasks)
                 self.future = result
-                self.tasks = [Chunk(tasks[i:i+chunksize], i) for i in range(0, len(tasks), chunksize)]
+                self.tasks = [
+                    Chunk(tasks[i : i + chunksize], i)
+                    for i in range(0, len(tasks), chunksize)
+                ]
             else:
-                self.tasks = [Chunk(tasks[i:i+chunksize], result[i:i+chunksize]) for i in range(0, len(tasks), chunksize)]
+                self.tasks = [
+                    Chunk(tasks[i : i + chunksize], result[i : i + chunksize])
+                    for i in range(0, len(tasks), chunksize)
+                ]
         else:
             if result is None or isinstance(result, threads.Future):
                 self.results = [None] * len(tasks)
@@ -66,12 +74,17 @@ def __init__(self, id, func, tasks, result, ncores, chunksize, is_ray=False):
                 if func is not None:
                     self.tasks = [Task(args, i) for i, args in enumerate(tasks)]
                 else:
-                    self.tasks = [Task(args, i, func) for i, (func, args) in enumerate(tasks)]
+                    self.tasks = [
+                        Task(args, i, func) for i, (func, args) in enumerate(tasks)
+                    ]
             else:
                 if func is not None:
                     self.tasks = [Task(args, result[i]) for i, args in enumerate(tasks)]
                 else:
-                    self.tasks = [Task(args, result[i], func) for i, (func, args) in enumerate(tasks)]
+                    self.tasks = [
+                        Task(args, result[i], func)
+                        for i, (func, args) in enumerate(tasks)
+                    ]
         # print('Created job with', len(self.tasks), 'tasks')
         self.tasks_pending = len(self.tasks)
 
@@ -102,11 +115,15 @@ def __init__(self):
 
     def __start__(self, func, tasks, result):
         if self.workers is None:
-            assert self.num_workers > 0, 'Run with more than 1 PE to use charm.pool'
+            assert self.num_workers > 0, "Run with more than 1 PE to use charm.pool"
             # first time running a job, create Group of workers
-            print('Initializing charm.pool with', self.num_workers, 'worker PEs. '
-                  'Warning: charm.pool is experimental (API and performance '
-                  'is subject to change)')
+            print(
+                "Initializing charm.pool with",
+                self.num_workers,
+                "worker PEs. "
+                "Warning: charm.pool is experimental (API and performance "
+                "is subject to change)",
+            )
             self.workers = Array(Worker, charm.numPes(), args=[self.thisProxy])
 
         if len(self.job_id_pool) == 0:
@@ -118,10 +135,14 @@ def __start__(self, func, tasks, result):
         if charm.interactive:
             try:
                 if func is not None:
-                    self.workers.check(func.__module__, func.__name__, awaitable=True).get()
+                    self.workers.check(
+                        func.__module__, func.__name__, awaitable=True
+                    ).get()
                 else:
                     for func_, args in tasks:
-                        self.workers.check(func_.__module__, func_.__name__, awaitable=True).get()
+                        self.workers.check(
+                            func_.__module__, func_.__name__, awaitable=True
+                        ).get()
             except Exception as e:
                 if result is None:
                     raise e
@@ -153,13 +174,23 @@ def start(self, func, tasks, result, ncores, chunksize, is_ray=False):
         if ncores < 0:
             ncores = self.num_workers
         elif ncores > self.num_workers:
-            print('charm.pool Warning: requested more cores than are '
-                  'available. Using max available cores')
+            print(
+                "charm.pool Warning: requested more cores than are "
+                "available. Using max available cores"
+            )
             ncores = self.num_workers
 
         self.__start__(func, tasks, result)
 
-        job = Job(self.job_id_pool.pop(), func, tasks, result, ncores, chunksize, is_ray=is_ray)
+        job = Job(
+            self.job_id_pool.pop(),
+            func,
+            tasks,
+            result,
+            ncores,
+            chunksize,
+            is_ray=is_ray,
+        )
         self.__addJob__(job)
 
         if job.chunked:
@@ -215,11 +246,23 @@ def schedule(self):
                         self.workers.elemIdx = (worker_id,)
                     else:
                         self.workers.elemIdx = worker_id
-                                
+
                     if isinstance(task.data, tuple):
-                        job.remote(func, [task.result_dest], job.id, *task.data, is_ray=job.is_ray)
+                        job.remote(
+                            func,
+                            [task.result_dest],
+                            job.id,
+                            *task.data,
+                            is_ray=job.is_ray,
+                        )
                     else:
-                        job.remote(func, [task.result_dest], job.id, task.data, is_ray=job.is_ray)
+                        job.remote(
+                            func,
+                            [task.result_dest],
+                            job.id,
+                            task.data,
+                            is_ray=job.is_ray,
+                        )
 
                 if len(job.tasks) == 0:
                     prev.job_next = job.job_next
@@ -238,7 +281,7 @@ def schedule(self):
                 job = prev.job_next
 
     def taskFinished(self, worker_id, job_id, result=None):
-        #print('Job finished')
+        # print('Job finished')
         job = self.jobs[job_id]
         if job.failed:
             return self.taskError(worker_id, job_id, job.exception)
@@ -246,7 +289,7 @@ def taskFinished(self, worker_id, job_id, result=None):
             if job.chunked:
                 i, results = result
                 n = len(results)
-                job.results[i:i+n] = results
+                job.results[i : i + n] = results
             else:
                 i, _result = result
                 job.results[i] = _result
@@ -272,7 +315,7 @@ def threadResumed(self, worker_id):
         self.idle_workers.discard(worker_id)
 
     def migrated(self):
-        charm.abort('Someone migrated PoolScheduler which is non-migratable')
+        charm.abort("Someone migrated PoolScheduler which is non-migratable")
 
     def taskError(self, worker_id, job_id, exception):
         job = self.jobs[job_id]
@@ -281,7 +324,7 @@ def taskError(self, worker_id, job_id, exception):
         # marking as failed will allow the scheduler to delete it from the linked list
         # NOTE that we will only delete from the 'jobs' list once all the pending tasks are done
         job.failed = True
-        if not hasattr(job, 'future'):
+        if not hasattr(job, "future"):
             if job.chunked:
                 for chunk in job.tasks:
                     for f in chunk.result_dest:
@@ -296,7 +339,7 @@ def taskError(self, worker_id, job_id, exception):
             self.job_id_pool.add(job_id)
             for worker_id in job.workers:
                 self.worker_knows[worker_id].remove(job.id)
-            if hasattr(job, 'future'):
+            if hasattr(job, "future"):
                 if job.future is not None:
                     job.future.send(job.exception)
                 else:
@@ -309,7 +352,9 @@ class Worker(Chare):
 
     def __init__(self, scheduler):
         self.scheduler = scheduler
-        assert len(self.scheduler.elemIdx) > 0  # make sure points to the element, not collection
+        assert (
+            len(self.scheduler.elemIdx) > 0
+        )  # make sure points to the element, not collection
         self.__addThreadEventSubscriber__(scheduler, self.thisIndex)
         # TODO: when to purge entries from this dict?
         self.funcs = {}  # job ID -> function used by this job ID
@@ -334,14 +379,20 @@ def runTask(self, func, result_destination, job_id, *args):
         try:
             result = func(*args)
             if isinstance(result_destination, int):
-                self.scheduler.taskFinished(self.thisIndex, job_id, (result_destination, result))
+                self.scheduler.taskFinished(
+                    self.thisIndex, job_id, (result_destination, result)
+                )
             else:
                 # assume result_destination is a future
                 result_destination.send(result)
                 self.scheduler.taskFinished(self.thisIndex, job_id)
         except Exception as e:
             if isinstance(e, NotThreadedError):
-                e = Charm4PyError('Function ' + str(func) + ' must be decorated with @coro to be able to suspend')
+                e = Charm4PyError(
+                    "Function "
+                    + str(func)
+                    + " must be decorated with @coro to be able to suspend"
+                )
             charm.prepareExceptionForSend(e)
             self.scheduler.taskError(self.thisIndex, job_id, e)
             if not isinstance(result_destination, int):
@@ -356,14 +407,20 @@ def runTask_star(self, func, result_destination, job_id, *args):
         try:
             result = func(*args)
             if isinstance(result_destination, int):
-                self.scheduler.taskFinished(self.thisIndex, job_id, (result_destination, result))
+                self.scheduler.taskFinished(
+                    self.thisIndex, job_id, (result_destination, result)
+                )
             else:
                 # assume result_destination is a future
                 result_destination.send(result)
                 self.scheduler.taskFinished(self.thisIndex, job_id)
         except Exception as e:
             if isinstance(e, NotThreadedError):
-                e = Charm4PyError('Function ' + str(func) + ' must be decorated with @coro to be able to suspend')
+                e = Charm4PyError(
+                    "Function "
+                    + str(func)
+                    + " must be decorated with @coro to be able to suspend"
+                )
             charm.prepareExceptionForSend(e)
             self.scheduler.taskError(self.thisIndex, job_id, e)
             if not isinstance(result_destination, int):
@@ -404,7 +461,9 @@ def runChunk(self, func, result_destination, job_id, *chunk):
 
     def send_chunk_results(self, results, result_destination, job_id):
         if isinstance(result_destination, int):
-            self.scheduler.taskFinished(self.thisIndex, job_id, (result_destination, results))
+            self.scheduler.taskFinished(
+                self.thisIndex, job_id, (result_destination, results)
+            )
         else:
             # assume result_destination is a list of futures
             # TODO: should send all results together to PE where future was created,
@@ -415,7 +474,7 @@ def send_chunk_results(self, results, result_destination, job_id):
 
     def send_chunk_exc(self, e, result_destination, job_id):
         if isinstance(e, NotThreadedError):
-            e = Charm4PyError('Function not decorated with @coro tried to suspend')
+            e = Charm4PyError("Function not decorated with @coro tried to suspend")
         charm.prepareExceptionForSend(e)
         self.scheduler.taskError(self.thisIndex, job_id, e)
         if not isinstance(result_destination, int):
@@ -424,7 +483,9 @@ def send_chunk_exc(self, e, result_destination, job_id):
 
     def check(self, func_module, func_name):
         if charm.options.remote_exec is not True:
-            raise Charm4PyError('Remote code execution is disabled. Set charm.options.remote_exec to True')
+            raise Charm4PyError(
+                "Remote code execution is disabled. Set charm.options.remote_exec to True"
+            )
         eval(func_name, sys.modules[func_module].__dict__)
 
 
@@ -457,10 +518,14 @@ def Task(self, func, args, ret=False, awaitable=False):
     def map(self, func, iterable, chunksize=1, ncores=-1, is_ray=False):
         result = Future(store=is_ray)
         # TODO shouldn't send task objects to a central place. what if they are large?
-        self.pool_scheduler.start(func, iterable, result, ncores, chunksize, is_ray=is_ray)
+        self.pool_scheduler.start(
+            func, iterable, result, ncores, chunksize, is_ray=is_ray
+        )
         return result.get()
 
-    def map_async(self, func, iterable, chunksize=1, ncores=-1, multi_future=False, is_ray=False):
+    def map_async(
+        self, func, iterable, chunksize=1, ncores=-1, multi_future=False, is_ray=False
+    ):
         if self.mype == 0:
             # see deepcopy comment above (only need this for async case since
             # the sync case won't return until all the tasks have finished)
@@ -469,7 +534,9 @@ def map_async(self, func, iterable, chunksize=1, ncores=-1, multi_future=False,
             result = [Future(store=is_ray) for _ in range(len(iterable))]
         else:
             result = Future(store=is_ray)
-        self.pool_scheduler.start(func, iterable, result, ncores, chunksize, is_ray=is_ray)
+        self.pool_scheduler.start(
+            func, iterable, result, ncores, chunksize, is_ray=is_ray
+        )
         return result
 
     # iterable is a sequence of (function, args) tuples
diff --git a/charm4py/ray/api.py b/charm4py/ray/api.py
index c49f6250..6f39840b 100644
--- a/charm4py/ray/api.py
+++ b/charm4py/ray/api.py
@@ -1,20 +1,27 @@
 import types
-from copy import deepcopy
 
 counter = 0
 
+
 def init():
     from charm4py import charm, Group, ObjectStore
+
     global object_store
     object_store = Group(ObjectStore)
-    charm.thisProxy.updateGlobals({'object_store' : object_store,},
-                                  awaitable=True, module_name='charm4py.ray.api').get()
+    charm.thisProxy.updateGlobals(
+        {
+            "object_store": object_store,
+        },
+        awaitable=True,
+        module_name="charm4py.ray.api",
+    ).get()
 
 
 def get_object_store():
     global object_store
     return object_store
 
+
 class RayProxyFunction(object):
     def __init__(self, func):
         self.func = func
@@ -28,21 +35,25 @@ def remote(self, *args, **kwargs):
 
 class RayProxy(object):
     def __init__(self, subclass, args, pe):
-        from charm4py import Chare, register, charm
+        from charm4py import Chare
+
         self.proxy = Chare(subclass, args=args, onPE=pe)
         for f in dir(self.proxy):
-            if not f.startswith('__'):
+            if not f.startswith("__"):
                 setattr(self, f, RayProxyFunction(self.remote_function(f)))
 
     def remote_function(self, f):
         proxy_func = getattr(self.proxy, f)
+
         def call_remote(*args, **kwargs):
             return proxy_func(*args, **kwargs, is_ray=True)
+
         return call_remote
 
 
 def get_ray_class(subclass):
     from charm4py import Chare, register, charm
+
     @register
     class RayChare(Chare):
         @staticmethod
@@ -51,37 +62,47 @@ def remote(*a):
             ray_proxy = RayProxy(subclass, a, counter % charm.numPes())
             counter += 1
             return ray_proxy
+
     return RayChare
 
+
 def get_ray_task(func):
     from charm4py import charm
+
     def task(*args):
         func._ck_coro = True
-        return charm.pool.map_async(func, [args], chunksize=1, multi_future=True, is_ray=True)[0]
+        return charm.pool.map_async(
+            func, [args], chunksize=1, multi_future=True, is_ray=True
+        )[0]
+
     return task
 
+
 def remote(*args, **kwargs):
-    from charm4py import charm, Chare, register
-    
+    from charm4py import Chare, register
+
     num_returns = kwargs.pop("num_returns", 1)
     if len(args) == 1 and len(kwargs) == 0:
         if isinstance(args[0], types.FunctionType):
             args[0].remote = get_ray_task(args[0])
             return args[0]
-        else:       
+        else:
             # decorating without any arguments
-            subclass = type(args[0].__name__, (Chare, args[0]), {"__init__": args[0].__init__})
+            subclass = type(
+                args[0].__name__, (Chare, args[0]), {"__init__": args[0].__init__}
+            )
             register(subclass)
             rayclass = get_ray_class(subclass)
             rayclass.__name__ = args[0].__name__
             return rayclass
     else:
         raise NotImplementedError("Arguments not implemented yet")
-    
+
 
 def get(arg):
     from charm4py import charm
     from ..threads import Future
+
     if isinstance(arg, Future):
         return charm.get_future_value(arg)
     elif isinstance(arg, list):
@@ -94,13 +115,15 @@ def wait(futs, num_returns=1, timeout=None, fetch_local=True):
     if timeout != None or not fetch_local:
         raise NotImplementedError("timeout and fetch_local not implemented yet")
     from charm4py import charm
+
     ready = charm.getany_future_value(futs, num_returns)
     not_ready = list(set(futs) - set(ready))
     return ready, not_ready
 
+
 def put(obj):
-    from ..threads import Future
     from ..charm import charm
+
     fut = charm.threadMgr.createFuture(store=True)
     fut.create_object(obj)
-    return fut
\ No newline at end of file
+    return fut
diff --git a/charm4py/reduction.py b/charm4py/reduction.py
index 03992500..c8e99671 100644
--- a/charm4py/reduction.py
+++ b/charm4py/reduction.py
@@ -3,21 +3,26 @@
 import operator as op
 from itertools import chain
 import sys
+
 if sys.version_info[0] < 3:
     import cPickle
 else:
     import pickle as cPickle
 try:
     import numpy as np
+
     haveNumpy = True
 except ImportError:
     # this is to avoid numpy dependency
     haveNumpy = False
+
     class NumpyDummyModule:
         class ndarray:
             pass
+
         class number:
             pass
+
     np = NumpyDummyModule()
 
 
@@ -26,14 +31,38 @@ class number:
 
 NUM_C_TYPES = 13
 # Set of integer identifiers for C types used with internal reducers
-(C_BOOL, C_CHAR, C_SHORT, C_INT, C_LONG, C_LONG_LONG,
- C_UCHAR, C_USHORT, C_UINT, C_ULONG, C_ULONG_LONG,
- C_FLOAT, C_DOUBLE) = range(NUM_C_TYPES)
+(
+    C_BOOL,
+    C_CHAR,
+    C_SHORT,
+    C_INT,
+    C_LONG,
+    C_LONG_LONG,
+    C_UCHAR,
+    C_USHORT,
+    C_UINT,
+    C_ULONG,
+    C_ULONG_LONG,
+    C_FLOAT,
+    C_DOUBLE,
+) = range(NUM_C_TYPES)
 
 # map names of C types (as they appear in CkReductionTypesExt) to their identifiers
-c_typename_to_id = {'char': C_CHAR, 'short': C_SHORT, 'int': C_INT, 'long': C_LONG, 'long_long': C_LONG_LONG,
-                    'uchar': C_UCHAR, 'ushort': C_USHORT, 'uint': C_UINT, 'ulong': C_ULONG, 'ulong_long': C_ULONG_LONG,
-                    'float': C_FLOAT, 'double': C_DOUBLE, 'bool': C_BOOL}
+c_typename_to_id = {
+    "char": C_CHAR,
+    "short": C_SHORT,
+    "int": C_INT,
+    "long": C_LONG,
+    "long_long": C_LONG_LONG,
+    "uchar": C_UCHAR,
+    "ushort": C_USHORT,
+    "uint": C_UINT,
+    "ulong": C_ULONG,
+    "ulong_long": C_ULONG_LONG,
+    "float": C_FLOAT,
+    "double": C_DOUBLE,
+    "bool": C_BOOL,
+}
 
 
 def _useNumpyForReduction(contribs):
@@ -41,6 +70,8 @@ def _useNumpyForReduction(contribs):
     # always prefer numpy when we can use it to take advantage of speed
     # also, the non-section version will return numpy arrays when possible
     return haveNumpy or isNumpyType
+
+
 # ------------------- Reducers -------------------
 
 
@@ -165,20 +196,23 @@ def __init__(self, charm):
         self.addReducer(_bcast_exc_reducer)
         self.addReducer(gather, pre=gather_preprocess, post=gather_postprocess)
 
-        self.nop     = charm.ReducerType.nop
-        self.sum     = (SUM,     self._sum)     # (internal op code, python reducer)
+        self.nop = charm.ReducerType.nop
+        self.sum = (SUM, self._sum)  # (internal op code, python reducer)
         self.product = (PRODUCT, self._product)
-        self.max     = (MAX,     self._max)
-        self.min     = (MIN,     self._min)
+        self.max = (MAX, self._max)
+        self.min = (MIN, self._min)
         self.logical_and = (AND, self._and)
-        self.logical_or  = (OR,  self._or)
+        self.logical_or = (OR, self._or)
         self.logical_xor = (XOR, self._xor)
 
     def addReducer(self, func, pre=None, post=None):
         if hasattr(self, func.__name__):
             from .charm import Charm4PyError
-            raise Charm4PyError("Reducer with name " + func.__name__ + " already registered")
-        func.hasPreprocess  = False
+
+            raise Charm4PyError(
+                "Reducer with name " + func.__name__ + " already registered"
+            )
+        func.hasPreprocess = False
         func.hasPostprocess = False
         if pre is not None:
             func.hasPreprocess = True
@@ -191,6 +225,7 @@ def addReducer(self, func, pre=None, post=None):
 
 # ------------------- Reduction Manager -------------------
 
+
 class ReductionManager(object):
 
     def __init__(self, charm, reducers):
@@ -204,50 +239,72 @@ def populateConversionTables(self):
         #     - c_type is identifier for C type (C_CHAR, C_SHORT, etc)
         #     - charm_reducer_type is value for internal reducer type as they appear in CkReductionTypesExt
         self.red_table = [[]] * 7
-        self.red_table[SUM]     = [0] * NUM_C_TYPES
+        self.red_table[SUM] = [0] * NUM_C_TYPES
         self.red_table[PRODUCT] = [0] * NUM_C_TYPES
-        self.red_table[MAX]     = [0] * NUM_C_TYPES
-        self.red_table[MIN]     = [0] * NUM_C_TYPES
-        self.red_table[AND]     = [0] * NUM_C_TYPES
-        self.red_table[OR]      = [0] * NUM_C_TYPES
-        self.red_table[XOR]     = [0] * NUM_C_TYPES
-
-        fields = self.charm.lib.getReductionTypesFields()  # get names of fields in CkReductionTypesExt
+        self.red_table[MAX] = [0] * NUM_C_TYPES
+        self.red_table[MIN] = [0] * NUM_C_TYPES
+        self.red_table[AND] = [0] * NUM_C_TYPES
+        self.red_table[OR] = [0] * NUM_C_TYPES
+        self.red_table[XOR] = [0] * NUM_C_TYPES
+
+        fields = (
+            self.charm.lib.getReductionTypesFields()
+        )  # get names of fields in CkReductionTypesExt
         maxFieldVal = max([getattr(self.charm.ReducerType, f) for f in fields])
         # charm_reducer_to_ctype maps the values in CkReductionTypesExt to C type identifier
         self.charm_reducer_to_ctype = [None] * (maxFieldVal + 1)
         for f in fields:
-            if f == 'nop':
+            if f == "nop":
                 continue
-            elif f == 'external_py':
-                op, c_type_str = None, 'char'
-            elif f.startswith('logical'):
-                op, c_type_str = f.split('_')[1:]
+            elif f == "external_py":
+                op, c_type_str = None, "char"
+            elif f.startswith("logical"):
+                op, c_type_str = f.split("_")[1:]
             else:
-                op, c_type_str = f.split('_', 1)        # e.g. from 'sum_long' extracts 'sum' and 'long'
-            ctype_code = c_typename_to_id[c_type_str]   # e.g. map 'long' to C_LONG
-            f_val = getattr(self.charm.ReducerType, f)  # value of the field in CkReductionTypesExt
+                op, c_type_str = f.split(
+                    "_", 1
+                )  # e.g. from 'sum_long' extracts 'sum' and 'long'
+            ctype_code = c_typename_to_id[c_type_str]  # e.g. map 'long' to C_LONG
+            f_val = getattr(
+                self.charm.ReducerType, f
+            )  # value of the field in CkReductionTypesExt
             # print(f, "ctype_code", ctype_code, "f_val=", f_val)
             self.charm_reducer_to_ctype[f_val] = ctype_code
-            if   op == 'sum':     self.red_table[SUM][ctype_code] = f_val
-            elif op == 'product': self.red_table[PRODUCT][ctype_code] = f_val
-            elif op == 'max':     self.red_table[MAX][ctype_code] = f_val
-            elif op == 'min':     self.red_table[MIN][ctype_code] = f_val
-            elif op == 'and':     self.red_table[AND][ctype_code] = f_val
-            elif op == 'or':      self.red_table[OR][ctype_code] = f_val
-            elif op == 'xor':     self.red_table[XOR][ctype_code] = f_val
+            if op == "sum":
+                self.red_table[SUM][ctype_code] = f_val
+            elif op == "product":
+                self.red_table[PRODUCT][ctype_code] = f_val
+            elif op == "max":
+                self.red_table[MAX][ctype_code] = f_val
+            elif op == "min":
+                self.red_table[MIN][ctype_code] = f_val
+            elif op == "and":
+                self.red_table[AND][ctype_code] = f_val
+            elif op == "or":
+                self.red_table[OR][ctype_code] = f_val
+            elif op == "xor":
+                self.red_table[XOR][ctype_code] = f_val
 
         # ------ numpy data types ------
         if haveNumpy:
             # map numpy data types to internal reduction C code identifier
-            self.numpy_type_map = {'bool': C_BOOL, 'int8': C_CHAR, 'int16': C_SHORT,
-                                   'int32': C_INT, 'int64': C_LONG, 'uint8': C_UCHAR,
-                                   'uint16': C_USHORT, 'uint32': C_UINT, 'uint64': C_ULONG,
-                                   #'float16': ?
-                                   'float32': C_FLOAT, 'float64': C_DOUBLE}
-            if np.dtype('int64').itemsize > self.charm.lib.sizeof(C_LONG):
-                self.numpy_type_map['int64']  = C_LONG_LONG
-                self.numpy_type_map['uint64'] = C_ULONG_LONG
+            self.numpy_type_map = {
+                "bool": C_BOOL,
+                "int8": C_CHAR,
+                "int16": C_SHORT,
+                "int32": C_INT,
+                "int64": C_LONG,
+                "uint8": C_UCHAR,
+                "uint16": C_USHORT,
+                "uint32": C_UINT,
+                "uint64": C_ULONG,
+                #'float16': ?
+                "float32": C_FLOAT,
+                "float64": C_DOUBLE,
+            }
+            if np.dtype("int64").itemsize > self.charm.lib.sizeof(C_LONG):
+                self.numpy_type_map["int64"] = C_LONG_LONG
+                self.numpy_type_map["uint64"] = C_ULONG_LONG
 
             # verify that mapping is correct
             for dt, c_type in self.numpy_type_map.items():
@@ -261,29 +318,56 @@ def populateConversionTables(self):
             if self.rev_np_array_type_map[C_LONG] is None:
                 self.rev_np_array_type_map[C_LONG] = np.int_().dtype.name
                 self.rev_np_array_type_map[C_ULONG] = np.uint().dtype.name
-                assert np.dtype('int_').itemsize == self.charm.lib.sizeof(C_LONG)
-                assert np.dtype('uint').itemsize == self.charm.lib.sizeof(C_ULONG)
+                assert np.dtype("int_").itemsize == self.charm.lib.sizeof(C_LONG)
+                assert np.dtype("uint").itemsize == self.charm.lib.sizeof(C_ULONG)
             if self.rev_np_array_type_map[C_LONG_LONG] is None:
                 self.rev_np_array_type_map[C_LONG_LONG] = np.longlong().dtype.name
                 self.rev_np_array_type_map[C_ULONG_LONG] = np.ulonglong().dtype.name
-                assert np.dtype('longlong').itemsize == self.charm.lib.sizeof(C_LONG_LONG)
-                assert np.dtype('ulonglong').itemsize == self.charm.lib.sizeof(C_ULONG_LONG)
+                assert np.dtype("longlong").itemsize == self.charm.lib.sizeof(
+                    C_LONG_LONG
+                )
+                assert np.dtype("ulonglong").itemsize == self.charm.lib.sizeof(
+                    C_ULONG_LONG
+                )
 
         # ------ array.array data types ------
 
         # map array.array data types to internal reduction C code identifier
-        self.array_type_map = {'b': C_CHAR, 'B': C_UCHAR, 'h': C_SHORT, 'H': C_USHORT,
-                               'i': C_INT, 'I': C_UINT, 'l': C_LONG, 'L': C_ULONG,
-                               'f': C_FLOAT, 'd': C_DOUBLE}
+        self.array_type_map = {
+            "b": C_CHAR,
+            "B": C_UCHAR,
+            "h": C_SHORT,
+            "H": C_USHORT,
+            "i": C_INT,
+            "I": C_UINT,
+            "l": C_LONG,
+            "L": C_ULONG,
+            "f": C_FLOAT,
+            "d": C_DOUBLE,
+        }
         if sys.version_info >= (3, 3, 0):
-            self.array_type_map['q'] = C_LONG_LONG
-            self.array_type_map['Q'] = C_ULONG_LONG
+            self.array_type_map["q"] = C_LONG_LONG
+            self.array_type_map["Q"] = C_ULONG_LONG
 
         # verify that mapping is correct
         for dt, c_type in self.array_type_map.items():
             assert array.array(dt).itemsize == self.charm.lib.sizeof(c_type)
 
-        self.rev_array_type_map = ['b', 'b', 'h', 'i', 'l', 'q', 'B', 'H', 'I', 'L', 'Q', 'f', 'd']
+        self.rev_array_type_map = [
+            "b",
+            "b",
+            "h",
+            "i",
+            "l",
+            "q",
+            "B",
+            "H",
+            "I",
+            "L",
+            "Q",
+            "f",
+            "d",
+        ]
         assert len(self.rev_array_type_map) == NUM_C_TYPES
 
         # ------ python data types ------
@@ -337,12 +421,19 @@ def prepare(self, data, reducer, contributor):
             pyReducer = reducer
 
         if pyReducer is None:
-            assert charm_reducer_type > 0, 'Could not find a valid reducer. Check that datatype matches the operator'
+            assert (
+                charm_reducer_type > 0
+            ), "Could not find a valid reducer. Check that datatype matches the operator"
             return (charm_reducer_type, data, c_type)
         else:
-            if not hasattr(pyReducer, 'hasPreprocess'):
+            if not hasattr(pyReducer, "hasPreprocess"):
                 from .charm import Charm4PyError
-                raise Charm4PyError('Invalid reducer ' + str(reducer) + '. Reducers must be functions registered with addReducer')
+
+                raise Charm4PyError(
+                    "Invalid reducer "
+                    + str(reducer)
+                    + ". Reducers must be functions registered with addReducer"
+                )
             if pyReducer.hasPreprocess:
                 data = pyReducer.preprocess(data, contributor)
             rednMsg = ({b"custom_reducer": pyReducer.__name__}, [data])
diff --git a/charm4py/sections.py b/charm4py/sections.py
index 27ae633b..b0d1b4c0 100644
--- a/charm4py/sections.py
+++ b/charm4py/sections.py
@@ -5,7 +5,9 @@
 # Reduction Info object: holds state for an in-progress reduction
 class RedInfo(object):
     def __init__(self):
-        self.ready = False  # got all messages, can reduce and send contribution to the parent
+        self.ready = (
+            False  # got all messages, can reduce and send contribution to the parent
+        )
         self.msgs = []  # list of reduction msgs received on this PE
         self.reducer = None  # reducer function
         self.cb = None  # reduction callback
@@ -20,16 +22,19 @@ def __init__(self):
             self.parent = None
             self.children = []  # these are PE numbers
             self.local_elems = []  # list of local chares that are part of the section
-            self.buffered_msgs = []  # stores msgs received for this section before creation has completed
+            self.buffered_msgs = (
+                []
+            )  # stores msgs received for this section before creation has completed
             self.redno = 0  # current reduction number for this section
             self.reds = []  # list of RedInfo objects for pending reductions
 
-
     def __init__(self):
-        assert not hasattr(charm, 'sectionMgr')
+        assert not hasattr(charm, "sectionMgr")
         charm.sectionMgr = self
         self.profiling = charm.options.profiling
-        self.sections = defaultdict(SectionManager.SectionEntry)  # stores section entries for this PE
+        self.sections = defaultdict(
+            SectionManager.SectionEntry
+        )  # stores section entries for this PE
         self.send_ep = self.thisProxy.sendToSection.ep
 
     def createSectionDown(self, sid, pes, parent=None):
@@ -54,7 +59,7 @@ def createSectionDown(self, sid, pes, parent=None):
         entry.buffered_msgs = []
         self.releaseRed(sid, entry, entry.reds)
 
-    @when('cons is not None or gid in charm.groups')
+    @when("cons is not None or gid in charm.groups")
     def createGroupSectionDown(self, sid, gid, pes, parent=None, cons=None):
         entry = self.sections[sid]
         entry.final = True
@@ -111,8 +116,9 @@ def sendToSectionLocal(self, sid, ep, header, *args):
                 em = charm.runningEntryMethod
                 em.startMeasuringSendTime()
             msg = charm.packMsg(None, [sid, ep, header] + list(args), {})
-            charm.lib.CkGroupSendMulti(self.thisProxy.gid, entry.children,
-                                       self.send_ep, msg)
+            charm.lib.CkGroupSendMulti(
+                self.thisProxy.gid, entry.children, self.send_ep, msg
+            )
             del msg
             if profiling:
                 em.stopMeasuringSendTime()
@@ -136,7 +142,9 @@ def sendToSection(self, sid, ep, header, *args):
             # (thus avoiding any copies)
             charm.lib.sendToSection(self.thisProxy.gid, entry.children)
             if profiling:
-                charm.recordSend(charm.msg_recv_stats[4])  # send size is same as last received msg size
+                charm.recordSend(
+                    charm.msg_recv_stats[4]
+                )  # send size is same as last received msg size
                 em.stopMeasuringSendTime()
 
         for obj in entry.local_elems:
@@ -182,9 +190,13 @@ def releaseRed(self, sid, entry, reds):
                         redinfo.cb(reduced_data)
                     else:
                         if reducer == Reducer._bcast_exc_reducer:
-                            entry.parent.contrib(sid, entry.redno - 1, reduced_data, reducer, None)
+                            entry.parent.contrib(
+                                sid, entry.redno - 1, reduced_data, reducer, None
+                            )
                         else:
-                            entry.parent.contrib(sid, entry.redno - 1, reduced_data, None, None)
+                            entry.parent.contrib(
+                                sid, entry.redno - 1, reduced_data, None, None
+                            )
             else:
                 return
 
diff --git a/charm4py/threads.py b/charm4py/threads.py
index 3c6f0547..5cd84b57 100644
--- a/charm4py/threads.py
+++ b/charm4py/threads.py
@@ -4,9 +4,9 @@
 # Future IDs (fids) are sometimes carried as reference numbers inside
 # Charm++ CkCallback objects. The data type most commonly used for
 # this is unsigned short, hence this limit
-# FIXME: This could fail according to the above warning, 
+# FIXME: This could fail according to the above warning,
 # but we need large number of futures for the ray
-# programming model. 
+# programming model.
 FIDMAXVAL = 4294967295
 
 
@@ -26,16 +26,21 @@ def __init__(self, msg):
 # See commit 25e2935 if need to resurrect code where proxies were included when
 # futures were pickled.
 
+
 class Future(object):
 
     def __init__(self, fid, gr, src, num_vals, store=False):
         self.fid = fid  # unique future ID within the process that created it
         self.gr = gr  # greenlet that created the future
-        self.src = src  # PE where the future was created (not used for collective futures)
+        self.src = (
+            src  # PE where the future was created (not used for collective futures)
+        )
         self.nvals = num_vals  # number of values that the future expects to receive
         self.values = []  # values of the future
         self.blocked = False  # flag to check if creator thread is blocked on the future
-        self.gotvalues = False  # flag to check if expected number of values have been received
+        self.gotvalues = (
+            False  # flag to check if expected number of values have been received
+        )
         self.error = None  # if the future receives an Exception, it is set here
         if store:
             self.store_id = (self.src << 32) + self.fid
@@ -48,10 +53,11 @@ def __init__(self, fid, gr, src, num_vals, store=False):
         self.borrow_depth = 0
 
     def get(self):
-        """ Blocking call on current entry method's thread to obtain the values of the
-            future. If the values are already available then they are returned immediately.
+        """Blocking call on current entry method's thread to obtain the values of the
+        future. If the values are already available then they are returned immediately.
         """
         from .charm import charm
+
         if self.store:
             return charm.get_future_value(self)
         else:
@@ -74,7 +80,7 @@ def waitReady(self, f):
         self.blocked = 2
 
     def send(self, result=None):
-        """ Send a value to this future. """
+        """Send a value to this future."""
         if self.store:
             self.create_object(result)
         else:
@@ -87,7 +93,7 @@ def getTargetProxyEntryMethod(self):
         return charm.thisProxy[self.src]._future_deposit_result
 
     def deposit(self, result):
-        """ Deposit a value for this future. """
+        """Deposit a value for this future."""
         self.values.append(result)
         if isinstance(result, Exception):
             self.error = result
@@ -108,55 +114,67 @@ def resume(self, threadMgr):
 
     def lookup_location(self):
         from .charm import charm
+
         if not self.store:
-            raise ValueError("Operation not supported for future not"
-                             " stored in the object store")
+            raise ValueError(
+                "Operation not supported for future not stored in the object store"
+            )
         obj_store = get_object_store()
         local_obj_store = obj_store[charm.myPe()].ckLocalBranch()
         return local_obj_store.lookup_location(self.store_id)
-    
+
     def lookup_object(self):
         from .charm import charm
+
         if not self.store:
-            raise ValueError("Operation not supported for future not"
-                             " stored in the object store")
+            raise ValueError(
+                "Operation not supported for future not stored in the object store"
+            )
         obj_store = get_object_store()
         local_obj_store = obj_store[charm.myPe()].ckLocalBranch()
         return local_obj_store.lookup_object(self.store_id)
-    
+
     def delete_object(self):
         from .charm import charm
+
         if not self.store:
-            raise ValueError("Operation not supported for future not"
-                             " stored in the object store")
+            raise ValueError(
+                "Operation not supported for future not stored in the object store"
+            )
         obj_store = get_object_store()
         obj_store[self.store_id % charm.numPes()].delete_remote_objects(self.store_id)
-    
+
     def is_local(self):
         if not self.store:
-            raise ValueError("Operation not supported for future not"
-                             " stored in the object store")
-        return not (self.lookup_object() is None)
-    
+            raise ValueError(
+                "Operation not supported for future not stored in the object store"
+            )
+        return self.lookup_object() is not None
+
     def create_object(self, obj):
         from .charm import charm
+
         if not self.store:
-            raise ValueError("Operation not supported for future not"
-                             " stored in the object store")
+            raise ValueError(
+                "Operation not supported for future not stored in the object store"
+            )
         obj_store = get_object_store()
         local_obj_store = obj_store[charm.myPe()].ckLocalBranch()
         local_obj_store.create_object(self.store_id, obj)
 
     def request_object(self):
         if not self.store:
-            raise ValueError("Operation not supported for future not"
-                             " stored in the object store")
+            raise ValueError(
+                "Operation not supported for future not stored in the object store"
+            )
         if self._requested:
             return
         from .charm import charm
+
         obj_store = get_object_store()
         obj_store[self.store_id % charm.numPes()].request_location_object(
-            self.store_id, charm.myPe())
+            self.store_id, charm.myPe()
+        )
         self._requested = True
 
     def __getstate__(self):
@@ -180,12 +198,14 @@ def __del__(self):
         if self.store:
             if self.parent == None and self.num_borrowers == 0:
                 # This is the owner, delete the object from the object store
-                #print("Deleting owner", self.store_id)
+                # print("Deleting owner", self.store_id)
                 self.delete_object()
             else:
                 # this is a borrower, notify its parent of the deletion
-                #print("Deleting", self.store_id, "from", charm.myPe(), "sending notify to", self.parent)
-                charm.thisProxy[self.parent].notify_future_deletion(self.store_id, self.borrow_depth - 1)
+                # print("Deleting", self.store_id, "from", charm.myPe(), "sending notify to", self.parent)
+                charm.thisProxy[self.parent].notify_future_deletion(
+                    self.store_id, self.borrow_depth - 1
+                )
 
 
 class CollectiveFuture(Future):
@@ -235,6 +255,7 @@ class EntryMethodThreadManager(object):
     def __init__(self, _charm):
         global charm, Charm4PyError, threadMgr
         from .charm import Charm4PyError
+
         charm = _charm
         threadMgr = self
         self.options = charm.options
@@ -256,17 +277,23 @@ def isMainThread(self):
 
     def objMigrating(self, obj):
         if obj._numthreads > 0:
-            raise Charm4PyError('Migration of chares with active threads is not currently supported')
+            raise Charm4PyError(
+                "Migration of chares with active threads is not currently supported"
+            )
 
     def throwNotThreadedError(self):
-        raise NotThreadedError("Method '" + charm.last_em_exec.C.__name__ + "." +
-                               charm.last_em_exec.name +
-                               "' must be a couroutine to be able to suspend (decorate it with @coro)")
+        raise NotThreadedError(
+            "Method '"
+            + charm.last_em_exec.C.__name__
+            + "."
+            + charm.last_em_exec.name
+            + "' must be a coroutine to be able to suspend (decorate it with @coro)"
+        )
 
     def pauseThread(self):
-        """ Called by an entry method thread to wait for something.
-            Returns data that the thread was waiting for, or None if it was
-            waiting for an event
+        """Called by an entry method thread to wait for something.
+        Returns data that the thread was waiting for, or None if it was
+        waiting for an event
         """
         gr = getcurrent()
         main_gr = self.main_gr
@@ -287,10 +314,10 @@ def pauseThread(self):
             return main_gr.switch()
 
     def _resumeThread(self, gr, arg):
-        """ Deposit a result or signal that a local entry method thread is waiting on,
-            and resume it. This executes on the main thread.
+        """Deposit a result or signal that a local entry method thread is waiting on,
+        and resume it. This executes on the main thread.
         """
-        #assert getcurrent() == self.main_gr
+        # assert getcurrent() == self.main_gr
         if gr.notify:
             obj = gr.obj
             obj._thread_notify_target.threadResumed(obj._thread_notify_data)
@@ -309,14 +336,14 @@ def resumeThread_prof(self, gr, arg):
             ems[-1].startMeasuringTime()
 
     def createFuture(self, num_vals=1, store=False):
-        """ Creates a new Future object by obtaining a unique (local) future ID. """
+        """Creates a new Future object by obtaining a unique (local) future ID."""
         gr = getcurrent()
         if not store and gr == self.main_gr:
             self.throwNotThreadedError()
         # get a unique local Future ID
         global FIDMAXVAL
         futures = self.futures
-        assert len(futures) < FIDMAXVAL, 'Too many pending futures, cannot create more'
+        assert len(futures) < FIDMAXVAL, "Too many pending futures, cannot create more"
         fid = (self.lastfid % FIDMAXVAL) + 1
         while fid in futures:
             fid = (fid % FIDMAXVAL) + 1
@@ -326,7 +353,7 @@ def createFuture(self, num_vals=1, store=False):
         return f
 
     def createCollectiveFuture(self, fid, obj, proxy):
-        """ fid is supplied in this case and has to be the same for all distributed chares """
+        """fid is supplied in this case and has to be the same for all distributed chares"""
         gr = getcurrent()
         if gr == self.main_gr:
             self.throwNotThreadedError()
@@ -335,13 +362,15 @@ def createCollectiveFuture(self, fid, obj, proxy):
         return f
 
     def depositFuture(self, fid, result):
-        """ Set a value of a future that is being managed by this ThreadManager. """
+        """Set a value of a future that is being managed by this ThreadManager."""
         futures = self.futures
         try:
             f = futures[fid]
         except KeyError:
-            raise Charm4PyError('No pending future with fid=' + str(fid) + '. A common reason is '
-                                'sending to a future that already received its value(s)')
+            raise Charm4PyError(
+                "No pending future with fid=" + str(fid) + ". A common reason is "
+                "sending to a future that already received its value(s)"
+            )
         if f.deposit(result):
             del futures[fid]
             # resume if a thread is blocked on the future
diff --git a/charm4py/wait.py b/charm4py/wait.py
index 355f5a4b..158ae91c 100644
--- a/charm4py/wait.py
+++ b/charm4py/wait.py
@@ -14,19 +14,19 @@ class MsgTagCond(object):
     group = True
 
     def __init__(self, cond_str, attrib_name, arg_idx):
-        self.cond_str    = cond_str
+        self.cond_str = cond_str
         self.attrib_name = attrib_name
-        self.arg_idx     = arg_idx
+        self.arg_idx = arg_idx
 
     def evaluateWhen(self, obj, args):
         return args[self.arg_idx] == getattr(obj, self.attrib_name)
 
     def createWaitCondition(self):
         c = object.__new__(MsgTagCond)
-        c.cond_str    = self.cond_str
+        c.cond_str = self.cond_str
         c.attrib_name = self.attrib_name
-        c.arg_idx     = self.arg_idx
-        c.wait_queue  = defaultdict(list)
+        c.arg_idx = self.arg_idx
+        c.wait_queue = defaultdict(list)
         return c
 
     def enqueue(self, elem):
@@ -62,17 +62,17 @@ class ChareStateMsgCond(object):
     group = False
 
     def __init__(self, cond_str, cond_func):
-        self.cond_str  = cond_str
+        self.cond_str = cond_str
         self.cond_func = cond_func
 
     def createWaitCondition(self):
         c = object.__new__(ChareStateMsgCond)
-        c.cond_str  = self.cond_str
+        c.cond_str = self.cond_str
         c.cond_func = self.cond_func
         return c
 
     def evaluateWhen(self, obj, args):
-        #return eval(cond_str)    # eval is very slow
+        # return eval(cond_str)    # eval is very slow
         return self.cond_func(obj, args)
 
     def enqueue(self, elem):
@@ -80,7 +80,7 @@ def enqueue(self, elem):
 
     def check(self, obj):
         t, em, header, args = self.elem
-        #if eval(me.cond_str):    # eval is very slow
+        # if eval(me.cond_str):    # eval is very slow
         if self.cond_func(obj, args):
             em.run(obj, header, args)
             return True, True
@@ -112,20 +112,21 @@ class ChareStateCond(object):
     group = True
 
     def __init__(self, cond_str, module_name):
-        self.cond_str  = cond_str
+        self.cond_str = cond_str
         self.globals_module_name = module_name
-        self.cond_func = eval('lambda self: ' + cond_str,
-                              import_module(module_name).__dict__)
+        self.cond_func = eval(
+            "lambda self: " + cond_str, import_module(module_name).__dict__
+        )
 
     def createWaitCondition(self):
         c = object.__new__(ChareStateCond)
-        c.cond_str   = self.cond_str
-        c.cond_func  = self.cond_func
+        c.cond_str = self.cond_str
+        c.cond_func = self.cond_func
         c.wait_queue = []
         return c
 
     def evaluateWhen(self, obj, args):
-        #return eval(me.cond_str)   # eval is very slow
+        # return eval(me.cond_str)   # eval is very slow
         return self.cond_func(obj)
 
     def enqueue(self, elem):
@@ -133,7 +134,7 @@ def enqueue(self, elem):
 
     def check(self, obj):
         dequeued = False
-        #while eval(me.cond_str):   # eval is very slow
+        # while eval(me.cond_str):   # eval is very slow
         while self.cond_func(obj):
             elem = self.wait_queue.pop()
             if elem[0] == 0:
@@ -153,16 +154,20 @@ def __getstate__(self):
         return self.cond_str, self.wait_queue, self._cond_next, self.globals_module_name
 
     def __setstate__(self, state):
-        self.cond_str, self.wait_queue, self._cond_next, self.globals_module_name = state
-        self.cond_func = eval('lambda self: ' + self.cond_str,
-                              import_module(self.globals_module_name).__dict__)
+        self.cond_str, self.wait_queue, self._cond_next, self.globals_module_name = (
+            state
+        )
+        self.cond_func = eval(
+            "lambda self: " + self.cond_str,
+            import_module(self.globals_module_name).__dict__,
+        )
 
 
 def is_tag_cond(root_ast):
-    """ Determine if the AST corresponds to a 'when' condition of the form
-        `self.xyz == args[x]` where xyz is the name of an attribute, x is an
-        integer. if True, returns the condition string, the name of the attribute
-        (e.g. xyz) and the integer index (e.g. x). Otherwise returns None """
+    """Determine if the AST corresponds to a 'when' condition of the form
+    `self.xyz == args[x]` where xyz is the name of an attribute, x is an
+    integer. if True, returns the condition string, the name of the attribute
+    (e.g. xyz) and the integer index (e.g. x). Otherwise returns None"""
     try:
         if not isinstance(root_ast.body, ast.Compare):
             return None
@@ -181,10 +186,10 @@ def is_tag_cond(root_ast):
         elif isinstance(right, ast.Attribute) and (isinstance(left, ast.Subscript)):
             attrib, args = right, left
 
-        if (attrib is None) or (attrib.value.id != 'self'):
+        if (attrib is None) or (attrib.value.id != "self"):
             return None
 
-        if args.value.id != 'args':
+        if args.value.id != "args":
             return None
 
         idx = args.slice.value
@@ -192,11 +197,11 @@ def is_tag_cond(root_ast):
             idx = idx.n
         elif isinstance(idx, ast.Constant):
             idx = idx.value
-        
+
         if not isinstance(idx, int):
             return None
 
-        return ('self.' + attrib.attr + ' == args[' + str(idx) + ']', attrib.attr, idx)
+        return ("self." + attrib.attr + " == args[" + str(idx) + "]", attrib.attr, idx)
     except:
         return None
 
@@ -208,18 +213,25 @@ def __init__(self, method_arguments):
         self.num_msg_args = 0
 
     def visit_Attribute(self, node):
-        if isinstance(node.value, ast.Name) and node.value.id in self.method_arguments and node.value.id != 'self':
+        if (
+            isinstance(node.value, ast.Name)
+            and node.value.id in self.method_arguments
+            and node.value.id != "self"
+        ):
             idx = self.method_arguments[node.value.id]
             self.num_msg_args += 1
-            return ast.copy_location(ast.Attribute(
-                value=ast.Subscript(
-                    value=ast.Name(id='args', ctx=ast.Load()),
-                    slice=ast.Index(value=ast.Num(n=idx)),
-                    ctx=node.ctx
+            return ast.copy_location(
+                ast.Attribute(
+                    value=ast.Subscript(
+                        value=ast.Name(id="args", ctx=ast.Load()),
+                        slice=ast.Index(value=ast.Num(n=idx)),
+                        ctx=node.ctx,
+                    ),
+                    attr=node.attr,
+                    ctx=node.ctx,
                 ),
-                attr=node.attr,
-                ctx=node.ctx
-            ), node)
+                node,
+            )
         else:
             return self.generic_visit(node)
 
@@ -227,27 +239,31 @@ def visit_Name(self, node):
         if node.id in self.method_arguments:
             idx = self.method_arguments[node.id]
             self.num_msg_args += 1
-            return ast.copy_location(ast.Subscript(
-                value=ast.Name(id='args', ctx=ast.Load()),
-                slice=ast.Index(value=ast.Num(n=idx)),
-                ctx=node.ctx
-            ), node)
+            return ast.copy_location(
+                ast.Subscript(
+                    value=ast.Name(id="args", ctx=ast.Load()),
+                    slice=ast.Index(value=ast.Num(n=idx)),
+                    ctx=node.ctx,
+                ),
+                node,
+            )
         else:
             return node
 
 
-#import astunparse
+# import astunparse
+
 
 def parse_cond_str(cond_str, module_name, method_arguments={}):
 
-    #print("Original condition string is", cond_str)
-    t = ast.parse(cond_str, filename='<string>', mode='eval')
+    # print("Original condition string is", cond_str)
+    t = ast.parse(cond_str, filename="<string>", mode="eval")
     if len(method_arguments) > 0:
         # in the AST, convert names of method arguments to `args[x]`, where x is the
         # position of the argument in the function definition
         transformer = MsgArgsTransformer(method_arguments)
         transformer.visit(t)
-        #print("Transformed to", astunparse.unparse(t), "num args detected=", transformer.num_msg_args)
+        # print("Transformed to", astunparse.unparse(t), "num args detected=", transformer.num_msg_args)
         if transformer.num_msg_args == 0:
             return ChareStateCond(cond_str, module_name)
     else:
@@ -258,11 +274,12 @@ def parse_cond_str(cond_str, module_name, method_arguments={}):
         return MsgTagCond(*tag_cond)
 
     # compile AST to code, then eval to a lambda function
-    new_tree = ast.parse("lambda self, args: x", filename='<string>', mode='eval')
+    new_tree = ast.parse("lambda self, args: x", filename="<string>", mode="eval")
     new_tree.body.body = t.body
     new_tree = ast.fix_missing_locations(new_tree)
-    lambda_func = eval(compile(new_tree, '<string>', 'eval'),
-                       import_module(module_name).__dict__)
+    lambda_func = eval(
+        compile(new_tree, "<string>", "eval"), import_module(module_name).__dict__
+    )
     return ChareStateMsgCond(cond_str, lambda_func)
 
 
diff --git a/charmrun/start.py b/charmrun/start.py
index ea176165..8d354d0d 100644
--- a/charmrun/start.py
+++ b/charmrun/start.py
@@ -12,11 +12,13 @@ def executable_is_python(args):
     Note: Returns true if no executable was found or if an executable
     was found and that executable is a Python file.
     """
+
     def is_exe(fpath):
         return os.path.isfile(fpath) and os.access(fpath, os.X_OK)
 
     def is_pyfile(fpath):
         return os.path.isfile(fpath) and fpath.endswith(".py")
+
     for each in args:
         if is_pyfile(each):
             return True
@@ -31,33 +33,34 @@ def nodelist_islocal(filename, regexp):
         # it is an error if filename doesn't exist, but I'll let charmrun print
         # the error. don't add ++local so that charmrun detects it
         return False
-    with open(filename, 'r') as f:
+    with open(filename, "r") as f:
         for line in f:
             m = regexp.search(line)
-            if m is not None and m.group(1) not in {'localhost', '127.0.0.1'}:
+            if m is not None and m.group(1) not in {"localhost", "127.0.0.1"}:
                 return False
     return True
 
 
 def checkNodeListLocal(args):
     import re
+
     regexp = re.compile("^\s*host\s+(\S+)\s*$")
 
     try:
-        i = args.index('++nodelist')
+        i = args.index("++nodelist")
     except ValueError:
         i = -1
     if i != -1:
-        return nodelist_islocal(args[i+1], regexp)
+        return nodelist_islocal(args[i + 1], regexp)
 
-    if 'NODELIST' in os.environ:
-        return nodelist_islocal(os.environ['NODELIST'], regexp)
+    if "NODELIST" in os.environ:
+        return nodelist_islocal(os.environ["NODELIST"], regexp)
 
-    nodelist_cur_dir = os.path.join(os.getcwd(), 'nodelist')
+    nodelist_cur_dir = os.path.join(os.getcwd(), "nodelist")
     if os.path.exists(nodelist_cur_dir):
         return nodelist_islocal(nodelist_cur_dir, regexp)
 
-    nodelist_home_dir = os.path.join(os.path.expanduser('~'), '.nodelist')
+    nodelist_home_dir = os.path.join(os.path.expanduser("~"), ".nodelist")
     if os.path.exists(nodelist_home_dir):
         return nodelist_islocal(nodelist_home_dir, regexp)
 
@@ -69,13 +72,13 @@ def start(args=[]):
 
     if len(args) == 0:
         args = sys.argv[1:]
-    if '++local' not in args and '++mpiexec' not in args and checkNodeListLocal(args):
-        args.append('++local')
+    if "++local" not in args and "++mpiexec" not in args and checkNodeListLocal(args):
+        args.append("++local")
 
-    if '++interactive' in args and 'charm4py.interactive' not in args:
-        args += ['-m', 'charm4py.interactive']
+    if "++interactive" in args and "charm4py.interactive" not in args:
+        args += ["-m", "charm4py.interactive"]
 
-    cmd = [os.path.join(os.path.dirname(__file__), 'charmrun')]
+    cmd = [os.path.join(os.path.dirname(__file__), "charmrun")]
     if executable_is_python(args):
         # Note: sys.executable is the absolute path to the Python interpreter
         # We only want to invoke the interpreter if the execution target is a
@@ -85,10 +88,10 @@ def start(args=[]):
     try:
         return subprocess.call(cmd)
     except FileNotFoundError:
-        print('charmrun executable not found. You are running \"' + __file__ + '\"')
-        print('Make sure this is a built or installed version of charmrun')
+        print('charmrun executable not found. You are running "' + __file__ + '"')
+        print("Make sure this is a built or installed version of charmrun")
         return 1
 
 
-if __name__ == '__main__':
+if __name__ == "__main__":
     sys.exit(start())
diff --git a/docs/conf.py b/docs/conf.py
index a9fe20af..d9f8388d 100644
--- a/docs/conf.py
+++ b/docs/conf.py
@@ -13,53 +13,51 @@
 # All configuration values have a default; values that are commented out
 # serve to show the default.
 
-import sys
-import os
 
 # If extensions (or modules to document with autodoc) are in another directory,
 # add these directories to sys.path here. If the directory is relative to the
 # documentation root, use os.path.abspath to make it absolute, like shown here.
-#sys.path.insert(0, os.path.abspath('.'))
+# sys.path.insert(0, os.path.abspath('.'))
 
 # -- General configuration ------------------------------------------------
 
 # If your documentation needs a minimal Sphinx version, state it here.
-#needs_sphinx = '1.0'
+# needs_sphinx = '1.0'
 
 # Add any Sphinx extension module names here, as strings. They can be
 # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 # ones.
 extensions = [
-    'sphinx.ext.todo',
+    "sphinx.ext.todo",
 ]
 
 # Add any paths that contain templates here, relative to this directory.
-templates_path = ['_templates']
+templates_path = ["_templates"]
 
 # The suffix(es) of source filenames.
 # You can specify multiple suffix as a list of string:
 # source_suffix = ['.rst', '.md']
-source_suffix = '.rst'
+source_suffix = ".rst"
 
 # The encoding of source files.
-#source_encoding = 'utf-8-sig'
+# source_encoding = 'utf-8-sig'
 
 # The master toctree document.
-master_doc = 'index'
+master_doc = "index"
 
 # General information about the project.
-project = 'Charm4py'
-copyright = '2019, University of Illinois'
-author = 'Juan Galvez'
+project = "Charm4py"
+copyright = "2019, University of Illinois"
+author = "Juan Galvez"
 
 # The version info for the project you're documenting, acts as replacement for
 # |version| and |release|, also used in various other places throughout the
 # built documents.
 
 # The short X.Y version.
-version = '1.1'
+version = "1.1"
 # The full version, including alpha/beta/rc tags.
-release = '1.1'
+release = "1.1"
 
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
@@ -70,37 +68,37 @@
 
 # There are two options for replacing |today|: either, you set today to some
 # non-false value, then it is used:
-#today = ''
+# today = ''
 # Else, today_fmt is used as the format for a strftime call.
-#today_fmt = '%B %d, %Y'
+# today_fmt = '%B %d, %Y'
 
 # List of patterns, relative to source directory, that match files and
 # directories to ignore when looking for source files.
-exclude_patterns = ['_build']
+exclude_patterns = ["_build"]
 
 # The reST default role (used for this markup: `text`) to use for all
 # documents.
-#default_role = None
+# default_role = None
 
 # If true, '()' will be appended to :func: etc. cross-reference text.
-#add_function_parentheses = True
+# add_function_parentheses = True
 
 # If true, the current module name will be prepended to all description
 # unit titles (such as .. function::).
-#add_module_names = True
+# add_module_names = True
 
 # If true, sectionauthor and moduleauthor directives will be shown in the
 # output. They are ignored by default.
-#show_authors = False
+# show_authors = False
 
 # The name of the Pygments (syntax highlighting) style to use.
-pygments_style = 'sphinx'
+pygments_style = "sphinx"
 
 # A list of ignored prefixes for module index sorting.
-#modindex_common_prefix = []
+# modindex_common_prefix = []
 
 # If true, keep warnings as "system message" paragraphs in the built documents.
-#keep_warnings = False
+# keep_warnings = False
 
 # If true, `todo` and `todoList` produce output, else they produce nothing.
 todo_include_todos = True
@@ -115,151 +113,144 @@
 # Theme options are theme-specific and customize the look and feel of a theme
 # further.  For a list of options available for each theme, see the
 # documentation.
-#html_theme_options = {}
+# html_theme_options = {}
 
 # Add any paths that contain custom themes here, relative to this directory.
-#html_theme_path = []
+# html_theme_path = []
 
 # The name for this set of Sphinx documents.  If None, it defaults to
 # "<project> v<release> documentation".
-#html_title = None
+# html_title = None
 
 # A shorter title for the navigation bar.  Default is the same as html_title.
-#html_short_title = None
+# html_short_title = None
 
 # The name of an image file (relative to this directory) to place at the top
 # of the sidebar.
-#html_logo = None
+# html_logo = None
 
 # The name of an image file (relative to this directory) to use as a favicon of
 # the docs.  This file should be a Windows icon file (.ico) being 16x16 or 32x32
 # pixels large.
-#html_favicon = None
+# html_favicon = None
 
 # Add any paths that contain custom static files (such as style sheets) here,
 # relative to this directory. They are copied after the builtin static files,
 # so a file named "default.css" will overwrite the builtin "default.css".
-html_static_path = ['_static']
+html_static_path = ["_static"]
 
 # Add any extra paths that contain custom files (such as robots.txt or
 # .htaccess) here, relative to this directory. These files are copied
 # directly to the root of the documentation.
-#html_extra_path = []
+# html_extra_path = []
 
 # If not '', a 'Last updated on:' timestamp is inserted at every page bottom,
 # using the given strftime format.
-#html_last_updated_fmt = '%b %d, %Y'
+# html_last_updated_fmt = '%b %d, %Y'
 
 # If true, SmartyPants will be used to convert quotes and dashes to
 # typographically correct entities.
-#html_use_smartypants = True
+# html_use_smartypants = True
 
 # Custom sidebar templates, maps document names to template names.
-#html_sidebars = {}
+# html_sidebars = {}
 
 # Additional templates that should be rendered to pages, maps page names to
 # template names.
-#html_additional_pages = {}
+# html_additional_pages = {}
 
 # If false, no module index is generated.
-#html_domain_indices = True
+# html_domain_indices = True
 
 # If false, no index is generated.
-#html_use_index = True
+# html_use_index = True
 
 # If true, the index is split into individual pages for each letter.
-#html_split_index = False
+# html_split_index = False
 
 # If true, links to the reST sources are added to the pages.
-#html_show_sourcelink = True
+# html_show_sourcelink = True
 
 # If true, "Created using Sphinx" is shown in the HTML footer. Default is True.
-#html_show_sphinx = True
+# html_show_sphinx = True
 
 # If true, "(C) Copyright ..." is shown in the HTML footer. Default is True.
-#html_show_copyright = True
+# html_show_copyright = True
 
 # If true, an OpenSearch description file will be output, and all pages will
 # contain a <link> tag referring to it.  The value of this option must be the
 # base URL from which the finished HTML is served.
-#html_use_opensearch = ''
+# html_use_opensearch = ''
 
 # This is the file name suffix for HTML files (e.g. ".xhtml").
-#html_file_suffix = None
+# html_file_suffix = None
 
 # Language to be used for generating the HTML full-text search index.
 # Sphinx supports the following languages:
 #   'da', 'de', 'en', 'es', 'fi', 'fr', 'h', 'it', 'ja'
 #   'nl', 'no', 'pt', 'ro', 'r', 'sv', 'tr'
-#html_search_language = 'en'
+# html_search_language = 'en'
 
 # A dictionary with options for the search language support, empty by default.
 # Now only 'ja' uses this config value
-#html_search_options = {'type': 'default'}
+# html_search_options = {'type': 'default'}
 
 # The name of a javascript file (relative to the configuration directory) that
 # implements a search results scorer. If empty, the default will be used.
-#html_search_scorer = 'scorer.js'
+# html_search_scorer = 'scorer.js'
 
 # Output file base name for HTML help builder.
-htmlhelp_basename = 'charm4pydoc'
+htmlhelp_basename = "charm4pydoc"
 
 # -- Options for LaTeX output ---------------------------------------------
 
 latex_elements = {
-# The paper size ('letterpaper' or 'a4paper').
-#'papersize': 'letterpaper',
-
-# The font size ('10pt', '11pt' or '12pt').
-#'pointsize': '10pt',
-
-# Additional stuff for the LaTeX preamble.
-#'preamble': '',
-
-# Latex figure (float) alignment
-#'figure_align': 'htbp',
+    # The paper size ('letterpaper' or 'a4paper').
+    #'papersize': 'letterpaper',
+    # The font size ('10pt', '11pt' or '12pt').
+    #'pointsize': '10pt',
+    # Additional stuff for the LaTeX preamble.
+    #'preamble': '',
+    # Latex figure (float) alignment
+    #'figure_align': 'htbp',
 }
 
 # Grouping the document tree into LaTeX files. List of tuples
 # (source start file, target name, title,
 #  author, documentclass [howto, manual, or own class]).
 latex_documents = [
-    (master_doc, 'charm4py.tex', 'Charm4py Documentation',
-     'Juan Galvez', 'manual'),
+    (master_doc, "charm4py.tex", "Charm4py Documentation", "Juan Galvez", "manual"),
 ]
 
 # The name of an image file (relative to this directory) to place at the top of
 # the title page.
-#latex_logo = None
+# latex_logo = None
 
 # For "manual" documents, if this is true, then toplevel headings are parts,
 # not chapters.
-#latex_use_parts = False
+# latex_use_parts = False
 
 # If true, show page references after internal links.
-#latex_show_pagerefs = False
+# latex_show_pagerefs = False
 
 # If true, show URL addresses after external links.
-#latex_show_urls = False
+# latex_show_urls = False
 
 # Documents to append as an appendix to all manuals.
-#latex_appendices = []
+# latex_appendices = []
 
 # If false, no module index is generated.
-#latex_domain_indices = True
+# latex_domain_indices = True
 
 
 # -- Options for manual page output ---------------------------------------
 
 # One entry per manual page. List of tuples
 # (source start file, name, description, authors, manual section).
-man_pages = [
-    (master_doc, 'charm4py', 'Charm4py Documentation',
-     [author], 1)
-]
+man_pages = [(master_doc, "charm4py", "Charm4py Documentation", [author], 1)]
 
 # If true, show URL addresses after external links.
-#man_show_urls = False
+# man_show_urls = False
 
 
 # -- Options for Texinfo output -------------------------------------------
@@ -268,19 +259,25 @@
 # (source start file, target name, title, author,
 #  dir menu entry, description, category)
 texinfo_documents = [
-    (master_doc, 'charm4py', 'Charm4py Documentation',
-     author, 'charm4py', 'One line description of project.',
-     'Miscellaneous'),
+    (
+        master_doc,
+        "charm4py",
+        "Charm4py Documentation",
+        author,
+        "charm4py",
+        "One line description of project.",
+        "Miscellaneous",
+    ),
 ]
 
 # Documents to append as an appendix to all manuals.
-#texinfo_appendices = []
+# texinfo_appendices = []
 
 # If false, no module index is generated.
-#texinfo_domain_indices = True
+# texinfo_domain_indices = True
 
 # How to display URL addresses: 'footnote', 'no', or 'inline'.
-#texinfo_show_urls = 'footnote'
+# texinfo_show_urls = 'footnote'
 
 # If true, do not generate a @detailmenu in the "Top" node's menu.
-#texinfo_no_detailmenu = False
+# texinfo_no_detailmenu = False
diff --git a/examples/cannon/cannon.py b/examples/cannon/cannon.py
index e2516236..0a068c35 100644
--- a/examples/cannon/cannon.py
+++ b/examples/cannon/cannon.py
@@ -9,10 +9,12 @@
     def njit(func):
         return func
 
+
 @njit
 def matmul(C, A, B):
     C += A @ B
 
+
 class SubMatrix(Chare):
     def __init__(self, subdim_size, charedim, init_done):
         super().__init__()
@@ -22,12 +24,12 @@ def __init__(self, subdim_size, charedim, init_done):
         self.neighbor_cache = {}
 
         self.sub_a = np.ones((subdim_size, subdim_size), dtype=np.float64)
-        self.sub_a[:,:] = (charedim*self.thisIndex[1]) + self.thisIndex[0]
+        self.sub_a[:, :] = (charedim * self.thisIndex[1]) + self.thisIndex[0]
         self.sub_b = np.ones((subdim_size, subdim_size), dtype=np.float64)
-        self.sub_b[:,:] = (charedim*self.thisIndex[0]) + self.thisIndex[1]
+        self.sub_b[:, :] = (charedim * self.thisIndex[0]) + self.thisIndex[1]
 
-        self.recv_a = np.ndarray((subdim_size,subdim_size), dtype=np.float64)
-        self.recv_b = np.ndarray((subdim_size,subdim_size), dtype=np.float64)
+        self.recv_a = np.ndarray((subdim_size, subdim_size), dtype=np.float64)
+        self.recv_b = np.ndarray((subdim_size, subdim_size), dtype=np.float64)
 
         self.sub_c = np.zeros((subdim_size, subdim_size), dtype=np.float64)
 
@@ -40,9 +42,7 @@ def __init__(self, subdim_size, charedim, init_done):
 
     def get_neighbor_channel(self, target_idx):
         if target_idx not in self.neighbor_cache:
-            self.neighbor_cache[target_idx] = Channel(self,
-                                                      self.thisProxy[target_idx]
-                                                      )
+            self.neighbor_cache[target_idx] = Channel(self, self.thisProxy[target_idx])
         return self.neighbor_cache[target_idx]
 
     @coro
@@ -74,12 +74,14 @@ def cannons_multiplication(self, mult_done_future):
 
     # the communication routines should be optimized so both sends/receives can complete in parallel
     def shift(self, up_shift, left_shift):
-        send_target_idx = ((self.thisIndex[0] - up_shift) % self.charedim,
-                           (self.thisIndex[1] - left_shift) % self.charedim
-                           )
-        recv_target_idx = ((self.thisIndex[0] + up_shift) % self.charedim,
-                           (self.thisIndex[1] + left_shift) % self.charedim
-                           )
+        send_target_idx = (
+            (self.thisIndex[0] - up_shift) % self.charedim,
+            (self.thisIndex[1] - left_shift) % self.charedim,
+        )
+        recv_target_idx = (
+            (self.thisIndex[0] + up_shift) % self.charedim,
+            (self.thisIndex[1] + left_shift) % self.charedim,
+        )
 
         send_ch = self.get_neighbor_channel(send_target_idx)
         recv_ch = self.get_neighbor_channel(recv_target_idx)
@@ -95,9 +97,10 @@ def shift(self, up_shift, left_shift):
 def main(args):
     if len(args) < 3:
         print(f"USAGE: {args[0]} matrix_dim chare_dim")
-        print("matrix_dim and chare_dim must be perfect squares "
-              "where matrix_dim is divisible by chare_dim"
-              )
+        print(
+            "matrix_dim and chare_dim must be perfect squares "
+            "where matrix_dim is divisible by chare_dim"
+        )
         charm.exit(1)
     matrix_dim = int(args[1])
     chare_dim = int(args[2])
@@ -111,9 +114,9 @@ def main(args):
     print(f"Size of each chare's sub-array: {8*(subdim_size**2)/(1024**2)}MiB")
 
     init_done = Future()
-    chares = Array(SubMatrix, (chare_dim, chare_dim),
-                   args=[subdim_size, chare_dim, init_done]
-                   )
+    chares = Array(
+        SubMatrix, (chare_dim, chare_dim), args=[subdim_size, chare_dim, init_done]
+    )
     init_done.get()
 
     mult_done_future = Future()
@@ -125,4 +128,5 @@ def main(args):
     print(f"Elapsed time: {tend-tstart}")
     charm.exit()
 
+
 charm.start(main)
diff --git a/examples/ccs/ccs_server.py b/examples/ccs/ccs_server.py
index 5739949c..cde1de09 100644
--- a/examples/ccs/ccs_server.py
+++ b/examples/ccs/ccs_server.py
@@ -1,13 +1,15 @@
-from charm4py import charm, Chare, Array, Future, Reducer, Group
+from charm4py import charm, Chare, Future, Reducer, Group
+
 
 def handler(msg):
     print("CCS Ping handler called on " + str(charm.myPe()))
-    msg = msg.decode('utf-8')
-    msg = msg.rstrip('\x00')
+    msg = msg.decode("utf-8")
+    msg = msg.rstrip("\x00")
     answer = "Hello to sender " + str(msg) + " from PE " + str(charm.myPe()) + ".\n"
-    answer_bytes = answer.encode('utf-8')
+    answer_bytes = answer.encode("utf-8")
     charm.CcsSendReply(answer_bytes)
 
+
 class RegisterPerChare(Chare):
 
     def register(self, return_future, handler):
@@ -15,6 +17,7 @@ def register(self, return_future, handler):
         charm.CcsRegisterHandler("ping", handler)
         self.reduce(return_future, Reducer.nop)
 
+
 def main(args):
     # No need to initialize converse, because charm.start does this
     # just register the handler
@@ -25,4 +28,4 @@ def main(args):
     print("CCS Handlers registered . Waiting for net requests...")
 
 
-charm.start(main)
\ No newline at end of file
+charm.start(main)
diff --git a/examples/cuda/hapi/hapi-cuda-callback.py b/examples/cuda/hapi/hapi-cuda-callback.py
index a7887e52..12690f0d 100644
--- a/examples/cuda/hapi/hapi-cuda-callback.py
+++ b/examples/cuda/hapi/hapi-cuda-callback.py
@@ -3,12 +3,14 @@
 import numba.cuda as cuda
 import numpy as np
 
+
 @cuda.jit
 def elementwise_sum_kernel(x_in, x_out):
     idx = cuda.grid(1)
     if idx < x_in.shape[0]:
         x_out[idx] = x_in[idx] + x_in[idx]
 
+
 def main(args):
     N = 1_000_000
     array_size = (N,)
@@ -33,7 +35,9 @@ def main(args):
     charm.hapiAddCudaCallback(stream_handle, return_fut)
     return_fut.get()
     kernel_done_time = time.perf_counter()
-    print(f"Callback received, kernel finished in {kernel_done_time - start_time:.6f} seconds.")
+    print(
+        f"Callback received, kernel finished in {kernel_done_time - start_time:.6f} seconds."
+    )
 
     B_host = B_gpu.copy_to_host(stream=s)
 
@@ -44,4 +48,5 @@ def main(args):
 
     charm.exit()
 
+
 charm.start(main)
diff --git a/examples/cuda/hapi/multi_gpu_callback.py b/examples/cuda/hapi/multi_gpu_callback.py
index bc4f4926..fe0e311f 100644
--- a/examples/cuda/hapi/multi_gpu_callback.py
+++ b/examples/cuda/hapi/multi_gpu_callback.py
@@ -1,43 +1,48 @@
-'''
+"""
 Use one process to launch two torch matmul kernels, each on a separate device
 A HAPI callback is registered for each kernel
 which triggers two different methods
 Must run this program with 2 different gpus
-'''
+"""
 
 from charm4py import charm
 import torch
 
+
 def main(args):
 
-    N=10000
+    N = 10000
 
     if not torch.cuda.is_available():
         print("Error: No GPU detected")
         charm.exit()
     if torch.cuda.device_count() < 2:
-        print("Error: fewer than 2 GPUs, only " + str(torch.cuda.device_count()) + " gpus found")
+        print(
+            "Error: fewer than 2 GPUs, only "
+            + str(torch.cuda.device_count())
+            + " gpus found"
+        )
         charm.exit()
-    
-    cuda0 = torch.device('cuda:0') #first device
-    cuda1 = torch.device('cuda:1') #second device
+
+    cuda0 = torch.device("cuda:0")  # first device
+    cuda1 = torch.device("cuda:1")  # second device
 
     stream0 = torch.cuda.Stream(device=cuda0)
     stream1 = torch.cuda.Stream(device=cuda1)
 
-    #allocate tensors on device 0
+    # allocate tensors on device 0
     with cuda0:
-        a0 = torch.randn(N,N)
-        b0 = torch.randn(N,N)
+        a0 = torch.randn(N, N)
+        b0 = torch.randn(N, N)
         c0 = torch.mm(a0, b0)
-    
-    #allocate tensors on device 1
+
+    # allocate tensors on device 1
     with cuda1:
-        a1 = torch.randn(N,N)
-        b1 = torch.randn(N,N)
+        a1 = torch.randn(N, N)
+        b1 = torch.randn(N, N)
         c1 = torch.mm(a1, b1)
-    
-    #create callbacks (should we implement callbacks to entry methods?)
+
+    # create callbacks (should we implement callbacks to entry methods?)
     future0 = charm.Future()
     future1 = charm.Future()
     print("Future 0 id: ", future0.fid)
@@ -47,8 +52,9 @@ def main(args):
     charm.hapiAddCudaCallback(stream1.cuda_stream, future1)
 
     for fut_object in charm.iwait(futures):
-        print('One device kernel complete, id: ', fut_object.fid)
+        print("One device kernel complete, id: ", fut_object.fid)
 
     charm.exit()
 
+
 charm.start(main)
diff --git a/examples/dist-task-scheduler/scheduler.py b/examples/dist-task-scheduler/scheduler.py
index 227db11a..45cb679b 100644
--- a/examples/dist-task-scheduler/scheduler.py
+++ b/examples/dist-task-scheduler/scheduler.py
@@ -4,7 +4,7 @@
 
 
 class Job(object):
-    """ This class is mainly for book-keeping (store and manage job state) """
+    """This class is mainly for book-keeping (store and manage job state)"""
 
     def __init__(self, job_id, func, tasks, callback):
         self.id = job_id
@@ -31,7 +31,7 @@ def nextTask(self):
 
 
 class Scheduler(Chare):
-    """ The scheduler sends tasks to distributed workers """
+    """The scheduler sends tasks to distributed workers"""
 
     def __init__(self):
         # create a Worker on every process, pass them a reference (proxy) to myself
@@ -42,8 +42,8 @@ def __init__(self):
         self.jobs = {}
 
     def map_async(self, func, iterable, callback):
-        """ Start a new parallel map job (apply func to elements in iterable).
-            The result will be sent back via the provided callback """
+        """Start a new parallel map job (apply func to elements in iterable).
+        The result will be sent back via the provided callback"""
         self.addJob(func, list(iterable), callback)
         self.schedule()
 
@@ -64,7 +64,7 @@ def schedule(self):
                 self.workers[free_worker].apply(job.func, task, task_id, job.id)
 
     def taskDone(self, worker_id, task_id, job_id, result):
-        """ Called by workers to tell the scheduler that they are done with a task """
+        """Called by workers to tell the scheduler that they are done with a task"""
         self.free_workers.add(worker_id)
         job = self.jobs[job_id]
         job.addResult(task_id, result)
@@ -81,7 +81,7 @@ def __init__(self, scheduler):
         self.scheduler = scheduler
 
     def apply(self, func, arg, task_id, job_id):
-        """ Apply function to argument and send the result to the scheduler """
+        """Apply function to argument and send the result to the scheduler"""
         result = func(arg)
         self.scheduler.taskDone(self.thisIndex, task_id, job_id, result)
 
@@ -101,7 +101,7 @@ def main(args):
     scheduler.map_async(square, [1, 2, 3, 4, 5], callback=future1)
     scheduler.map_async(square, [1, 3, 5, 7, 9], callback=future2)
     # wait for the two jobs to complete and print the results
-    print('Final results are:')
+    print("Final results are:")
     print(future1.get())
     print(future2.get())
     exit()
diff --git a/examples/fibonacci/fib-numba.py b/examples/fibonacci/fib-numba.py
index 602317b2..f1e44f8e 100644
--- a/examples/fibonacci/fib-numba.py
+++ b/examples/fibonacci/fib-numba.py
@@ -25,7 +25,7 @@ def fib(n):
         # this will create two tasks which will be sent to distributed workers
         # (tasks can execute on any PE). map will block here for the result of
         # fib(n-1) and fib(n-2), which is why we mark fib as a coroutine
-        return sum(charm.pool.map(fib, [n-1, n-2]))
+        return sum(charm.pool.map(fib, [n - 1, n - 2]))
 
 
 @numba.jit(nopython=True, cache=False)  # numba really speeds up the computation
@@ -33,7 +33,7 @@ def fib_seq(n):
     if n < 2:
         return n
     else:
-        return fib_seq(n-1) + fib_seq(n-2)
+        return fib_seq(n - 1) + fib_seq(n - 2)
 
 
 class Util(Chare):
@@ -43,7 +43,7 @@ def compile(self):
 
 def main(args):
     global GRAINSIZE
-    print('\nUsage: fib-numba.py [n] [grainsize]')
+    print("\nUsage: fib-numba.py [n] [grainsize]")
     n = 40
     if len(args) > 1:
         n = int(args[1])
@@ -52,14 +52,14 @@ def main(args):
         GRAINSIZE = int(args[2])
     GRAINSIZE = max(2, GRAINSIZE)
     # set GRAINSIZE as a global variable on all processes before starting
-    charm.thisProxy.updateGlobals({'GRAINSIZE': GRAINSIZE}, awaitable=True).get()
+    charm.thisProxy.updateGlobals({"GRAINSIZE": GRAINSIZE}, awaitable=True).get()
     # precompile fib_seq on every process before the actual computation starts,
     # by calling the function. this helps get consistent benchmark results
     Group(Util).compile(awaitable=True).get()
-    print('Calculating fibonacci of N=' + str(n) + ', grainsize=', GRAINSIZE)
+    print("Calculating fibonacci of N=" + str(n) + ", grainsize=", GRAINSIZE)
     t0 = time.time()
     result = fib(n)
-    print('Result is', result, 'elapsed=', round(time.time() - t0, 3))
+    print("Result is", result, "elapsed=", round(time.time() - t0, 3))
     exit()
 
 
diff --git a/examples/fibonacci/fib.py b/examples/fibonacci/fib.py
index 9f44a196..db537e24 100644
--- a/examples/fibonacci/fib.py
+++ b/examples/fibonacci/fib.py
@@ -19,18 +19,18 @@ def fib(n):
         # this will create two tasks which will be sent to distributed workers
         # (tasks can execute on any PE). map will block here for the result of
         # fib(n-1) and fib(n-2), which is why we mark fib as a coroutine
-        return sum(charm.pool.map(fib, [n-1, n-2]))
+        return sum(charm.pool.map(fib, [n - 1, n - 2]))
 
 
 def main(args):
-    print('\nUsage: fib.py [n]')
+    print("\nUsage: fib.py [n]")
     n = 12
     if len(args) > 1:
         n = int(args[1])
-    print('Calculating fibonacci of N=' + str(n))
+    print("Calculating fibonacci of N=" + str(n))
     t0 = time.time()
     result = fib(n)
-    print('Result is', result, 'elapsed=', round(time.time() - t0, 3))
+    print("Result is", result, "elapsed=", round(time.time() - t0, 3))
     exit()
 
 
diff --git a/examples/fibonacci/fibonacci_with_futures.py b/examples/fibonacci/fibonacci_with_futures.py
index 519aa7d0..d29453d3 100644
--- a/examples/fibonacci/fibonacci_with_futures.py
+++ b/examples/fibonacci/fibonacci_with_futures.py
@@ -1,8 +1,10 @@
 from charm4py import charm, Chare, Future, coro
-#modeled after the charm with futures example in the charm++ textbook
+
+# modeled after the charm with futures example in the charm++ textbook
 
 THRESHOLD = 20
 
+
 class Fib(Chare):
 
     @coro
@@ -33,6 +35,7 @@ def seqFib(self, n):
         else:
             return self.seqFib(n - 1) + self.seqFib(n - 2)
 
+
 @coro
 def main(args):
     if len(args) < 2:
@@ -54,4 +57,5 @@ def main(args):
     print("The requested Fibonacci number is:", res)
     charm.exit()
 
+
 charm.start(main)
diff --git a/examples/hello/array_hello.py b/examples/hello/array_hello.py
index 577f1e60..fdcf8b08 100644
--- a/examples/hello/array_hello.py
+++ b/examples/hello/array_hello.py
@@ -11,17 +11,22 @@ def __init__(self, array_dims):
         self.array_dims = array_dims
 
     def sayHi(self, hello_num):
-        print('Hi[' + str(hello_num) + '] from element', self.thisIndex, 'on PE', charm.myPe())
-        lastIdx = tuple([size-1 for size in self.array_dims])
+        print(
+            "Hi[" + str(hello_num) + "] from element",
+            self.thisIndex,
+            "on PE",
+            charm.myPe(),
+        )
+        lastIdx = tuple([size - 1 for size in self.array_dims])
         if self.thisIndex == lastIdx:
             # this is the last index, we are done
-            print('All done')
+            print("All done")
             exit()
         else:
             # send a hello message to the next element (in row-major order)
             nextIndex = list(self.thisIndex)
             num_dims = len(self.array_dims)
-            for i in range(num_dims-1, -1, -1):
+            for i in range(num_dims - 1, -1, -1):
                 nextIndex[i] = (nextIndex[i] + 1) % self.array_dims[i]
                 if nextIndex[i] != 0:
                     break
@@ -29,7 +34,7 @@ def sayHi(self, hello_num):
 
 
 def main(args):
-    print('\nUsage: array_hello.py [dim1_size dim2_size ...]')
+    print("\nUsage: array_hello.py [dim1_size dim2_size ...]")
     array_dims = (2, 2, 2)  # default: create a 2 x 2 x 2 chare array
     if len(args) > 1:
         array_dims = tuple([int(x) for x in args[1:]])
@@ -37,8 +42,14 @@ def main(args):
     num_elems = 1
     for size in array_dims:
         num_elems *= size
-    print('Running Hello on', charm.numPes(), 'processors for', num_elems,
-          'elements, array dimensions are', array_dims)
+    print(
+        "Running Hello on",
+        charm.numPes(),
+        "processors for",
+        num_elems,
+        "elements, array dimensions are",
+        array_dims,
+    )
 
     # create a chare array of Hello chares, passing the array dimensions to
     # each element's constructor
diff --git a/examples/hello/group_hello.py b/examples/hello/group_hello.py
index 8346079c..a609ede4 100644
--- a/examples/hello/group_hello.py
+++ b/examples/hello/group_hello.py
@@ -8,10 +8,10 @@
 class Hello(Chare):
 
     def sayHi(self, hello_num):
-        print('Hi[' + str(hello_num) + '] from element', self.thisIndex)
+        print("Hi[" + str(hello_num) + "] from element", self.thisIndex)
         if self.thisIndex == charm.numPes() - 1:
             # we reached the last element
-            print('All done')
+            print("All done")
             exit()
         else:
             # pass the hello message to the next element
@@ -19,11 +19,11 @@ def sayHi(self, hello_num):
 
 
 def main(args):
-    print('\nRunning Hello on', charm.numPes(), 'processors')
+    print("\nRunning Hello on", charm.numPes(), "processors")
     # create a Group of Hello chares (there will be one chare per PE)
     group_proxy = Group(Hello)
     # send hello message to the first element
     group_proxy[0].sayHi(17)
 
 
-charm.start(main)
\ No newline at end of file
+charm.start(main)
diff --git a/examples/hwmon/hwmon.py b/examples/hwmon/hwmon.py
index 9d36b7d9..009a5768 100644
--- a/examples/hwmon/hwmon.py
+++ b/examples/hwmon/hwmon.py
@@ -15,15 +15,15 @@ class Controller(Chare):
 
     @coro
     def start(self, monitors, logfilename=None):
-        print('\nStarting hardware monitor...')
+        print("\nStarting hardware monitor...")
         if logfilename is not None:
-            self.log = open(logfilename, 'a')
+            self.log = open(logfilename, "a")
         else:
             self.log = sys.stdout
         self.hosts = monitors.getHostName(ret=True).get()
         for i, host in enumerate(self.hosts):
-            print('Monitor', i, 'running on host', host)
-        print('Going to run for', EXIT_AFTER_SECS, 'secs')
+            print("Monitor", i, "running on host", host)
+        print("Going to run for", EXIT_AFTER_SECS, "secs")
         monitors.start(self.thisProxy)
         charm.scheduleCallableAfter(self.thisProxy.close, EXIT_AFTER_SECS)
 
@@ -32,7 +32,13 @@ def close(self):
         exit()
 
     def reportAboveThreshold(self, values, from_id):
-        self.log.write('Host ' + str(self.hosts[from_id]) + ' is running hot: ' + str(values) + '\n')
+        self.log.write(
+            "Host "
+            + str(self.hosts[from_id])
+            + " is running hot: "
+            + str(values)
+            + "\n"
+        )
         self.log.flush()
 
 
@@ -55,11 +61,11 @@ def getHostName(self):
     def read_sensor(self):
         # note that this depends on specific output format of the sensors
         # command, which could change in the future. Adapt as needed
-        lines = subprocess.check_output('sensors').decode().split('\n')
+        lines = subprocess.check_output("sensors").decode().split("\n")
         temps = []
         for l in lines:
             fields = l.split()
-            if len(fields) > 0 and fields[0] == 'Core':
+            if len(fields) > 0 and fields[0] == "Core":
                 temps.append(float(fields[2][1:-2]))
         return temps
 
diff --git a/examples/jacobi/jacobi2d.py b/examples/jacobi/jacobi2d.py
index 3cdefa07..d3946f67 100644
--- a/examples/jacobi/jacobi2d.py
+++ b/examples/jacobi/jacobi2d.py
@@ -1,15 +1,19 @@
 from charm4py import charm, Chare, Group, Array, Future, coro, Channel, Reducer
 import time
 import numpy as np
+
 try:
     from numba import jit
+
     numbaFound = True
 except ImportError:
     numbaFound = False
+
     # create a dummy numba.jit decorator
     def jit(*args, **kwargs):
         def deco(func):
             return func
+
         return deco
 
 
@@ -26,8 +30,10 @@ def __init__(self, sim_done_future):
         # store future to notify main function when computation is done
         self.sim_done_future = sim_done_future
         # each chare has a 2D block of the global array (the block is a 2D NumPy array)
-        self.temperature     = np.zeros((blockDimX+2, blockDimY+2), dtype=np.float64)
-        self.new_temperature = np.zeros((blockDimX+2, blockDimY+2), dtype=np.float64)
+        self.temperature = np.zeros((blockDimX + 2, blockDimY + 2), dtype=np.float64)
+        self.new_temperature = np.zeros(
+            (blockDimX + 2, blockDimY + 2), dtype=np.float64
+        )
 
         # determine border conditions, who my neighbors are and establish Channels with them
         self.leftBound = self.rightBound = self.topBound = self.bottomBound = False
@@ -41,67 +47,78 @@ def __init__(self, sim_done_future):
             self.leftBound = True
             self.istart += 1
         else:
-            self.left_nb = Channel(self, remote=self.thisProxy[(x-1, y)])
+            self.left_nb = Channel(self, remote=self.thisProxy[(x - 1, y)])
             self.nbs.append(self.left_nb)
 
         if x == num_chare_x - 1:
             self.rightBound = True
             self.ifinish -= 1
         else:
-            self.right_nb = Channel(self, remote=self.thisProxy[(x+1, y)])
+            self.right_nb = Channel(self, remote=self.thisProxy[(x + 1, y)])
             self.nbs.append(self.right_nb)
 
         if y == 0:
             self.topBound = True
             self.jstart += 1
         else:
-            self.top_nb = Channel(self, remote=self.thisProxy[(x, y-1)])
+            self.top_nb = Channel(self, remote=self.thisProxy[(x, y - 1)])
             self.nbs.append(self.top_nb)
 
         if y == num_chare_y - 1:
             self.bottomBound = True
             self.jfinish -= 1
         else:
-            self.bottom_nb = Channel(self, remote=self.thisProxy[(x, y+1)])
+            self.bottom_nb = Channel(self, remote=self.thisProxy[(x, y + 1)])
             self.nbs.append(self.bottom_nb)
 
         self.constrainBC()
 
     @coro
     def run(self):
-        """ this is the main computation loop """
+        """this is the main computation loop"""
         iteration = 0
         converged = False
         while not converged and iteration < MAX_ITER:
             # send ghost faces to my neighbors. sends are asynchronous
             if not self.leftBound:
-                self.left_nb.send(RIGHT, self.temperature[1, 1:blockDimY+1])
+                self.left_nb.send(RIGHT, self.temperature[1, 1 : blockDimY + 1])
             if not self.rightBound:
-                self.right_nb.send(LEFT, self.temperature[blockDimX, 1:blockDimY+1])
+                self.right_nb.send(LEFT, self.temperature[blockDimX, 1 : blockDimY + 1])
             if not self.topBound:
-                self.top_nb.send(BOTTOM, self.temperature[1:blockDimX+1, 1])
+                self.top_nb.send(BOTTOM, self.temperature[1 : blockDimX + 1, 1])
             if not self.bottomBound:
-                self.bottom_nb.send(TOP, self.temperature[1:blockDimX+1, blockDimY])
+                self.bottom_nb.send(TOP, self.temperature[1 : blockDimX + 1, blockDimY])
 
             # receive ghost data from neighbors. iawait iteratively yields
             # channels as they become ready (have data to receive)
             for nb in charm.iwait(self.nbs):
                 direction, ghosts = nb.recv()
                 if direction == LEFT:
-                    self.temperature[0, 1:len(ghosts)+1] = ghosts
+                    self.temperature[0, 1 : len(ghosts) + 1] = ghosts
                 elif direction == RIGHT:
-                    self.temperature[blockDimX+1, 1:len(ghosts)+1] = ghosts
+                    self.temperature[blockDimX + 1, 1 : len(ghosts) + 1] = ghosts
                 elif direction == TOP:
-                    self.temperature[1:len(ghosts)+1, 0] = ghosts
+                    self.temperature[1 : len(ghosts) + 1, 0] = ghosts
                 elif direction == BOTTOM:
-                    self.temperature[1:len(ghosts)+1, blockDimY+1] = ghosts
+                    self.temperature[1 : len(ghosts) + 1, blockDimY + 1] = ghosts
                 else:
-                    charm.abort('Invalid direction')
-
-            max_error = check_and_compute(self.temperature, self.new_temperature,
-                                          self.istart, self.ifinish, self.jstart, self.jfinish)
-            self.temperature, self.new_temperature = self.new_temperature, self.temperature
-            converged = self.allreduce(max_error <= THRESHOLD, Reducer.logical_and).get()
+                    charm.abort("Invalid direction")
+
+            max_error = check_and_compute(
+                self.temperature,
+                self.new_temperature,
+                self.istart,
+                self.ifinish,
+                self.jstart,
+                self.jfinish,
+            )
+            self.temperature, self.new_temperature = (
+                self.new_temperature,
+                self.temperature,
+            )
+            converged = self.allreduce(
+                max_error <= THRESHOLD, Reducer.logical_and
+            ).get()
             iteration += 1
 
         if self.thisIndex == (0, 0):
@@ -111,17 +128,17 @@ def run(self):
     def constrainBC(self):
         # enforce some boundary conditions
         if self.topBound:
-            self.temperature[0:blockDimX+2, 1] = 1.0
-            self.new_temperature[0:blockDimX+2, 1] = 1.0
+            self.temperature[0 : blockDimX + 2, 1] = 1.0
+            self.new_temperature[0 : blockDimX + 2, 1] = 1.0
         if self.leftBound:
-            self.temperature[1, 0:blockDimY+2] = 1.0
-            self.new_temperature[1, 0:blockDimY+2] = 1.0
+            self.temperature[1, 0 : blockDimY + 2] = 1.0
+            self.new_temperature[1, 0 : blockDimY + 2] = 1.0
         if self.bottomBound:
-            self.temperature[0:blockDimX+2, blockDimY] = 1.0
-            self.new_temperature[0:blockDimX+2, blockDimY] = 1.0
+            self.temperature[0 : blockDimX + 2, blockDimY] = 1.0
+            self.new_temperature[0 : blockDimX + 2, blockDimY] = 1.0
         if self.rightBound:
-            self.temperature[blockDimX, 0:blockDimY+2] = 1.0
-            self.new_temperature[blockDimX, 0:blockDimY+2] = 1.0
+            self.temperature[blockDimX, 0 : blockDimY + 2] = 1.0
+            self.new_temperature[blockDimX, 0 : blockDimY + 2] = 1.0
 
 
 @jit(nopython=True, cache=False)
@@ -131,31 +148,35 @@ def check_and_compute(temperature, new_temperature, istart, ifinish, jstart, jfi
     # when all neighbor values have been received, we update our values and proceed
     for i in range(istart, ifinish):
         for j in range(jstart, jfinish):
-            temperature_ith = (temperature[i,j]
-                               + temperature[i-1,j] + temperature[i+1,j]
-                               + temperature[i,j-1] + temperature[i,j+1]) * 0.2
+            temperature_ith = (
+                temperature[i, j]
+                + temperature[i - 1, j]
+                + temperature[i + 1, j]
+                + temperature[i, j - 1]
+                + temperature[i, j + 1]
+            ) * 0.2
             # update relative error
-            difference = temperature_ith - temperature[i,j]
+            difference = temperature_ith - temperature[i, j]
             if difference < 0:
                 difference *= -1.0
             if max_error <= difference:
                 max_error = difference
-            new_temperature[i,j] = temperature_ith
+            new_temperature[i, j] = temperature_ith
     return max_error
 
 
 class Util(Chare):
     def compile(self):
-        T = np.zeros((blockDimX+2, blockDimY+2), dtype=np.float64)
-        NT = np.zeros((blockDimX+2, blockDimY+2), dtype=np.float64)
-        check_and_compute(T, NT, 1, blockDimX+1, 1, blockDimY+1)
+        T = np.zeros((blockDimX + 2, blockDimY + 2), dtype=np.float64)
+        NT = np.zeros((blockDimX + 2, blockDimY + 2), dtype=np.float64)
+        check_and_compute(T, NT, 1, blockDimX + 1, 1, blockDimY + 1)
 
 
 def main(args):
     global blockDimX, blockDimY, num_chare_x, num_chare_y
     if len(args) != 3 and len(args) != 5:
-        print('\nUsage:\t', args[0], 'array_size block_size')
-        print('\t', args[0], 'array_size_X array_size_Y block_size_X block_size_Y')
+        print("\nUsage:\t", args[0], "array_size block_size")
+        print("\t", args[0], "array_size_X array_size_Y block_size_X block_size_Y")
         exit()
 
     if len(args) == 3:
@@ -171,39 +192,65 @@ def main(args):
     num_chare_y = arrayDimY // blockDimY
 
     # set the following global variables on every PE, wait for the call to complete
-    charm.thisProxy.updateGlobals({'blockDimX': blockDimX,
-                                   'blockDimY': blockDimY,
-                                   'num_chare_x': num_chare_x,
-                                   'num_chare_y': num_chare_y},
-                                   awaitable=True).get()
-
-    print('\nRunning Jacobi on', charm.numPes(), 'processors with', num_chare_x, 'x', num_chare_y, 'chares')
-    print('Array Dimensions:', arrayDimX, 'x', arrayDimY)
-    print('Block Dimensions:', blockDimX, 'x', blockDimY)
-    print('Max iterations:', MAX_ITER)
-    print('Threshold:', THRESHOLD)
+    charm.thisProxy.updateGlobals(
+        {
+            "blockDimX": blockDimX,
+            "blockDimY": blockDimY,
+            "num_chare_x": num_chare_x,
+            "num_chare_y": num_chare_y,
+        },
+        awaitable=True,
+    ).get()
+
+    print(
+        "\nRunning Jacobi on",
+        charm.numPes(),
+        "processors with",
+        num_chare_x,
+        "x",
+        num_chare_y,
+        "chares",
+    )
+    print("Array Dimensions:", arrayDimX, "x", arrayDimY)
+    print("Block Dimensions:", blockDimX, "x", blockDimY)
+    print("Max iterations:", MAX_ITER)
+    print("Threshold:", THRESHOLD)
 
     if numbaFound:
         # wait until Numba functions are compiled on every PE, so we can get consistent benchmark results
         Group(Util).compile(awaitable=True).get()
-        print('Numba compilation complete')
+        print("Numba compilation complete")
     else:
-        print('!!WARNING!! Numba not found. Will run without Numba but it will be very slow')
+        print(
+            "!!WARNING!! Numba not found. Will run without Numba but it will be very slow"
+        )
 
     sim_done = Future()
     # create 2D chare array of Jacobi objects (each chare will hold one block)
     array = Array(Jacobi, (num_chare_x, num_chare_y), args=[sim_done])
     charm.awaitCreation(array)
 
-    print('Starting computation')
+    print("Starting computation")
     initTime = time.time()
     array.run()  # this is a broadcast
     total_iterations = sim_done.get()  # wait until the computation completes
     totalTime = time.time() - initTime
     if total_iterations >= MAX_ITER:
-        print('Finished due to max iterations', total_iterations, 'total time', round(totalTime, 3), 'seconds')
+        print(
+            "Finished due to max iterations",
+            total_iterations,
+            "total time",
+            round(totalTime, 3),
+            "seconds",
+        )
     else:
-        print('Finished due to convergence, iterations', total_iterations, 'total time', round(totalTime, 3), 'seconds')
+        print(
+            "Finished due to convergence, iterations",
+            total_iterations,
+            "total time",
+            round(totalTime, 3),
+            "seconds",
+        )
     exit()
 
 
diff --git a/examples/liveviz/liveviz.py b/examples/liveviz/liveviz.py
index c1538f02..730adc88 100644
--- a/examples/liveviz/liveviz.py
+++ b/examples/liveviz/liveviz.py
@@ -1,36 +1,41 @@
-from charm4py import charm, Chare, Array, Future, Reducer, Group, liveviz, coro
+from charm4py import charm, Chare, Array, liveviz
 import random
 
+
 class Unit(Chare):
-  
-  def __init__(self):
-    self.colors = [(200, 0, 0), (0, 200, 0), (0, 0, 200)]
-    
-  def reqImg(self, request):
-    self.particles = []
-    
-    for _ in range(300):
-      x = random.randint(0, 49)
-      y = random.randint(0, 49)
-      
-      color = random.choice(self.colors)
-      
-      self.particles.append((x, y, color))
-    
-    data = bytearray(50 * 50 * 3)
-    
-    for x, y, (r, g, b) in self.particles:
-      pixel_index = (y * 50 + x) * 3
-      data[pixel_index] = r
-      data[pixel_index + 1] = g
-      data[pixel_index + 2] = b
-    
-    liveviz.LiveViz.deposit(data, self, self.thisIndex[0]*50, self.thisIndex[1]*50, 50, 50, 800, 800)
+
+    def __init__(self):
+        self.colors = [(200, 0, 0), (0, 200, 0), (0, 0, 200)]
+
+    def reqImg(self, request):
+        self.particles = []
+
+        for _ in range(300):
+            x = random.randint(0, 49)
+            y = random.randint(0, 49)
+
+            color = random.choice(self.colors)
+
+            self.particles.append((x, y, color))
+
+        data = bytearray(50 * 50 * 3)
+
+        for x, y, (r, g, b) in self.particles:
+            pixel_index = (y * 50 + x) * 3
+            data[pixel_index] = r
+            data[pixel_index + 1] = g
+            data[pixel_index + 2] = b
+
+        liveviz.LiveViz.deposit(
+            data, self, self.thisIndex[0] * 50, self.thisIndex[1] * 50, 50, 50, 800, 800
+        )
+
 
 def main(args):
-    units = Array(Unit, dims=(16,16))
+    units = Array(Unit, dims=(16, 16))
     config = liveviz.Config()
     liveviz.LiveViz.init(config, units.reqImg)
     print("CCS Handlers registered . Waiting for net requests...")
 
+
 charm.start(main)
diff --git a/examples/liveviz/liveviz_poll.py b/examples/liveviz/liveviz_poll.py
index ed1ea873..5b9f4c15 100644
--- a/examples/liveviz/liveviz_poll.py
+++ b/examples/liveviz/liveviz_poll.py
@@ -1,39 +1,50 @@
-from charm4py import charm, Chare, Array, Future, Reducer, Group, liveviz, coro
-import time
+from charm4py import charm, Chare, Array, liveviz
 import random
 
+
 class Unit(Chare):
-  
-  def __init__(self):
-    self.colors = [(200, 0, 0), (0, 200, 0), (0, 0, 200)]
-    
-  def reqImg(self):
-    for i in range(50):
-      self.particles = []
-      
-      for _ in range(300):
-        x = random.randint(0, 49)
-        y = random.randint(0, 49)
-        
-        color = random.choice(self.colors)
-        
-        self.particles.append((x, y, color))
-      
-      data = bytearray(50 * 50 * 3)
-      
-      for x, y, (r, g, b) in self.particles:
-        pixel_index = (y * 50 + x) * 3
-        data[pixel_index] = r
-        data[pixel_index + 1] = g
-        data[pixel_index + 2] = b
-      
-      liveviz.LiveViz.deposit(data, self, self.thisIndex[0]*50, self.thisIndex[1]*50, 50, 50, 800, 800)
+
+    def __init__(self):
+        self.colors = [(200, 0, 0), (0, 200, 0), (0, 0, 200)]
+
+    def reqImg(self):
+        for i in range(50):
+            self.particles = []
+
+            for _ in range(300):
+                x = random.randint(0, 49)
+                y = random.randint(0, 49)
+
+                color = random.choice(self.colors)
+
+                self.particles.append((x, y, color))
+
+            data = bytearray(50 * 50 * 3)
+
+            for x, y, (r, g, b) in self.particles:
+                pixel_index = (y * 50 + x) * 3
+                data[pixel_index] = r
+                data[pixel_index + 1] = g
+                data[pixel_index + 2] = b
+
+            liveviz.LiveViz.deposit(
+                data,
+                self,
+                self.thisIndex[0] * 50,
+                self.thisIndex[1] * 50,
+                50,
+                50,
+                800,
+                800,
+            )
+
 
 def main(args):
-    units = Array(Unit, dims=(16,16))
+    units = Array(Unit, dims=(16, 16))
     config = liveviz.Config()
     liveviz.LiveViz.init(config, units.reqImg, poll=True)
     units.reqImg()
     print("CCS Handlers registered . Waiting for net requests...")
 
+
 charm.start(main)
diff --git a/examples/miniapps/LeanMD/main.py b/examples/miniapps/LeanMD/main.py
index 1b7ed5b6..321c70c9 100644
--- a/examples/miniapps/LeanMD/main.py
+++ b/examples/miniapps/LeanMD/main.py
@@ -1,4 +1,3 @@
-import array
 import random
 import numba
 import math
@@ -6,15 +5,16 @@
 import time
 from charm4py import *
 
+
 class GlobalDefs:
     # These need to be member variables because it simplifies broadcasting
-    def __init__( self ):
+    def __init__(self):
         self.BLOCK_SIZE = 512
-        self.HYDROGEN_MASS = ( 1.67 * 1e-24 ) # in g
-        self.VDW_A = ( 1.1328 * 1e-133 )  # in (g m^2/s^2) m^12
-        self.VDW_B = ( 2.23224 * 1e-76 ) # (g m^2/s^2) m^6
+        self.HYDROGEN_MASS = 1.67 * 1e-24  # in g
+        self.VDW_A = 1.1328 * 1e-133  # in (g m^2/s^2) m^12
+        self.VDW_B = 2.23224 * 1e-76  # (g m^2/s^2) m^6
 
-        self.ENERGY_VAR = (1.0 * 1e-5 )
+        self.ENERGY_VAR = 1.0 * 1e-5
 
         # average of next two should be what you want as your atom density
         # this should comply with the PERDIM parameter; for KAWAY 1 1 1, the maximum number
@@ -26,7 +26,7 @@ def __init__( self ):
         self.PARTICLES_PER_CELL_START = 100
         self.PARTICLES_PER_CELL_END = 250
 
-        self.DEFAULT_DELTA = 1 # in femtoseconds
+        self.DEFAULT_DELTA = 1  # in femtoseconds
 
         self.DEFAULT_FIRST_LDB = 20
         self.DEFAULT_LDB_PERIOD = 20
@@ -35,25 +35,24 @@ def __init__( self ):
         self.KAWAY_X = 2
         self.KAWAY_Y = 2
         self.KAWAY_Z = 1
-        self.NBRS_X  = (2*self.KAWAY_X+1)
-        self.NBRS_Y = (2*self.KAWAY_Y+1)
-        self.NBRS_Z = (2*self.KAWAY_Z+1)
-        self.NUM_NEIGHBORS = (self.NBRS_X * self.NBRS_Y * self.NBRS_Z)
+        self.NBRS_X = 2 * self.KAWAY_X + 1
+        self.NBRS_Y = 2 * self.KAWAY_Y + 1
+        self.NBRS_Z = 2 * self.KAWAY_Z + 1
+        self.NUM_NEIGHBORS = self.NBRS_X * self.NBRS_Y * self.NBRS_Z
 
         self.CELLARRAY_DIM_X = 3
         self.CELLARRAY_DIM_Y = 3
         self.CELLARRAY_DIM_Z = 3
-        self.PTP_CUT_OFF = 26 # cut off for atom to atom interactions
+        self.PTP_CUT_OFF = 26  # cut off for atom to atom interactions
         self.CELL_MARGIN = 4  # constant diff between cutoff and cell size
-        self.CELL_SIZE_X = (self.PTP_CUT_OFF + self.CELL_MARGIN)//self.KAWAY_X
-        self.CELL_SIZE_Y = (self.PTP_CUT_OFF + self.CELL_MARGIN)//self.KAWAY_Y
-        self.CELL_SIZE_Z = (self.PTP_CUT_OFF + self.CELL_MARGIN)//self.KAWAY_Z
+        self.CELL_SIZE_X = (self.PTP_CUT_OFF + self.CELL_MARGIN) // self.KAWAY_X
+        self.CELL_SIZE_Y = (self.PTP_CUT_OFF + self.CELL_MARGIN) // self.KAWAY_Y
+        self.CELL_SIZE_Z = (self.PTP_CUT_OFF + self.CELL_MARGIN) // self.KAWAY_Z
 
         self.cellArrayDimX = self.CELLARRAY_DIM_X
         self.cellArrayDimY = self.CELLARRAY_DIM_Y
         self.cellArrayDimZ = self.CELLARRAY_DIM_Z
 
-
         # variables to control initial uniform placement of atoms;
         # atoms should not be too close at startup for a stable system
         # PERDIM * GAP should be less than (PTPCUTOFF+CELL_MARGIN)
@@ -67,158 +66,197 @@ def __init__( self ):
 
         self.MIGRATE_STEPCOUNT = 20
         self.DEFAULT_FINALSTEPCOUNT = 1001
-        self.MAX_VELOCITY = .1  # in A/fs
+        self.MAX_VELOCITY = 0.1  # in A/fs
 
         self.finalStepCount = self.DEFAULT_FINALSTEPCOUNT
         self.firstLdbStep = self.DEFAULT_FIRST_LDB
         self.ldbPeriod = self.DEFAULT_LDB_PERIOD
 
-
         # Proxies for the different arrays
         self.cellArray = None
         self.computeArray = None
 
+
 def WRAP_X(a):
     return (a + cellArrayDimX) % cellArrayDimX
+
+
 def WRAP_Y(a):
     return (a + cellArrayDimY) % cellArrayDimY
+
+
 def WRAP_Z(a):
     return (a + cellArrayDimZ) % cellArrayDimZ
 
 
-@numba.njit( cache = True )
-def velocityCheck( inVelocity: float ) -> float:
-    if abs( inVelocity ) > MAX_VELOCITY:
+@numba.njit(cache=True)
+def velocityCheck(inVelocity: float) -> float:
+    if abs(inVelocity) > MAX_VELOCITY:
         if inVelocity < 0.0:
             return -1 * MAX_VELOCITY
         return MAX_VELOCITY
     return inVelocity
 
 
-@numba.njit( cache = True )
-def updateProperties( forces, particle_mass, particle_vel, particle_pos,
-                      energy, stepCount, finalStepCount ):
-    powTen = 10.0 ** 10
-    powTwenty = 10.0 ** -20
+@numba.njit(cache=True)
+def updateProperties(
+    forces, particle_mass, particle_vel, particle_pos, energy, stepCount, finalStepCount
+):
+    powTen = 10.0**10
+    powTwenty = 10.0**-20
     realTimeDeltaVel = DEFAULT_DELTA * powTwenty
     for i in range(particle_mass.size):
         mass = particle_mass[i]
         # calculate energy only at beginning and end
-        if (stepCount == 1):
-            dot = particle_vel[i,0]**2 + particle_vel[i,1]**2 + particle_vel[i,2]**2
-            energy[0] += (0.5 * mass * dot * powTen)  # in milliJoules
-        elif (stepCount == finalStepCount):
-            dot = particle_vel[i,0]**2 + particle_vel[i,1]**2 + particle_vel[i,2]**2
-            energy[1] += (0.5 * mass * dot * powTen)
+        if stepCount == 1:
+            dot = (
+                particle_vel[i, 0] ** 2
+                + particle_vel[i, 1] ** 2
+                + particle_vel[i, 2] ** 2
+            )
+            energy[0] += 0.5 * mass * dot * powTen  # in milliJoules
+        elif stepCount == finalStepCount:
+            dot = (
+                particle_vel[i, 0] ** 2
+                + particle_vel[i, 1] ** 2
+                + particle_vel[i, 2] ** 2
+            )
+            energy[1] += 0.5 * mass * dot * powTen
             # apply kinetic equations
         invMassParticle = 1.0 / mass
-            #self.particles[i].acc = forces[i] * invMassParticle  # in m/sec^2
-            #self.particles[i].vel += self.particles[i].acc * realTimeDeltaVel  # in A/fs
-            # in m/sec^2
-        particle_vel[i,0] += forces[i,0] * invMassParticle * realTimeDeltaVel  # in A/fs
-        particle_vel[i,1] += forces[i,1] * invMassParticle * realTimeDeltaVel  # in A/fs
-        particle_vel[i,2] += forces[i,2] * invMassParticle * realTimeDeltaVel  # in A/fs
-
-        particle_vel[i,0] = velocityCheck(particle_vel[i,0])
-        particle_vel[i,1] = velocityCheck(particle_vel[i,1])
-        particle_vel[i,2] = velocityCheck(particle_vel[i,2])
-
-        particle_pos[i,0] += particle_vel[i,0] * DEFAULT_DELTA  # in A
-        particle_pos[i,1] += particle_vel[i,1] * DEFAULT_DELTA  # in A
-        particle_pos[i,2] += particle_vel[i,2] * DEFAULT_DELTA  # in A
-
-class CellMap( ArrayMap ):
+        # self.particles[i].acc = forces[i] * invMassParticle  # in m/sec^2
+        # self.particles[i].vel += self.particles[i].acc * realTimeDeltaVel  # in A/fs
+        # in m/sec^2
+        particle_vel[i, 0] += (
+            forces[i, 0] * invMassParticle * realTimeDeltaVel
+        )  # in A/fs
+        particle_vel[i, 1] += (
+            forces[i, 1] * invMassParticle * realTimeDeltaVel
+        )  # in A/fs
+        particle_vel[i, 2] += (
+            forces[i, 2] * invMassParticle * realTimeDeltaVel
+        )  # in A/fs
+
+        particle_vel[i, 0] = velocityCheck(particle_vel[i, 0])
+        particle_vel[i, 1] = velocityCheck(particle_vel[i, 1])
+        particle_vel[i, 2] = velocityCheck(particle_vel[i, 2])
+
+        particle_pos[i, 0] += particle_vel[i, 0] * DEFAULT_DELTA  # in A
+        particle_pos[i, 1] += particle_vel[i, 1] * DEFAULT_DELTA  # in A
+        particle_pos[i, 2] += particle_vel[i, 2] * DEFAULT_DELTA  # in A
+
+
+class CellMap(ArrayMap):
     # group
-    def __init__( self, cellX, cellY, cellZ ):
+    def __init__(self, cellX, cellY, cellZ):
         self.num_x = cellX
         self.num_y = cellY
         self.num_z = cellZ
 
         self.num_yz = self.num_y * self.num_z
-        self.ratio = charm.numPes() / ( self.num_x * self.num_yz )
+        self.ratio = charm.numPes() / (self.num_x * self.num_yz)
+
+    def procNum(self, index):
+        patchID = index[2] + index[1] * self.num_z + index[0] * self.num_yz
+        return int(patchID * self.ratio)
 
-    def procNum( self, index ):
-        patchID = index[ 2 ] + index[ 1 ] * self.num_z + index[ 0 ] * self.num_yz
-        return int( patchID * self.ratio )
 
 class Particle:
-    def __init__( self ):
+    def __init__(self):
         self.mass = 0.0
-        self.position = np.zeros( 3 )
-        self.acceleration = np.zeros( 3 )
-        self.velocity = np.zeros( 3 )
+        self.position = np.zeros(3)
+        self.acceleration = np.zeros(3)
+        self.velocity = np.zeros(3)
+
 
-class Cell( Chare ):
+class Cell(Chare):
 
-    def __init__( self, energyFuture ):
-        self.stepCount :int = 0
-        self.mynumParts :int = 0
-        self.inbrs :int = NUM_NEIGHBORS
+    def __init__(self, energyFuture):
+        self.stepCount: int = 0
+        self.mynumParts: int = 0
+        self.inbrs: int = NUM_NEIGHBORS
         self.stepTime: float = 0
         self.computesList = [0] * self.inbrs
         self.neighborChannels = list()
-        self.updateCount :int = 0
+        self.updateCount: int = 0
         self.duplicateComputes = None
-        self.energy = np.zeros(2, dtype = np.float64)
+        self.energy = np.zeros(2, dtype=np.float64)
         self.mCastSecProxy = None
 
         self.energyFuture = energyFuture
-        self.myid: int = self.thisIndex[ 2 ] + cellArrayDimZ * \
-                    ( self.thisIndex[1] + self.thisIndex[0] * cellArrayDimY)
+        self.myid: int = self.thisIndex[2] + cellArrayDimZ * (
+            self.thisIndex[1] + self.thisIndex[0] * cellArrayDimY
+        )
 
-        num = self.myid * (PARTICLES_PER_CELL_END-PARTICLES_PER_CELL_START)
-        denom = cellArrayDimX*cellArrayDimY*cellArrayDimZ
-        self.myNumParts = PARTICLES_PER_CELL_START + ( num // denom )
+        num = self.myid * (PARTICLES_PER_CELL_END - PARTICLES_PER_CELL_START)
+        denom = cellArrayDimX * cellArrayDimY * cellArrayDimZ
+        self.myNumParts = PARTICLES_PER_CELL_START + (num // denom)
 
-        self.particle_mass = np.zeros( self.myNumParts, dtype = np.float64 )
-        self.particle_pos = np.zeros( ( self.myNumParts, 3 ), dtype = np.float64 )
-        self.particle_vel = np.zeros( ( self.myNumParts, 3 ), dtype = np.float64 )
+        self.particle_mass = np.zeros(self.myNumParts, dtype=np.float64)
+        self.particle_pos = np.zeros((self.myNumParts, 3), dtype=np.float64)
+        self.particle_vel = np.zeros((self.myNumParts, 3), dtype=np.float64)
 
         self.neighborChannels = self.createNeighborChannels()
 
-        random.seed( self.myid )
+        random.seed(self.myid)
 
-        for i in range( self.myNumParts ):
-            self.particle_mass[ i ] = HYDROGEN_MASS
+        for i in range(self.myNumParts):
+            self.particle_mass[i] = HYDROGEN_MASS
 
             # uniformly place particles, avoid close distance among them
-            x = (GAP/2.0) + self.thisIndex[0] * CELL_SIZE_X + ((i*KAWAY_Y*KAWAY_Z)//(PERDIM*PERDIM))*GAP
-            y = (GAP/2.0) + self.thisIndex[1] * CELL_SIZE_Y + (((i*KAWAY_Z)//PERDIM)%(PERDIM//KAWAY_Y))*GAP
-            z = (GAP/2.0) + self.thisIndex[2] * CELL_SIZE_Z + (i%(PERDIM//KAWAY_Z))*GAP
-            self.particle_pos[ i ] = x, y, z
-
-            self.particle_vel[i] = np.array( ( (random.random() - 0.5) * .2 * MAX_VELOCITY,
-                                               (random.random() - 0.5) * .2 * MAX_VELOCITY,
-                                               (random.random() - 0.5) * .2 * MAX_VELOCITY),
-                                            dtype = np.float64
+            x = (
+                (GAP / 2.0)
+                + self.thisIndex[0] * CELL_SIZE_X
+                + ((i * KAWAY_Y * KAWAY_Z) // (PERDIM * PERDIM)) * GAP
+            )
+            y = (
+                (GAP / 2.0)
+                + self.thisIndex[1] * CELL_SIZE_Y
+                + (((i * KAWAY_Z) // PERDIM) % (PERDIM // KAWAY_Y)) * GAP
+            )
+            z = (
+                (GAP / 2.0)
+                + self.thisIndex[2] * CELL_SIZE_Z
+                + (i % (PERDIM // KAWAY_Z)) * GAP
+            )
+            self.particle_pos[i] = x, y, z
+
+            self.particle_vel[i] = np.array(
+                (
+                    (random.random() - 0.5) * 0.2 * MAX_VELOCITY,
+                    (random.random() - 0.5) * 0.2 * MAX_VELOCITY,
+                    (random.random() - 0.5) * 0.2 * MAX_VELOCITY,
+                ),
+                dtype=np.float64,
             )
 
-        self.energy[ 0 ] = 0
-        self.energy[ 1 ] = 0
+        self.energy[0] = 0
+        self.energy[1] = 0
 
-    def reportDuplicates( self ):
+    def reportDuplicates(self):
         for d in self.duplicateComputes:
-            computeArray[ d ].setDuplicate()
+            computeArray[d].setDuplicate()
 
-    def nbrNumtoNbrIdx( self, num ):
+    def nbrNumtoNbrIdx(self, num):
         x1 = num // (NBRS_Y * NBRS_Z) - NBRS_X // 2
         y1 = (num % (NBRS_Y * NBRS_Z)) // NBRS_Z - NBRS_Y // 2
         z1 = num % NBRS_Z - NBRS_Z // 2
 
-        return ( WRAP_X( self.thisIndex[ 0 ] + x1 ),
-                 WRAP_Y( self.thisIndex[ 1 ] + y1 ),
-                 WRAP_Z( self.thisIndex[ 2 ] + z1 )
+        return (
+            WRAP_X(self.thisIndex[0] + x1),
+            WRAP_Y(self.thisIndex[1] + y1),
+            WRAP_Z(self.thisIndex[2] + z1),
         )
 
     @coro
-    def createNeighborChannels( self ):
+    def createNeighborChannels(self):
         output = list()
-        for num in range( self.inbrs ):
-            nbrIdx = self.nbrNumtoNbrIdx( num )
-            output.append( Channel( self, remote = self.thisProxy[ nbrIdx ] ) )
+        for num in range(self.inbrs):
+            nbrIdx = self.nbrNumtoNbrIdx(num)
+            output.append(Channel(self, remote=self.thisProxy[nbrIdx]))
         return output
 
-    def createComputes( self ):
+    def createComputes(self):
         x, y, z = self.thisIndex
 
         currPe = charm.myPe() + 1
@@ -226,9 +264,9 @@ def createComputes( self ):
         dupes = list()
         seen = set()
 
-        for num in range( self.inbrs ):
-            dx = num // ( NBRS_Y * NBRS_Z ) - NBRS_X // 2
-            dy = ( num % ( NBRS_Y * NBRS_Z ) ) // NBRS_Z - NBRS_Y // 2
+        for num in range(self.inbrs):
+            dx = num // (NBRS_Y * NBRS_Z) - NBRS_X // 2
+            dy = (num % (NBRS_Y * NBRS_Z)) // NBRS_Z - NBRS_Y // 2
             dz = num % NBRS_Z - NBRS_Z // 2
 
             if num >= self.inbrs // 2:
@@ -243,9 +281,12 @@ def createComputes( self ):
                 currPe += 1
 
                 # CkArrayIndex6D index(px1, py1, pz1, px2, py2, pz2);
-                index = ( px1, py1, pz1, px2, py2, pz2 )
-                computeArray.ckInsert( index, onPE = ( currPe ) % charm.numPes(),
-                                       args = [ self.energyFuture ], useAtSync = True
+                index = (px1, py1, pz1, px2, py2, pz2)
+                computeArray.ckInsert(
+                    index,
+                    onPE=(currPe) % charm.numPes(),
+                    args=[self.energyFuture],
+                    useAtSync=True,
                 )
                 self.computesList[num] = index
             else:
@@ -260,179 +301,190 @@ def createComputes( self ):
 
         for c in self.computesList:
             if c in seen:
-                dupes.append( c )
+                dupes.append(c)
             seen.add(c)
 
         self.computesList = list(seen)
         self.duplicateComputes = dupes
 
-    def migrateToCell( self, particlePos ):
-        x = self.thisIndex[ 0 ] * CELL_SIZE_X + CELL_ORIGIN_X
-        y = self.thisIndex[ 1 ] * CELL_SIZE_Y + CELL_ORIGIN_Y
-        z = self.thisIndex[ 2 ] * CELL_SIZE_Z + CELL_ORIGIN_Z
+    def migrateToCell(self, particlePos):
+        x = self.thisIndex[0] * CELL_SIZE_X + CELL_ORIGIN_X
+        y = self.thisIndex[1] * CELL_SIZE_Y + CELL_ORIGIN_Y
+        z = self.thisIndex[2] * CELL_SIZE_Z + CELL_ORIGIN_Z
 
         px = py = pz = 0
-        particleXpos = particlePos[ 0 ]
-        particleYpos = particlePos[ 1 ]
-        particleZpos = particlePos[ 2 ]
+        particleXpos = particlePos[0]
+        particleYpos = particlePos[1]
+        particleZpos = particlePos[2]
 
-        if particleXpos < (x-CELL_SIZE_X):
+        if particleXpos < (x - CELL_SIZE_X):
             px = -2
         elif particleXpos < x:
             px = -1
-        elif particleXpos > (x+2*CELL_SIZE_X):
+        elif particleXpos > (x + 2 * CELL_SIZE_X):
             px = 2
-        elif particleXpos > (x+CELL_SIZE_X):
+        elif particleXpos > (x + CELL_SIZE_X):
             px = 1
 
-        if particleYpos < (y-CELL_SIZE_Y):
+        if particleYpos < (y - CELL_SIZE_Y):
             py = -2
         elif particleYpos < y:
             py = -1
-        elif particleYpos > (y+2*CELL_SIZE_Y):
+        elif particleYpos > (y + 2 * CELL_SIZE_Y):
             py = 2
-        elif particleYpos > (y+CELL_SIZE_Y):
+        elif particleYpos > (y + CELL_SIZE_Y):
             py = 1
 
-        if particleZpos < (z-CELL_SIZE_Z):
+        if particleZpos < (z - CELL_SIZE_Z):
             pz = -2
         elif particleZpos < z:
             pz = -1
-        elif particleZpos > (z+2*CELL_SIZE_Z):
+        elif particleZpos > (z + 2 * CELL_SIZE_Z):
             pz = 2
-        elif particleZpos > (z+CELL_SIZE_Z):
+        elif particleZpos > (z + CELL_SIZE_Z):
             pz = 1
 
-        return ( px, py, pz ) # setting px, py, pz to zero
-
-    def wrapAround( self, particlePos ):
-        if particlePos[ 0 ] < CELL_ORIGIN_X:
-            particlePos[ 0 ] += CELL_SIZE_X*cellArrayDimX
-        if particlePos[ 1 ] < CELL_ORIGIN_Y:
-            particlePos[ 1 ] += CELL_SIZE_Y*cellArrayDimY
-        if particlePos[ 2 ] < CELL_ORIGIN_Z:
-            particlePos[ 2 ] += CELL_SIZE_Z*cellArrayDimZ
-
-        if particlePos[ 0 ] > CELL_ORIGIN_X + CELL_SIZE_X*cellArrayDimX:
-            particlePos[ 0 ] -= CELL_SIZE_X*cellArrayDimX
-        if particlePos[ 1 ] > CELL_ORIGIN_Y + CELL_SIZE_Y*cellArrayDimY:
-            particlePos[ 1 ] -= CELL_SIZE_Y*cellArrayDimY
-        if particlePos[ 2 ] > CELL_ORIGIN_Z + CELL_SIZE_Z*cellArrayDimZ:
-            particlePos[ 2 ] -= CELL_SIZE_Z*cellArrayDimZ
+        return (px, py, pz)  # per-axis cell offsets (-2..2); all zero means no move
+
+    def wrapAround(self, particlePos):
+        if particlePos[0] < CELL_ORIGIN_X:
+            particlePos[0] += CELL_SIZE_X * cellArrayDimX
+        if particlePos[1] < CELL_ORIGIN_Y:
+            particlePos[1] += CELL_SIZE_Y * cellArrayDimY
+        if particlePos[2] < CELL_ORIGIN_Z:
+            particlePos[2] += CELL_SIZE_Z * cellArrayDimZ
+
+        if particlePos[0] > CELL_ORIGIN_X + CELL_SIZE_X * cellArrayDimX:
+            particlePos[0] -= CELL_SIZE_X * cellArrayDimX
+        if particlePos[1] > CELL_ORIGIN_Y + CELL_SIZE_Y * cellArrayDimY:
+            particlePos[1] -= CELL_SIZE_Y * cellArrayDimY
+        if particlePos[2] > CELL_ORIGIN_Z + CELL_SIZE_Z * cellArrayDimZ:
+            particlePos[2] -= CELL_SIZE_Z * cellArrayDimZ
         return particlePos
 
-    def createSection( self ):
+    def createSection(self):
         # computeArray is global
-        self.mCastSecProxy = charm.split( computeArray, 1, elems = [ self.computesList ] )[ 0 ]
+        self.mCastSecProxy = charm.split(computeArray, 1, elems=[self.computesList])[0]
 
     @coro
-    def migrateParticles( self ):
-        outgoing = [ [[],[],[]] for _ in range(self.inbrs) ]
+    def migrateParticles(self):
+        outgoing = [[[], [], []] for _ in range(self.inbrs)]
 
         size = numParts = self.particle_mass.size
 
-        for i in range(numParts - 1, -1 -1 ):
-            x1, y1, z1 = self.migrateToCell( self.particle_pos[ i ] )
-            if any( [x1, y1, z1 ] ):
-                outIndex = (x1+KAWAY_X)*NBRS_Y*NBRS_Z + (y1+KAWAY_Y)*NBRS_Z + (z1+KAWAY_Z)
+        for i in range(numParts - 1, -1, -1):  # reverse order keeps swap-removal safe
+            x1, y1, z1 = self.migrateToCell(self.particle_pos[i])
+            if any([x1, y1, z1]):
+                outIndex = (
+                    (x1 + KAWAY_X) * NBRS_Y * NBRS_Z
+                    + (y1 + KAWAY_Y) * NBRS_Z
+                    + (z1 + KAWAY_Z)
+                )
 
                 outgoing[outIndex][0].append(self.particle_mass[i])
-                outgoing[outIndex][1].append(self.wrapAround(self.particle_pos[i].copy()))
+                outgoing[outIndex][1].append(
+                    self.wrapAround(self.particle_pos[i].copy())
+                )
                 outgoing[outIndex][2].append(self.particle_vel[i].copy())
-                self.particle_mass[i] = self.particle_mass[size-1]
-                self.particle_pos[i]  = self.particle_pos[size-1]
-                self.particle_vel[i]  = self.particle_vel[size-1]
+                self.particle_mass[i] = self.particle_mass[size - 1]
+                self.particle_pos[i] = self.particle_pos[size - 1]
+                self.particle_vel[i] = self.particle_vel[size - 1]
                 size -= 1
 
-
         if size < numParts:
             self.particle_mass = self.particle_mass[:size].copy()
-            self.particle_pos  = self.particle_pos[:size].copy()
-            self.particle_vel  = self.particle_vel[:size].copy()
-
+            self.particle_pos = self.particle_pos[:size].copy()
+            self.particle_vel = self.particle_vel[:size].copy()
 
-        for num in range( self.inbrs ):
+        for num in range(self.inbrs):
             numOutgoing = len(outgoing[num][0])
             if numOutgoing > 0:
                 mass = np.array(outgoing[num][0], dtype=np.float64)
-                pos  = np.concatenate(outgoing[num][1])
-                vel  = np.concatenate(outgoing[num][2])
-                self.neighborChannels[ num ].send(True, mass, pos, vel)
+                pos = np.concatenate(outgoing[num][1])
+                vel = np.concatenate(outgoing[num][2])
+                self.neighborChannels[num].send(True, mass, pos, vel)
             else:
-                self.neighborChannels[ num ].send(True, None, None, None)
+                self.neighborChannels[num].send(True, None, None, None)
 
-
-    def sendPositions( self, forceFuture ):
-        self.mCastSecProxy.calculateForces( self.mCastSecProxy,
-                                            np.array(self.thisIndex),
-                                            self.particle_pos, forceFuture
+    def sendPositions(self, forceFuture):
+        self.mCastSecProxy.calculateForces(
+            self.mCastSecProxy, np.array(self.thisIndex), self.particle_pos, forceFuture
         )
 
     def resumeFromSync(self):
-        if not any( self.thisIndex ):
+        if not any(self.thisIndex):
             stepT = time.time()
-            print( f'Step {self.stepCount} Time {(stepT-self.stepTime)*1000} ms/step' )
+            print(f"Step {self.stepCount} Time {(stepT-self.stepTime)*1000} ms/step")
             self.stepTime = stepT
-        self.thisProxy[ self.thisIndex ].run()
-
+        self.thisProxy[self.thisIndex].run()
 
     @coro
-    def run( self ):
+    def run(self):
         if self.stepCount == 0:
             self.reportDuplicates()
             self.createSection()
             self.stepCount = 1
 
         # todo: something not quite right here
-        if not any( self.thisIndex ):
+        if not any(self.thisIndex):
             self.stepTime = time.time()
 
-
-        for self.stepCount in range( self.stepCount, finalStepCount + 1 ):
+        for self.stepCount in range(self.stepCount, finalStepCount + 1):
             reduceForceFuture = Future()
-            self.sendPositions( reduceForceFuture )
+            self.sendPositions(reduceForceFuture)
             forces = reduceForceFuture.get()
-            updateProperties( forces, self.particle_mass, self.particle_vel,
-                              self.particle_pos, self.energy, self.stepCount,
-                              finalStepCount
+            updateProperties(
+                forces,
+                self.particle_mass,
+                self.particle_vel,
+                self.particle_pos,
+                self.energy,
+                self.stepCount,
+                finalStepCount,
             )
 
             if not self.stepCount % MIGRATE_STEPCOUNT:
                 self.migrateParticles()
-                for ch in charm.iwait( self.neighborChannels ):
-                    self.receiveParticles( *ch.recv() )
+                for ch in charm.iwait(self.neighborChannels):
+                    self.receiveParticles(*ch.recv())
 
             # TODO: Add a check to see if load balancing should be done here
-            if self.shouldLoadBalance(): 
+            if self.shouldLoadBalance():
                 self.AtSync()
                 return
 
-            if not any( self.thisIndex ):
+            if not any(self.thisIndex):
                 stepT = time.time()
-                print( f'Step {self.stepCount} Time {(stepT-self.stepTime)*1000} ms/step' )
+                print(
+                    f"Step {self.stepCount} Time {(stepT-self.stepTime)*1000} ms/step"
+                )
                 self.stepTime = stepT
-        self.reduce( self.energyFuture, self.energy, Reducer.sum )
-
-    def shouldLoadBalance( self ):
-        return not any( [ self.stepCount <= firstLdbStep, self.stepCount % ldbPeriod, self.stepCount >= finalStepCount ] )
+        self.reduce(self.energyFuture, self.energy, Reducer.sum)
+
+    def shouldLoadBalance(self):
+        return not any(
+            [
+                self.stepCount <= firstLdbStep,
+                self.stepCount % ldbPeriod,
+                self.stepCount >= finalStepCount,
+            ]
+        )
 
-    def receiveParticles( self, r, mass, poss, vel ):
+    def receiveParticles(self, r, mass, poss, vel):
         if mass is not None:
             total = self.particle_mass.size + mass.size
             self.particle_mass = np.append(self.particle_mass, mass)
-            self.particle_pos  = np.append(self.particle_pos, pos)
-            self.particle_vel  = np.append(self.particle_vel, vel)
+            self.particle_pos = np.append(self.particle_pos, poss)
+            self.particle_vel = np.append(self.particle_vel, vel)
             self.particle_pos.shape = (total, 3)
             self.particle_vel.shape = (total, 3)
 
 
 class Physics:
 
-    @numba.njit( cache = True )
-    def calcPairForces( firstIndex, secondIndex,
-                        firstPos, secondPos,
-                        stepCount,
-                        force1, force2
+    @numba.njit(cache=True)
+    def calcPairForces(
+        firstIndex, secondIndex, firstPos, secondPos, stepCount, force1, force2
     ) -> float:
 
         firstLen = firstPos.shape[0]
@@ -444,102 +496,106 @@ def calcPairForces( firstIndex, secondIndex,
 
         # check for wrap around and adjust locations accordingly
         diff_0, diff_1, diff_2 = 0.0, 0.0, 0.0
-        if abs(firstIndex[0] - secondIndex[0]) > 1 :
+        if abs(firstIndex[0] - secondIndex[0]) > 1:
             diff_0 = CELL_SIZE_X * cellArrayDimX
-            if secondIndex[0] < firstIndex[0] : diff_0 = -1 * diff_0
-        if abs(firstIndex[1] - secondIndex[1]) > 1 :
+            if secondIndex[0] < firstIndex[0]:
+                diff_0 = -1 * diff_0
+        if abs(firstIndex[1] - secondIndex[1]) > 1:
             diff_1 = CELL_SIZE_Y * cellArrayDimY
-            if secondIndex[1] < firstIndex[1] : diff_1 = -1 * diff_1
+            if secondIndex[1] < firstIndex[1]:
+                diff_1 = -1 * diff_1
 
-        if abs(firstIndex[2] - secondIndex[2]) > 1 :
+        if abs(firstIndex[2] - secondIndex[2]) > 1:
             diff_2 = CELL_SIZE_Z * cellArrayDimZ
-            if secondIndex[2] < firstIndex[2] : diff_2 = -1 * diff_2
+            if secondIndex[2] < firstIndex[2]:
+                diff_2 = -1 * diff_2
 
         ptpCutOffSqd = PTP_CUT_OFF * PTP_CUT_OFF
-        powTen = 10.0 ** -10
-        powTwenty = 10.0 ** -20
+        powTen = 10.0**-10
+        powTwenty = 10.0**-20
 
         separation_0, separation_1, separation_2 = 0.0, 0.0, 0.0
         for i1 in range(0, firstLen, BLOCK_SIZE):
             for j1 in range(0, secondLen, BLOCK_SIZE):
-                for i in range(i1, min(i1+BLOCK_SIZE, firstLen)):
-                    for j in range(j1, min(j1+BLOCK_SIZE, secondLen)):
-                        #separation = firstPos[i] - secondPos[j]
-                        separation_0 = firstPos[i,0] + diff_0 - secondPos[j,0]
-                        separation_1 = firstPos[i,1] + diff_1 - secondPos[j,1]
-                        separation_2 = firstPos[i,2] + diff_2 - secondPos[j,2]
+                for i in range(i1, min(i1 + BLOCK_SIZE, firstLen)):
+                    for j in range(j1, min(j1 + BLOCK_SIZE, secondLen)):
+                        # separation = firstPos[i] - secondPos[j]
+                        separation_0 = firstPos[i, 0] + diff_0 - secondPos[j, 0]
+                        separation_1 = firstPos[i, 1] + diff_1 - secondPos[j, 1]
+                        separation_2 = firstPos[i, 2] + diff_2 - secondPos[j, 2]
                         rsqd = separation_0**2 + separation_1**2 + separation_2**2
-                        #rsqd = dot(separation, separation)
+                        # rsqd = dot(separation, separation)
                         if rsqd > 1 and rsqd < ptpCutOffSqd:
                             rsqd = rsqd * powTwenty
                             r = math.sqrt(rsqd)
                             rSix = rsqd * rsqd * rsqd
                             rTwelve = rSix * rSix
-                            f = ( (12 * VDW_A) / rTwelve - (6 * VDW_B) / rSix)
+                            f = (12 * VDW_A) / rTwelve - (6 * VDW_B) / rSix
                             if doEnergy:
-                                energy += ( VDW_A / rTwelve - VDW_B / rSix)  # in milliJoules
+                                energy += (
+                                    VDW_A / rTwelve - VDW_B / rSix
+                                )  # in milliJoules
                                 fr = f / rsqd
-                                #force = separation * (fr * powTen)
-                                #force1[i] += force
-                                #force2[j] -= force
+                                # force = separation * (fr * powTen)
+                                # force1[i] += force
+                                # force2[j] -= force
                                 force_0 = separation_0 * (fr * powTen)
                                 force_1 = separation_1 * (fr * powTen)
                                 force_2 = separation_2 * (fr * powTen)
-                                force1[i,0] += force_0
-                                force1[i,1] += force_1
-                                force1[i,2] += force_2
-                                force2[j,0] -= force_0
-                                force2[j,1] -= force_1
-                                force2[j,2] -= force_2
+                                force1[i, 0] += force_0
+                                force1[i, 1] += force_1
+                                force1[i, 2] += force_2
+                                force2[j, 0] -= force_0
+                                force2[j, 1] -= force_1
+                                force2[j, 2] -= force_2
 
         return energy
 
-    @numba.njit( cache = True )
-    def calcInternalForces( firstPos, firstIndex, stepCount, force1 ):
+    @numba.njit(cache=True)
+    def calcInternalForces(firstPos, firstIndex, stepCount, force1):
         firstLen = firstPos.shape[0]
         energy = 0.0
         doEnergy = False
-        if (stepCount == 1 or stepCount == finalStepCount):
+        if stepCount == 1 or stepCount == finalStepCount:
             doEnergy = True
 
         ptpCutOffSqd = PTP_CUT_OFF * PTP_CUT_OFF
-        powTen = 10.0 ** -10
-        powTwenty = 10.0 ** -20
+        powTen = 10.0**-10
+        powTwenty = 10.0**-20
         separation_0, separation_1, separation_2 = 0.0, 0.0, 0.0
         force_0, force_1, force_2 = 0.0, 0.0, 0.0
-        for i in range(firstLen) :
-            for j in range(i+1, firstLen) :
+        for i in range(firstLen):
+            for j in range(i + 1, firstLen):
                 # computing base values
-                separation_0 = firstPos[i,0] - firstPos[j,0]
-                separation_1 = firstPos[i,1] - firstPos[j,1]
-                separation_2 = firstPos[i,2] - firstPos[j,2]
+                separation_0 = firstPos[i, 0] - firstPos[j, 0]
+                separation_1 = firstPos[i, 1] - firstPos[j, 1]
+                separation_2 = firstPos[i, 2] - firstPos[j, 2]
                 rsqd = separation_0**2 + separation_1**2 + separation_2**2
                 if rsqd > 1 and rsqd < ptpCutOffSqd:
                     rsqd = rsqd * powTwenty
                     r = math.sqrt(rsqd)
                     rSix = rsqd * rsqd * rsqd
                     rTwelve = rSix * rSix
-                    f = ( (12 * VDW_A) / rTwelve - (6 * VDW_B) / rSix)
-                    if(doEnergy) :
-                        energy += ( VDW_A / rTwelve - VDW_B / rSix)
+                    f = (12 * VDW_A) / rTwelve - (6 * VDW_B) / rSix
+                    if doEnergy:
+                        energy += VDW_A / rTwelve - VDW_B / rSix
 
                     fr = f / rsqd
                     force_0 = separation_0 * (fr * powTen)
                     force_1 = separation_1 * (fr * powTen)
                     force_2 = separation_2 * (fr * powTen)
-                    force1[i,0] += force_0
-                    force1[i,1] += force_1
-                    force1[i,2] += force_2
-                    force1[j,0] -= force_0
-                    force1[j,1] -= force_1
-                    force1[j,2] -= force_2
+                    force1[i, 0] += force_0
+                    force1[i, 1] += force_1
+                    force1[i, 2] += force_2
+                    force1[j, 0] -= force_0
+                    force1[j, 1] -= force_1
+                    force1[j, 2] -= force_2
 
         return energy
 
 
-
-class Compute( Chare ):
-    def __init__( self, energySumFuture = None ):
+class Compute(Chare):
+    def __init__(self, energySumFuture=None):
         self.energy = np.zeros(2, dtype=np.float64)
         self.stepCount = 1
         self.energySumFuture = energySumFuture
@@ -549,41 +605,51 @@ def __init__( self, energySumFuture = None ):
         self.isDuplicate = False
         self._self_compute = None
 
-
-    def isSelfCompute( self ):
+    def isSelfCompute(self):
         if self._self_compute is None:
-            conds = [ self.thisIndex[ x ] == self.thisIndex[ x + 3 ] for x in range( len( self.thisIndex ) // 2 ) ]
-            self._self_compute = all( conds )
+            conds = [
+                self.thisIndex[x] == self.thisIndex[x + 3]
+                for x in range(len(self.thisIndex) // 2)
+            ]
+            self._self_compute = all(conds)
         return self._self_compute
 
-
     def setDuplicate(self):
         self.isDuplicate = True
 
-    def calculateForces( self, secProxy, senderCoords, forces, doneFut ):
+    def calculateForces(self, secProxy, senderCoords, forces, doneFut):
         if self.isSelfCompute():
-            self.selfInteract( secProxy, senderCoords, forces, doneFut )
+            self.selfInteract(secProxy, senderCoords, forces, doneFut)
             self.stepCount += 1
         else:
-            self.dataReceived.append( [ secProxy, senderCoords, forces, doneFut ] )
-            assert len( self.dataReceived ) < 3
+            self.dataReceived.append([secProxy, senderCoords, forces, doneFut])
+            assert len(self.dataReceived) < 3
 
             if self.isDuplicate:
                 # Not all neighbors are unique, we treat the duplicates as
                 # self interactions, but we have to receive both duplicates.
-                self.selfInteract( secProxy, senderCoords, forces, doneFut )
+                self.selfInteract(secProxy, senderCoords, forces, doneFut)
                 self.dataReceived = list()
-            elif len( self.dataReceived ) == 2:
-                redProxy1, coords1, forces1, doneFut1 = self.dataReceived[ 0 ]
-                redProxy2, coords2, forces2, doneFut2 = self.dataReceived[ 1 ]
-                self.thisProxy[self.thisIndex].interact( redProxy1, coords1, forces1, doneFut1, redProxy2, coords2, forces2, doneFut2 )
+            elif len(self.dataReceived) == 2:
+                redProxy1, coords1, forces1, doneFut1 = self.dataReceived[0]
+                redProxy2, coords2, forces2, doneFut2 = self.dataReceived[1]
+                self.thisProxy[self.thisIndex].interact(
+                    redProxy1,
+                    coords1,
+                    forces1,
+                    doneFut1,
+                    redProxy2,
+                    coords2,
+                    forces2,
+                    doneFut2,
+                )
                 self.dataReceived = list()
             self.stepCount += 1
 
         if self.stepCount > finalStepCount:
             # Everything done, reduction on potential energy
-            assert len( self.energy ) == 2
-            self.reduce( self.energySumFuture, self.energy, Reducer.sum )
+            assert len(self.energy) == 2
+            self.reduce(self.energySumFuture, self.energy, Reducer.sum)
 
         # TODO: Add a check to see if load balancing should be done here
         if self.stepCount > firstLdbStep and not self.stepCount % ldbPeriod:
@@ -594,131 +660,135 @@ def resumeFromSync(self):
         # Still, this method must exist in the chare
         pass
 
-    def selfInteract( self, mcast1, senderCoords, msg, doneFuture ):
+    def selfInteract(self, mcast1, senderCoords, msg, doneFuture):
         energyP: float = 0
 
-        force1 = np.zeros( (len(msg),3), dtype = np.float64 )
+        force1 = np.zeros((len(msg), 3), dtype=np.float64)
 
-        energyP = Physics.calcInternalForces( msg, senderCoords, self.stepCount, force1 )
+        energyP = Physics.calcInternalForces(msg, senderCoords, self.stepCount, force1)
 
         if self.stepCount == 1:
-            self.energy[ 0 ] = energyP
+            self.energy[0] = energyP
         elif self.stepCount == finalStepCount:
-            self.energy[ 1 ] = energyP
+            self.energy[1] = energyP
 
-        self.contribute( force1, Reducer.sum, doneFuture, mcast1 )
+        self.contribute(force1, Reducer.sum, doneFuture, mcast1)
 
-    def setReductionClient( self, proxy, method ):
+    def setReductionClient(self, proxy, method):
         self.reductionClientProxy = proxy
         self.reductionClientMethod = method
-        self.reductionClientFn = getattr( proxy, method )
+        self.reductionClientFn = getattr(proxy, method)
 
-    def interact( self, mcast1, coords1, msg1, doneFut1,
-                  mcast2, coords2, msg2, doneFut2
+    def interact(
+        self, mcast1, coords1, msg1, doneFut1, mcast2, coords2, msg2, doneFut2
     ):
         x1, y1, z1 = coords1
         x2, y2, z2 = coords1
         doSwap = False
-        if x2 * cellArrayDimY * cellArrayDimZ + y2 * cellArrayDimZ + z2 < \
-           x1 * cellArrayDimY * cellArrayDimZ + y1 * cellArrayDimZ + z1:
+        if (
+            x2 * cellArrayDimY * cellArrayDimZ + y2 * cellArrayDimZ + z2
+            < x1 * cellArrayDimY * cellArrayDimZ + y1 * cellArrayDimZ + z1
+        ):
             mcast1, mcast2 = mcast2, mcast1
             doneFut1, doneFut2 = doneFut2, doneFut1
             doSwap = True
 
         # unpacking arguments so they can be sent to the numba calcPairForces
-        force1 = np.zeros( ( len(msg1), 3 ), dtype = np.float64 )
-        force2 = np.zeros( ( len(msg2), 3 ), dtype = np.float64 )
-        energyP = Physics.calcPairForces( coords1, coords2,
-                                          msg1,
-                                          msg2,
-                                          self.stepCount,
-                                          force1,
-                                          force2
+        force1 = np.zeros((len(msg1), 3), dtype=np.float64)
+        force2 = np.zeros((len(msg2), 3), dtype=np.float64)
+        energyP = Physics.calcPairForces(
+            coords1, coords2, msg1, msg2, self.stepCount, force1, force2
         )
 
         if doSwap:
             force1, force2 = force2, force1
 
         if self.stepCount == 1:
-            self.energy[ 0 ] = energyP
+            self.energy[0] = energyP
         elif self.stepCount == finalStepCount:
-            self.energy[ 1 ] = energyP
+            self.energy[1] = energyP
 
-        self.reduce( doneFut1, force1, Reducer.sum, mcast1 )
-        self.reduce( doneFut2, force2, Reducer.sum, mcast2 )
+        self.reduce(doneFut1, force1, Reducer.sum, mcast1)
+        self.reduce(doneFut2, force2, Reducer.sum, mcast2)
 
 
-def energySum( startEnergy, endEnergy ):
+def energySum(startEnergy, endEnergy):
     iE1, fE1 = startEnergy
     iE2, fE2 = endEnergy
-    if abs( fE1 + fE2 - iE1 - iE2 ) > ENERGY_VAR:
-        print( f'Energy value has varied significantly from {iE1+iE2} to {fE1 + fE2}' )
+    if abs(fE1 + fE2 - iE1 - iE2) > ENERGY_VAR:
+        print(f"Energy value has varied significantly from {iE1+iE2} to {fE1 + fE2}")
     else:
-        print( 'Energy conservation test passed for maximum allowed variation of '
-               f'{ENERGY_VAR} units. \nSIMULATION SUCCESSFUL'
+        print(
+            "Energy conservation test passed for maximum allowed variation of "
+            f"{ENERGY_VAR} units. \nSIMULATION SUCCESSFUL"
         )
 
 
-def main( args ):
-    print( 'LENNARD JONES MOLECULAR DYNAMICS START UP...' )
-    Chare( Compute )
+def main(args):
+    print("LENNARD JONES MOLECULAR DYNAMICS START UP...")
+    Chare(Compute)
 
-    if len( args ) != 7:
-        print( 'USAGE python3 -m charmrun.start +p<NProcs> dimX dimY dimZ steps firstLBstep LBPeriod' )
+    if len(args) != 7:
+        print(
+            "USAGE python3 -m charmrun.start +p<NProcs> dimX dimY dimZ steps firstLBstep LBPeriod"
+        )
         exit()
 
     globs = GlobalDefs()
 
-    dimX, dimY, dimZ = [ int( x ) for x in args[ 1:4 ] ]
+    dimX, dimY, dimZ = [int(x) for x in args[1:4]]
     globs.cellArrayDimX, globs.cellArrayDimY, globs.cellArrayDimZ = dimX, dimY, dimZ
-    steps = int( args[ 4 ] )
+    steps = int(args[4])
     globs.finalStepCount = steps
-    globs.firstLdbStep = int( args[ 5 ] )
-    globs.lbPeriod = int( args[ 6 ] )
+    globs.firstLdbStep = int(args[5])
+    globs.lbPeriod = int(args[6])
 
-    print( f'Cell Array Dimension X: {dimX} Y: {dimY} Z: {dimZ} '
-           f'of size {globs.CELL_SIZE_X} {globs.CELL_SIZE_Y} {globs.CELL_SIZE_Z}'
+    print(
+        f"Cell Array Dimension X: {dimX} Y: {dimY} Z: {dimZ} "
+        f"of size {globs.CELL_SIZE_X} {globs.CELL_SIZE_Y} {globs.CELL_SIZE_Z}"
     )
-    print( f'Final Step Count: {steps}' )
-    print( f'First LB Step: {globs.firstLdbStep}' )
-    print( f'LB Period: {globs.lbPeriod}' )
+    print(f"Final Step Count: {steps}")
+    print(f"First LB Step: {globs.firstLdbStep}")
+    print(f"LB Period: {globs.lbPeriod}")
 
-    charm.thisProxy.updateGlobals( globs.__dict__, awaitable = True ).get()
+    charm.thisProxy.updateGlobals(globs.__dict__, awaitable=True).get()
 
     doneFuture = Future()
 
     # 2, one for start energy and one for end energy
-    energyFuture = Future( 2 )
+    energyFuture = Future(2)
 
-    cellMap = Group( CellMap, args = ( dimX, dimY, dimZ ) )
-    globs.cellArray = Array( Cell, ( dimX, dimY, dimZ ), map = cellMap, args = [ energyFuture ], useAtSync = True )
-    globs.computeArray = Array( Compute, ndims = 6 )
-    charm.thisProxy.updateGlobals( globs.__dict__, awaitable = True ).get()
-    globs.cellArray.createComputes( awaitable = True ).get()
-    charm.thisProxy.updateGlobals( globs.__dict__, awaitable = True ).get()
+    cellMap = Group(CellMap, args=(dimX, dimY, dimZ))
+    globs.cellArray = Array(
+        Cell, (dimX, dimY, dimZ), map=cellMap, args=[energyFuture], useAtSync=True
+    )
+    globs.computeArray = Array(Compute, ndims=6)
+    charm.thisProxy.updateGlobals(globs.__dict__, awaitable=True).get()
+    globs.cellArray.createComputes(awaitable=True).get()
+    charm.thisProxy.updateGlobals(globs.__dict__, awaitable=True).get()
 
-    print( f'Cells: {globs.cellArrayDimY} X {globs.cellArrayDimY} X {globs.cellArrayDimZ} .... created' )
+    print(
+        f"Cells: {globs.cellArrayDimY} X {globs.cellArrayDimY} X {globs.cellArrayDimZ} .... created"
+    )
 
     computeArray.ckDoneInserting()
 
-    nComputes = (NUM_NEIGHBORS//2+1) * \
-                cellArrayDimX*cellArrayDimY*cellArrayDimZ
-    print(f"Computes: {nComputes} .... created\n" )
+    nComputes = (NUM_NEIGHBORS // 2 + 1) * cellArrayDimX * cellArrayDimY * cellArrayDimZ
+    print(f"Computes: {nComputes} .... created\n")
     print("Starting simulation .... \n\n")
 
     startBenchmarkTime = time.time()
 
-
     cellArray.run()
     starting, ending = energyFuture.get()
 
-    energySum( starting, ending )
+    energySum(starting, ending)
 
     endBenchmarkTime = time.time()
 
-    print( f'Total application time: {endBenchmarkTime - startBenchmarkTime}' )
+    print(f"Total application time: {endBenchmarkTime - startBenchmarkTime}")
     exit()
 
 
-if __name__ == '__main__':
-    charm.start( main )
+if __name__ == "__main__":
+    charm.start(main)
diff --git a/examples/miniapps/MiniWeather/constants.py b/examples/miniapps/MiniWeather/constants.py
index 8ebfc933..32e59f97 100644
--- a/examples/miniapps/MiniWeather/constants.py
+++ b/examples/miniapps/MiniWeather/constants.py
@@ -1,37 +1,73 @@
 import numpy as np
 
-pi        = 3.14159265358979323846264338327;   #Pi
-grav      = 9.8;                               #Gravitational acceleration (m / s^2)
-cp        = 1004.;                             #Specific heat of dry air at constant pressure
-cv        = 717.;                              #Specific heat of dry air at constant volume
-rd        = 287.;                              #Dry air constant for equation of state (P=rho*rd*T)
-p0        = 1.e5;                              #Standard pressure at the surface in Pascals
-C0        = 27.5629410929725921310572974482;   #Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
-gamm      = 1.40027894002789400278940027894;   #gamma=cp/Rd , have to call this gamm because "gamma" is taken (I hate C so much)
-#Define domain and stability-related constants
-xlen      = 2.e4;    #Length of the domain in the x-direction (meters)
-zlen      = 1.e4;    #Length of the domain in the z-direction (meters)
-hv_beta   = 0.25;     #How strong to diffuse the solution: hv_beta \in [0:1]
-cfl       = 1.50;    #"Courant, Friedrichs, Lewy" number (for numerical stability)
-max_speed = 450;        #Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
-hs        = 2;          #"Halo" size: number of cells beyond the MPI tasks's domain needed for a full "stencil" of information for reconstruction
-sten_size = 4;          #Size of the stencil used for interpolation
+pi = 3.14159265358979323846264338327
+# Pi
+grav = 9.8
+# Gravitational acceleration (m / s^2)
+cp = 1004.0
+# Specific heat of dry air at constant pressure
+cv = 717.0
+# Specific heat of dry air at constant volume
+rd = 287.0
+# Dry air constant for equation of state (P=rho*rd*T)
+p0 = 1.0e5
+# Standard pressure at the surface in Pascals
+C0 = 27.5629410929725921310572974482
+# Constant to translate potential temperature into pressure (P=C0*(rho*theta)**gamma)
+gamm = 1.40027894002789400278940027894
+# gamma=cp/cv , have to call this gamm because "gamma" is taken (I hate C so much)
+# Define domain and stability-related constants
+xlen = 2.0e4
+# Length of the domain in the x-direction (meters)
+zlen = 1.0e4
+# Length of the domain in the z-direction (meters)
+hv_beta = 0.25
+# How strong to diffuse the solution: hv_beta \in [0:1]
+cfl = 1.50
+# "Courant, Friedrichs, Lewy" number (for numerical stability)
+max_speed = 450
+# Assumed maximum wave speed during the simulation (speed of sound + speed of wind) (meter / sec)
+hs = 2
+# "Halo" size: number of cells beyond the MPI task's domain needed for a full "stencil" of information for reconstruction
+sten_size = 4
+# Size of the stencil used for interpolation
 
 # Parameters for indexing and flags
-NUM_VARS = 4;           #Number of fluid state variables
-ID_DENS  = 0;           #index for density ("rho")
-ID_UMOM  = 1;           #index for momentum in the x-direction ("rho * u")
-ID_WMOM  = 2;           #index for momentum in the z-direction ("rho * w")
-ID_RHOT  = 3;           #index for density * potential temperature ("rho * theta")
-DIR_X = 1;              #Integer constant to express that this operation is in the x-direction
-DIR_Z = 2;              #Integer constant to express that this operation is in the z-direction
-DATA_SPEC_COLLISION       = 1;
-DATA_SPEC_THERMAL         = 2;
-DATA_SPEC_MOUNTAIN        = 3;
-DATA_SPEC_TURBULENCE      = 4;
-DATA_SPEC_DENSITY_CURRENT = 5;
-DATA_SPEC_INJECTION       = 6;
+NUM_VARS = 4
+# Number of fluid state variables
+ID_DENS = 0
+# index for density ("rho")
+ID_UMOM = 1
+# index for momentum in the x-direction ("rho * u")
+ID_WMOM = 2
+# index for momentum in the z-direction ("rho * w")
+ID_RHOT = 3
+# index for density * potential temperature ("rho * theta")
+DIR_X = 1
+# Integer constant to express that this operation is in the x-direction
+DIR_Z = 2
+# Integer constant to express that this operation is in the z-direction
+DATA_SPEC_COLLISION = 1
+DATA_SPEC_THERMAL = 2
+DATA_SPEC_MOUNTAIN = 3
+DATA_SPEC_TURBULENCE = 4
+DATA_SPEC_DENSITY_CURRENT = 5
+DATA_SPEC_INJECTION = 6
 
-nqpoints = 3;
-qpoints = np.array([0.112701665379258311482073460022E0 , 0.500000000000000000000000000000E0 , 0.887298334620741688517926539980E0], dtype=np.float64)
-qweights = np.array([0.277777777777777777777777777779E0 , 0.444444444444444444444444444444E0 , 0.277777777777777777777777777779E0], dtype=np.float64)
\ No newline at end of file
+nqpoints = 3
+qpoints = np.array(
+    [
+        0.112701665379258311482073460022e0,
+        0.500000000000000000000000000000e0,
+        0.887298334620741688517926539980e0,
+    ],
+    dtype=np.float64,
+)
+qweights = np.array(
+    [
+        0.277777777777777777777777777779e0,
+        0.444444444444444444444444444444e0,
+        0.277777777777777777777777777779e0,
+    ],
+    dtype=np.float64,
+)
diff --git a/examples/miniapps/MiniWeather/create_visualization.py b/examples/miniapps/MiniWeather/create_visualization.py
index 51738086..890b6dfa 100644
--- a/examples/miniapps/MiniWeather/create_visualization.py
+++ b/examples/miniapps/MiniWeather/create_visualization.py
@@ -6,6 +6,7 @@
 import glob
 import re
 
+
 def create_gif(input_dir, output_gif_filename, qoi_index):
     """
     Creates a GIF from .npz simulation output files from multiple chares.
@@ -15,176 +16,231 @@ def create_gif(input_dir, output_gif_filename, qoi_index):
         output_gif_filename (str): Name of the output GIF file.
         qoi_index (int): Index of the Quantity of Interest to visualize.
     """
-    
+
     search_pattern = os.path.join(input_dir, "data_iter_*_chare_*.npz")
     all_npz_files = sorted(glob.glob(search_pattern))
 
     if not all_npz_files:
-        print(f"No .npz files found in {input_dir} matching the pattern {search_pattern}")
+        print(
+            f"No .npz files found in {input_dir} matching the pattern {search_pattern}"
+        )
         return
     print(f"Found {len(all_npz_files)} total chare .npz files to process.")
 
-    iteration_files_metadata = {} 
+    iteration_files_metadata = {}
     filename_pattern = re.compile(r"data_iter_(\d+)_chare_(\d+)_(\d+)\.npz")
 
     for file_path in all_npz_files:
         basename = os.path.basename(file_path)
         match = filename_pattern.match(basename)
         if not match:
-            print(f"Warning: Filename {basename} does not match expected pattern data_iter_XXXXXX_chare_YYY_ZZZ.npz. Skipping.")
+            print(
+                f"Warning: Filename {basename} does not match expected pattern data_iter_XXXXXX_chare_YYY_ZZZ.npz. Skipping."
+            )
             continue
-        
+
         iter_num = int(match.group(1))
 
         try:
             with np.load(file_path) as data_archive:
-                required_keys = ['state', 'etime', 'chare_nx', 'chare_i_beg', 'chare_nz', 'chare_k_beg']
+                required_keys = [
+                    "state",
+                    "etime",
+                    "chare_nx",
+                    "chare_i_beg",
+                    "chare_nz",
+                    "chare_k_beg",
+                ]
                 if not all(key in data_archive for key in required_keys):
-                    print(f"Warning: File {file_path} is missing one or more required keys ({', '.join(required_keys)}). Skipping.")
+                    print(
+                        f"Warning: File {file_path} is missing one or more required keys ({', '.join(required_keys)}). Skipping."
+                    )
                     continue
 
                 meta = {
-                    'path': file_path,
-                    'etime': float(data_archive['etime']),
-                    'chare_nx': int(data_archive['chare_nx']),
-                    'chare_i_beg': int(data_archive['chare_i_beg']),
-                    'chare_nz': int(data_archive['chare_nz']),
-                    'chare_k_beg': int(data_archive['chare_k_beg'])
+                    "path": file_path,
+                    "etime": float(data_archive["etime"]),
+                    "chare_nx": int(data_archive["chare_nx"]),
+                    "chare_i_beg": int(data_archive["chare_i_beg"]),
+                    "chare_nz": int(data_archive["chare_nz"]),
+                    "chare_k_beg": int(data_archive["chare_k_beg"]),
                 }
-            
+
             if iter_num not in iteration_files_metadata:
                 iteration_files_metadata[iter_num] = []
             iteration_files_metadata[iter_num].append(meta)
         except Exception as e:
             print(f"Could not load metadata from {file_path}: {e}")
             continue
-            
+
     if not iteration_files_metadata:
         print("No valid iteration data could be processed from file metadata.")
         return
 
     sorted_iter_nums = sorted(iteration_files_metadata.keys())
-    
-    reconstructed_frames_info = [] 
+
+    reconstructed_frames_info = []
     num_vars_global = None
 
     print("Reconstructing data for each iteration...")
     for iter_idx, iter_num in enumerate(sorted_iter_nums):
         chare_metas_for_iter = iteration_files_metadata[iter_num]
-        if not chare_metas_for_iter: continue
+        if not chare_metas_for_iter:
+            continue
 
         current_global_nx = 0
         current_global_nz = 0
-        sim_time_for_iter = chare_metas_for_iter[0]['etime'] 
-        
+        sim_time_for_iter = chare_metas_for_iter[0]["etime"]
+
         temp_chare_data_for_iter = []
 
         valid_iter = True
         for chare_meta in chare_metas_for_iter:
-            current_global_nx = max(current_global_nx, chare_meta['chare_i_beg'] + chare_meta['chare_nx'])
-            current_global_nz = max(current_global_nz, chare_meta['chare_k_beg'] + chare_meta['chare_nz'])
-            
+            current_global_nx = max(
+                current_global_nx, chare_meta["chare_i_beg"] + chare_meta["chare_nx"]
+            )
+            current_global_nz = max(
+                current_global_nz, chare_meta["chare_k_beg"] + chare_meta["chare_nz"]
+            )
+
             try:
-                with np.load(chare_meta['path']) as data_archive:
-                    state_data_chare = data_archive['state']
-                
+                with np.load(chare_meta["path"]) as data_archive:
+                    state_data_chare = data_archive["state"]
+
                 if num_vars_global is None:
                     num_vars_global = state_data_chare.shape[0]
                     if qoi_index >= num_vars_global:
-                        print(f"Error: QoI index {qoi_index} is out of bounds for data (num_vars={num_vars_global}). Max valid QoI index is {num_vars_global - 1}.")
-                        return # Critical error, stop processing
+                        print(
+                            f"Error: QoI index {qoi_index} is out of bounds for data (num_vars={num_vars_global}). Max valid QoI index is {num_vars_global - 1}."
+                        )
+                        return  # Critical error, stop processing
                 elif state_data_chare.shape[0] != num_vars_global:
-                    print(f"Warning: Inconsistent number of variables in {chare_meta['path']} ({state_data_chare.shape[0]} vs {num_vars_global}). Skipping iteration {iter_num}.")
+                    print(
+                        f"Warning: Inconsistent number of variables in {chare_meta['path']} ({state_data_chare.shape[0]} vs {num_vars_global}). Skipping iteration {iter_num}."
+                    )
                     valid_iter = False
-                    break 
-                
-                temp_chare_data_for_iter.append({**chare_meta, 'state': state_data_chare})
+                    break
+
+                temp_chare_data_for_iter.append(
+                    {**chare_meta, "state": state_data_chare}
+                )
 
             except Exception as e:
-                print(f"Could not load state from {chare_meta['path']} for iter {iter_num}: {e}")
+                print(
+                    f"Could not load state from {chare_meta['path']} for iter {iter_num}: {e}"
+                )
                 valid_iter = False
                 break
-        
+
         if not valid_iter or not temp_chare_data_for_iter:
-            print(f"Warning: Skipping iteration {iter_num} due to data loading issues or inconsistencies.")
+            print(
+                f"Warning: Skipping iteration {iter_num} due to data loading issues or inconsistencies."
+            )
             continue
-        
-        if num_vars_global is None: # Should be set if at least one chare was processed
-            print(f"Warning: Number of variables could not be determined for iteration {iter_num}. Skipping.")
+
+        if num_vars_global is None:  # Should be set if at least one chare was processed
+            print(
+                f"Warning: Number of variables could not be determined for iteration {iter_num}. Skipping."
+            )
             continue
 
-        full_state_np = np.zeros((num_vars_global, current_global_nz, current_global_nx), dtype=np.float64)
-            
+        full_state_np = np.zeros(
+            (num_vars_global, current_global_nz, current_global_nx), dtype=np.float64
+        )
+
         for data_loaded in temp_chare_data_for_iter:
-            s = data_loaded['state']
-            i_beg, i_len = data_loaded['chare_i_beg'], data_loaded['chare_nx']
-            k_beg, k_len = data_loaded['chare_k_beg'], data_loaded['chare_nz']
-            full_state_np[:, k_beg:k_beg+k_len, i_beg:i_beg+i_len] = s
-            
+            s = data_loaded["state"]
+            i_beg, i_len = data_loaded["chare_i_beg"], data_loaded["chare_nx"]
+            k_beg, k_len = data_loaded["chare_k_beg"], data_loaded["chare_nz"]
+            full_state_np[:, k_beg : k_beg + k_len, i_beg : i_beg + i_len] = s
+
         qoi_slice = full_state_np[qoi_index, :, :]
-        reconstructed_frames_info.append({
-            'iter_num': iter_num,
-            'sim_time': sim_time_for_iter,
-            'qoi_data': qoi_slice 
-        })
-        if (iter_idx + 1) % 10 == 0 or (iter_idx + 1) == len(sorted_iter_nums) or len(sorted_iter_nums) < 10 :
-             print(f"  Reconstructed data for iteration {iter_num} ({iter_idx+1}/{len(sorted_iter_nums)})")
+        reconstructed_frames_info.append(
+            {"iter_num": iter_num, "sim_time": sim_time_for_iter, "qoi_data": qoi_slice}
+        )
+        if (
+            (iter_idx + 1) % 10 == 0
+            or (iter_idx + 1) == len(sorted_iter_nums)
+            or len(sorted_iter_nums) < 10
+        ):
+            print(
+                f"  Reconstructed data for iteration {iter_num} ({iter_idx+1}/{len(sorted_iter_nums)})"
+            )
 
     if not reconstructed_frames_info:
         print("No simulation frames could be reconstructed. GIF creation aborted.")
         return
-        
+
     vmin, vmax = None, None
     print("Determining color scale from reconstructed data...")
     for i, frame_info in enumerate(reconstructed_frames_info):
-        qoi_data = frame_info['qoi_data']
+        qoi_data = frame_info["qoi_data"]
         if i == 0:
             vmin = np.min(qoi_data)
             vmax = np.max(qoi_data)
         else:
             vmin = min(vmin, np.min(qoi_data))
             vmax = max(vmax, np.max(qoi_data))
-            
+
     if vmin is None or vmax is None:
-        print("Could not determine color scale from reconstructed data. No valid data files processed or QoI data was empty.")
+        print(
+            "Could not determine color scale from reconstructed data. No valid data files processed or QoI data was empty."
+        )
         return
-        
+
     print(f"Color scale determined: vmin={vmin:.2e}, vmax={vmax:.2e}")
 
     images = []
     print("Generating images for GIF...")
     for i, frame_info in enumerate(reconstructed_frames_info):
-        if (i + 1) % 10 == 0 or (i + 1) == len(reconstructed_frames_info) or len(reconstructed_frames_info) < 10:
-            print(f"Processing frame {i+1}/{len(reconstructed_frames_info)} for iter {frame_info['iter_num']}")
-        
-        qoi_data_to_plot = frame_info['qoi_data']
-        sim_time = frame_info['sim_time']
-        iter_num_for_title = frame_info['iter_num']
+        if (
+            (i + 1) % 10 == 0
+            or (i + 1) == len(reconstructed_frames_info)
+            or len(reconstructed_frames_info) < 10
+        ):
+            print(
+                f"Processing frame {i+1}/{len(reconstructed_frames_info)} for iter {frame_info['iter_num']}"
+            )
+
+        qoi_data_to_plot = frame_info["qoi_data"]
+        sim_time = frame_info["sim_time"]
+        iter_num_for_title = frame_info["iter_num"]
 
         nz_dim, nx_dim = qoi_data_to_plot.shape
         aspect_ratio = nx_dim / nz_dim if nz_dim > 0 else 1.0
 
-        base_fig_height = 5 
+        base_fig_height = 5
         fig_width = base_fig_height * aspect_ratio
-        max_fig_width = 10 
+        max_fig_width = 10
         if fig_width > max_fig_width:
             fig_width = max_fig_width
-            base_fig_height = fig_width / aspect_ratio if aspect_ratio > 0 else base_fig_height
+            base_fig_height = (
+                fig_width / aspect_ratio if aspect_ratio > 0 else base_fig_height
+            )
 
         fig, ax = plt.subplots(figsize=(fig_width, base_fig_height))
-        im = ax.imshow(qoi_data_to_plot, aspect='auto', origin='lower', cmap='viridis', vmin=vmin, vmax=vmax)
+        im = ax.imshow(
+            qoi_data_to_plot,
+            aspect="auto",
+            origin="lower",
+            cmap="viridis",
+            vmin=vmin,
+            vmax=vmax,
+        )
         plt.colorbar(im, ax=ax, label=f"QoI {qoi_index}")
-        
-        ax.set_title(f"Sim Time: {sim_time:.3f}s (Iter: {iter_num_for_title}) - QoI {qoi_index}")
+
+        ax.set_title(
+            f"Sim Time: {sim_time:.3f}s (Iter: {iter_num_for_title}) - QoI {qoi_index}"
+        )
         ax.set_xlabel("Global X-index")
         ax.set_ylabel("Global Z-index")
 
         fig.canvas.draw()
-        buf = fig.canvas.buffer_rgba() 
+        buf = fig.canvas.buffer_rgba()
         image_rgba = np.frombuffer(buf, dtype=np.uint8)
-        canvas_width, canvas_height = fig.canvas.get_width_height() 
-        image_rgba = image_rgba.reshape(canvas_height, canvas_width, 4) 
-        images.append(image_rgba[:, :, :3]) 
+        canvas_width, canvas_height = fig.canvas.get_width_height()
+        image_rgba = image_rgba.reshape(canvas_height, canvas_width, 4)
+        images.append(image_rgba[:, :, :3])
 
         plt.close(fig)
 
@@ -199,16 +255,33 @@ def create_gif(input_dir, output_gif_filename, qoi_index):
     except Exception as e:
         print(f"Error saving GIF: {e}")
 
+
 if __name__ == "__main__":
-    parser = argparse.ArgumentParser(description="Create a GIF from MiniWeather multi-chare simulation output .npz files.")
-    parser.add_argument("input_dir", type=str, help="Directory containing the .npz simulation output files (e.g., data_iter_*_chare_*.npz).")
-    parser.add_argument("--out", type=str, default="simulation_qoi0.gif", help="Output GIF filename (default: simulation_qoi0.gif).")
-    parser.add_argument("--qoi", type=int, default=0, help="Index of the Quantity of Interest to visualize (default: 0, e.g., density).")
-    
+    parser = argparse.ArgumentParser(
+        description="Create a GIF from MiniWeather multi-chare simulation output .npz files."
+    )
+    parser.add_argument(
+        "input_dir",
+        type=str,
+        help="Directory containing the .npz simulation output files (e.g., data_iter_*_chare_*.npz).",
+    )
+    parser.add_argument(
+        "--out",
+        type=str,
+        default="simulation_qoi0.gif",
+        help="Output GIF filename (default: simulation_qoi0.gif).",
+    )
+    parser.add_argument(
+        "--qoi",
+        type=int,
+        default=0,
+        help="Index of the Quantity of Interest to visualize (default: 0, e.g., density).",
+    )
+
     args = parser.parse_args()
 
     output_filename = args.out
-    if args.qoi != 0 and args.out == "simulation_qoi0.gif": 
+    if args.qoi != 0 and args.out == "simulation_qoi0.gif":
         output_filename = f"simulation_qoi{args.qoi}.gif"
-        
-    create_gif(args.input_dir, output_filename, args.qoi) 
\ No newline at end of file
+
+    create_gif(args.input_dir, output_filename, args.qoi)
diff --git a/examples/miniapps/MiniWeather/kernels.py b/examples/miniapps/MiniWeather/kernels.py
index bc4c7ad9..1d3d850a 100644
--- a/examples/miniapps/MiniWeather/kernels.py
+++ b/examples/miniapps/MiniWeather/kernels.py
@@ -12,16 +12,17 @@ def hydro_const_theta(z):
     z is the input coordinate
     Returns r and t, the background hydrostatic density and potential temperature
     """
-    theta0 = 300.  # Background potential temperature
-    exner0 = 1.    # Surface-level Exner pressure
+    theta0 = 300.0  # Background potential temperature
+    exner0 = 1.0  # Surface-level Exner pressure
     # Establish hydrostatic balance first using Exner pressure
-    t = theta0                                  # Potential Temperature at z
-    exner = exner0 - grav * z / (cp * theta0)   # Exner pressure at z
-    p = p0 * (exner**(cp/rd))                 # Pressure at z
-    rt = (p / C0)**(1. / gamm)             # rho*theta at z
-    r = rt / t                                  # Density at z
+    t = theta0  # Potential Temperature at z
+    exner = exner0 - grav * z / (cp * theta0)  # Exner pressure at z
+    p = p0 * (exner ** (cp / rd))  # Pressure at z
+    rt = (p / C0) ** (1.0 / gamm)  # rho*theta at z
+    r = rt / t  # Density at z
     return r, t
 
+
 @numba.jit(nopython=True)
 def hydro_const_bvfreq(z, bv_freq0):
     """
@@ -30,15 +31,18 @@ def hydro_const_bvfreq(z, bv_freq0):
     bv_freq0 is the constant Brunt-Vaisala frequency
     Returns r and t, the background hydrostatic density and potential temperature
     """
-    theta0 = 300.  # Background potential temperature
-    exner0 = 1.    # Surface-level Exner pressure
-    t = theta0 * np.exp( bv_freq0*bv_freq0 / grav * z )                                    # Pot temp at z
-    exner = exner0 - grav*grav / (cp * bv_freq0*bv_freq0) * (t - theta0) / (t * theta0) # Exner pressure at z
-    p = p0 * (exner**(cp/rd))                                                         # Pressure at z
-    rt = (p / C0)**(1. / gamm)                                                  # rho*theta at z
-    r = rt / t                                                                          # Density at z
+    theta0 = 300.0  # Background potential temperature
+    exner0 = 1.0  # Surface-level Exner pressure
+    t = theta0 * np.exp(bv_freq0 * bv_freq0 / grav * z)  # Pot temp at z
+    exner = exner0 - grav * grav / (cp * bv_freq0 * bv_freq0) * (t - theta0) / (
+        t * theta0
+    )  # Exner pressure at z
+    p = p0 * (exner ** (cp / rd))  # Pressure at z
+    rt = (p / C0) ** (1.0 / gamm)  # rho*theta at z
+    r = rt / t  # Density at z
     return r, t
 
+
 @numba.jit(nopython=True)
 def sample_ellipse_cosine(x, z, amp, x0, z0, xrad, zrad):
     """
@@ -48,11 +52,12 @@ def sample_ellipse_cosine(x, z, amp, x0, z0, xrad, zrad):
     Returns a double.
     """
     # Compute distance from bubble center
-    dist = np.sqrt( ((x-x0)/xrad)**2 + ((z-z0)/zrad)**2 ) * math.pi / 2.
-    if dist <= math.pi / 2.:
-        return amp * (np.cos(dist)**2.)
+    dist = np.sqrt(((x - x0) / xrad) ** 2 + ((z - z0) / zrad) ** 2) * math.pi / 2.0
+    if dist <= math.pi / 2.0:
+        return amp * (np.cos(dist) ** 2.0)
     else:
-        return 0.
+        return 0.0
+
 
 @numba.jit(nopython=True)
 def injection(x, z):
@@ -62,12 +67,13 @@ def injection(x, z):
     Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature)
     """
     hr, ht = hydro_const_theta(z)
-    r = 0.
-    t = 0.
-    u = 0.
-    w = 0.
+    r = 0.0
+    t = 0.0
+    u = 0.0
+    w = 0.0
     return r, u, w, t, hr, ht
 
+
 @numba.jit(nopython=True)
 def density_current(x, z):
     """
@@ -76,13 +82,14 @@ def density_current(x, z):
     Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature)
     """
     hr, ht = hydro_const_theta(z)
-    r = 0.
-    t = 0.
-    u = 0.
-    w = 0.
-    t = t + sample_ellipse_cosine(x,z,-20. ,xlen/2,5000.,4000.,2000.)
+    r = 0.0
+    t = 0.0
+    u = 0.0
+    w = 0.0
+    t = t + sample_ellipse_cosine(x, z, -20.0, xlen / 2, 5000.0, 4000.0, 2000.0)
     return r, u, w, t, hr, ht
 
+
 @numba.jit(nopython=True)
 def turbulence(x, z):
     """
@@ -90,29 +97,31 @@ def turbulence(x, z):
     Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature)
     """
     hr, ht = hydro_const_theta(z)
-    r = 0.
-    t = 0.
-    u = 0.
-    w = 0.
+    r = 0.0
+    t = 0.0
+    u = 0.0
+    w = 0.0
     # call random_number(u);
     # call random_number(w)
     # u = (u_rand - 0.5) * 20.
     # w = (w_rand - 0.5) * 20.
     return r, u, w, t, hr, ht
 
+
 @numba.jit(nopython=True)
 def mountain_waves(x, z):
     """
     x and z are input coordinates at which to sample
     Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature)
     """
-    hr, ht = hydro_const_bvfreq(z,0.02)
-    r = 0.
-    t = 0.
-    u = 15.
-    w = 0.
+    hr, ht = hydro_const_bvfreq(z, 0.02)
+    r = 0.0
+    t = 0.0
+    u = 15.0
+    w = 0.0
     return r, u, w, t, hr, ht
 
+
 @numba.jit(nopython=True)
 def thermal(x, z):
     """
@@ -121,13 +130,14 @@ def thermal(x, z):
     Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature)
     """
     hr, ht = hydro_const_theta(z)
-    r = 0.
-    t = 0.
-    u = 0.
-    w = 0.
-    t = t + sample_ellipse_cosine(x,z, 3. ,xlen/2,2000.,2000.,2000.)
+    r = 0.0
+    t = 0.0
+    u = 0.0
+    w = 0.0
+    t = t + sample_ellipse_cosine(x, z, 3.0, xlen / 2, 2000.0, 2000.0, 2000.0)
     return r, u, w, t, hr, ht
 
+
 @numba.jit(nopython=True)
 def collision(x, z):
     """
@@ -136,22 +146,26 @@ def collision(x, z):
     Returns r,u,w,t (density, u-wind, w-wind, potential temperature) and hr,ht (background hydrostatic density and potential temperature)
     """
     hr, ht = hydro_const_theta(z)
-    r = 0.
-    t = 0.
-    u = 0.
-    w = 0.
-    t = t + sample_ellipse_cosine(x,z, 20.,xlen/2,2000.,2000.,2000.)
-    t = t + sample_ellipse_cosine(x,z,-20.,xlen/2,8000.,2000.,2000.)
+    r = 0.0
+    t = 0.0
+    u = 0.0
+    w = 0.0
+    t = t + sample_ellipse_cosine(x, z, 20.0, xlen / 2, 2000.0, 2000.0, 2000.0)
+    t = t + sample_ellipse_cosine(x, z, -20.0, xlen / 2, 8000.0, 2000.0, 2000.0)
     return r, u, w, t, hr, ht
 
+
 # End of CPU JIT functions
 
 ####################################################################################
 # CUDA GPU KERNELS
 ####################################################################################
 
+
 @cuda.jit
-def compute_flux_x_kernel(state, flux, hy_dens_cell, hy_dens_theta_cell, hv_coef, nx, nz, hs):
+def compute_flux_x_kernel(
+    state, flux, hy_dens_cell, hy_dens_theta_cell, hv_coef, nx, nz, hs
+):
     k_idx = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y
     i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
 
@@ -163,21 +177,29 @@ def compute_flux_x_kernel(state, flux, hy_dens_cell, hy_dens_theta_cell, hv_coef
         for ll in range(NUM_VARS):
             for s in range(sten_size):
                 stencil[s] = state[ll, k_idx + hs, i_idx + s]
-            
-            vals[ll] = -stencil[0]/12 + 7*stencil[1]/12 + 7*stencil[2]/12 - stencil[3]/12
-            d3_vals[ll] = -stencil[0] + 3*stencil[1] - 3*stencil[2] + stencil[3]
+
+            vals[ll] = (
+                -stencil[0] / 12
+                + 7 * stencil[1] / 12
+                + 7 * stencil[2] / 12
+                - stencil[3] / 12
+            )
+            d3_vals[ll] = -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3]
 
         r_val = vals[ID_DENS] + hy_dens_cell[k_idx + hs]
         u_val = vals[ID_UMOM] / r_val
         w_val = vals[ID_WMOM] / r_val
         t_val = (vals[ID_RHOT] + hy_dens_theta_cell[k_idx + hs]) / r_val
-        p_val = C0 * (r_val * t_val)**gamm
+        p_val = C0 * (r_val * t_val) ** gamm
 
         flux[ID_DENS, k_idx, i_idx] = r_val * u_val - hv_coef * d3_vals[ID_DENS]
-        flux[ID_UMOM, k_idx, i_idx] = r_val * u_val * u_val + p_val - hv_coef * d3_vals[ID_UMOM]
+        flux[ID_UMOM, k_idx, i_idx] = (
+            r_val * u_val * u_val + p_val - hv_coef * d3_vals[ID_UMOM]
+        )
         flux[ID_WMOM, k_idx, i_idx] = r_val * u_val * w_val - hv_coef * d3_vals[ID_WMOM]
         flux[ID_RHOT, k_idx, i_idx] = r_val * u_val * t_val - hv_coef * d3_vals[ID_RHOT]
 
+
 @cuda.jit
 def compute_tend_x_kernel(flux, tend, nx, nz, grid_dx):
     ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z
@@ -185,10 +207,25 @@ def compute_tend_x_kernel(flux, tend, nx, nz, grid_dx):
     i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
 
     if i_idx < nx and k_idx < nz and ll < NUM_VARS:
-        tend[ll, k_idx, i_idx] = -(flux[ll, k_idx, i_idx + 1] - flux[ll, k_idx, i_idx]) / grid_dx
+        tend[ll, k_idx, i_idx] = (
+            -(flux[ll, k_idx, i_idx + 1] - flux[ll, k_idx, i_idx]) / grid_dx
+        )
+
 
 @cuda.jit
-def compute_flux_z_kernel(state, flux, hy_dens_int, hy_pressure_int, hy_dens_theta_int, hv_coef, nx, nz, hs, k_beg_global, nz_global):
+def compute_flux_z_kernel(
+    state,
+    flux,
+    hy_dens_int,
+    hy_pressure_int,
+    hy_dens_theta_int,
+    hv_coef,
+    nx,
+    nz,
+    hs,
+    k_beg_global,
+    nz_global,
+):
     k_idx = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y
     i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
 
@@ -200,16 +237,21 @@ def compute_flux_z_kernel(state, flux, hy_dens_int, hy_pressure_int, hy_dens_the
         for ll in range(NUM_VARS):
             for s in range(sten_size):
                 stencil[s] = state[ll, k_idx + s, i_idx + hs]
-            
-            vals[ll] = -stencil[0]/12 + 7*stencil[1]/12 + 7*stencil[2]/12 - stencil[3]/12
-            d3_vals[ll] = -stencil[0] + 3*stencil[1] - 3*stencil[2] + stencil[3]
+
+            vals[ll] = (
+                -stencil[0] / 12
+                + 7 * stencil[1] / 12
+                + 7 * stencil[2] / 12
+                - stencil[3] / 12
+            )
+            d3_vals[ll] = -stencil[0] + 3 * stencil[1] - 3 * stencil[2] + stencil[3]
 
         r_val = vals[ID_DENS] + hy_dens_int[k_idx]
         u_val = vals[ID_UMOM] / r_val
         w_val = vals[ID_WMOM] / r_val
         t_val = (vals[ID_RHOT] + hy_dens_theta_int[k_idx]) / r_val
-        p_val = C0 * (r_val * t_val)**gamm - hy_pressure_int[k_idx]
-        
+        p_val = C0 * (r_val * t_val) ** gamm - hy_pressure_int[k_idx]
+
         # Boundary conditions for w and density flux at global boundaries only
         actual_w_val = w_val
         actual_d3_dens = d3_vals[ID_DENS]
@@ -221,9 +263,16 @@ def compute_flux_z_kernel(state, flux, hy_dens_int, hy_pressure_int, hy_dens_the
             actual_d3_dens = 0.0
 
         flux[ID_DENS, k_idx, i_idx] = r_val * actual_w_val - hv_coef * actual_d3_dens
-        flux[ID_UMOM, k_idx, i_idx] = r_val * actual_w_val * u_val - hv_coef * d3_vals[ID_UMOM]
-        flux[ID_WMOM, k_idx, i_idx] = r_val * actual_w_val * actual_w_val + p_val - hv_coef * d3_vals[ID_WMOM]
-        flux[ID_RHOT, k_idx, i_idx] = r_val * actual_w_val * t_val - hv_coef * d3_vals[ID_RHOT]
+        flux[ID_UMOM, k_idx, i_idx] = (
+            r_val * actual_w_val * u_val - hv_coef * d3_vals[ID_UMOM]
+        )
+        flux[ID_WMOM, k_idx, i_idx] = (
+            r_val * actual_w_val * actual_w_val + p_val - hv_coef * d3_vals[ID_WMOM]
+        )
+        flux[ID_RHOT, k_idx, i_idx] = (
+            r_val * actual_w_val * t_val - hv_coef * d3_vals[ID_RHOT]
+        )
+
 
 @cuda.jit
 def compute_tend_z_kernel(state, flux, tend, nx, nz, hs, grid_dz):
@@ -232,10 +281,13 @@ def compute_tend_z_kernel(state, flux, tend, nx, nz, hs, grid_dz):
     i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
 
     if i_idx < nx and k_idx < nz and ll < NUM_VARS:
-        tend[ll, k_idx, i_idx] = -(flux[ll, k_idx + 1, i_idx] - flux[ll, k_idx, i_idx]) / grid_dz
+        tend[ll, k_idx, i_idx] = (
+            -(flux[ll, k_idx + 1, i_idx] - flux[ll, k_idx, i_idx]) / grid_dz
+        )
         if ll == ID_WMOM:
             tend[ll, k_idx, i_idx] -= state[ID_DENS, k_idx + hs, i_idx + hs] * grav
 
+
 @cuda.jit
 def pack_send_buf_kernel(state, sendbuf_l, sendbuf_r, nx, nz, hs):
     ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z
@@ -246,6 +298,7 @@ def pack_send_buf_kernel(state, sendbuf_l, sendbuf_r, nx, nz, hs):
         sendbuf_l[ll, k_idx, s_idx] = state[ll, k_idx + hs, hs + s_idx]
         sendbuf_r[ll, k_idx, s_idx] = state[ll, k_idx + hs, nx + s_idx]
 
+
 @cuda.jit
 def unpack_recv_buf_kernel(state, recvbuf_l, recvbuf_r, nx, nz, hs):
     ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z
@@ -256,8 +309,11 @@ def unpack_recv_buf_kernel(state, recvbuf_l, recvbuf_r, nx, nz, hs):
         state[ll, k_idx + hs, s_idx] = recvbuf_l[ll, k_idx, s_idx]
         state[ll, k_idx + hs, nx + hs + s_idx] = recvbuf_r[ll, k_idx, s_idx]
 
+
 @cuda.jit
-def update_state_x_kernel(state, hy_dens_cell, hy_dens_theta_cell, nx, nz, hs, k_beg, grid_dz):
+def update_state_x_kernel(
+    state, hy_dens_cell, hy_dens_theta_cell, nx, nz, hs, k_beg, grid_dz
+):
     k_idx = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y
     i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
 
@@ -266,10 +322,15 @@ def update_state_x_kernel(state, hy_dens_cell, hy_dens_theta_cell, nx, nz, hs, k
         if math.fabs(z - 3 * zlen / 4) <= zlen / 16:
             r_plus_hr = state[ID_DENS, k_idx + hs, i_idx] + hy_dens_cell[k_idx + hs]
             state[ID_UMOM, k_idx + hs, i_idx] = r_plus_hr * 50.0
-            state[ID_RHOT, k_idx + hs, i_idx] = r_plus_hr * 298.0 - hy_dens_theta_cell[k_idx + hs]
+            state[ID_RHOT, k_idx + hs, i_idx] = (
+                r_plus_hr * 298.0 - hy_dens_theta_cell[k_idx + hs]
+            )
+
 
 @cuda.jit
-def update_state_z_kernel(state, data_spec_int, i_beg, nx, nz, hs, grid_dx, mnt_width, k_beg_global, nz_global):
+def update_state_z_kernel(
+    state, data_spec_int, i_beg, nx, nz, hs, grid_dx, mnt_width, k_beg_global, nz_global
+):
     ll = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y
     i_glob_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
 
@@ -278,18 +339,28 @@ def update_state_z_kernel(state, data_spec_int, i_beg, nx, nz, hs, grid_dx, mnt_
             if ll == ID_WMOM:
                 state[ID_WMOM, 0, i_glob_idx] = 0.0
                 state[ID_WMOM, 1, i_glob_idx] = 0.0
-                
+
                 if data_spec_int == DATA_SPEC_MOUNTAIN:
                     x = (i_beg + i_glob_idx - hs + 0.5) * grid_dx
                     if math.fabs(x - xlen / 4.0) < mnt_width:
                         xloc = (x - (xlen / 4.0)) / mnt_width
-                        mnt_deriv = -pi * math.cos(pi * xloc / 2.0) * math.sin(pi * xloc / 2.0) * 10.0 / grid_dx 
-                        state[ID_WMOM, 0, i_glob_idx] = mnt_deriv * state[ID_UMOM, hs, i_glob_idx]
-                        state[ID_WMOM, 1, i_glob_idx] = mnt_deriv * state[ID_UMOM, hs, i_glob_idx]
+                        mnt_deriv = (
+                            -pi
+                            * math.cos(pi * xloc / 2.0)
+                            * math.sin(pi * xloc / 2.0)
+                            * 10.0
+                            / grid_dx
+                        )
+                        state[ID_WMOM, 0, i_glob_idx] = (
+                            mnt_deriv * state[ID_UMOM, hs, i_glob_idx]
+                        )
+                        state[ID_WMOM, 1, i_glob_idx] = (
+                            mnt_deriv * state[ID_UMOM, hs, i_glob_idx]
+                        )
             else:
                 state[ll, 0, i_glob_idx] = state[ll, hs, i_glob_idx]
                 state[ll, 1, i_glob_idx] = state[ll, hs, i_glob_idx]
-                
+
         if k_beg_global + nz == nz_global:
             if ll == ID_WMOM:
                 state[ID_WMOM, nz + hs, i_glob_idx] = 0.0
@@ -298,13 +369,25 @@ def update_state_z_kernel(state, data_spec_int, i_beg, nx, nz, hs, grid_dx, mnt_
                 state[ll, nz + hs, i_glob_idx] = state[ll, nz + hs - 1, i_glob_idx]
                 state[ll, nz + hs + 1, i_glob_idx] = state[ll, nz + hs - 1, i_glob_idx]
 
+
 @cuda.jit
-def acc_mass_te_kernel(mass_arr, te_arr, state, hy_dens_cell, hy_dens_theta_cell, nx, nz, hs, grid_dx, grid_dz):
+def acc_mass_te_kernel(
+    mass_arr,
+    te_arr,
+    state,
+    hy_dens_cell,
+    hy_dens_theta_cell,
+    nx,
+    nz,
+    hs,
+    grid_dx,
+    grid_dz,
+):
     k_idx = cuda.blockIdx.y * cuda.blockDim.y + cuda.threadIdx.y
     i_idx = cuda.blockIdx.x * cuda.blockDim.x + cuda.threadIdx.x
 
     if k_idx < nz and i_idx < nx:
-        r_pert  = state[ID_DENS, k_idx + hs, i_idx + hs]
+        r_pert = state[ID_DENS, k_idx + hs, i_idx + hs]
         u_mom = state[ID_UMOM, k_idx + hs, i_idx + hs]
         w_mom = state[ID_WMOM, k_idx + hs, i_idx + hs]
         rhot_pert = state[ID_RHOT, k_idx + hs, i_idx + hs]
@@ -313,16 +396,17 @@ def acc_mass_te_kernel(mass_arr, te_arr, state, hy_dens_cell, hy_dens_theta_cell
         u_vel = u_mom / r_full
         w_vel = w_mom / r_full
         th_full = (rhot_pert + hy_dens_theta_cell[hs + k_idx]) / r_full
-        
-        p_full = C0 * (r_full * th_full)**gamm
-        t_abs = th_full / ((p0 / p_full)**(rd / cp))
-        
+
+        p_full = C0 * (r_full * th_full) ** gamm
+        t_abs = th_full / ((p0 / p_full) ** (rd / cp))
+
         ke = 0.5 * r_full * (u_vel**2 + w_vel**2)
         ie = r_full * cv * t_abs
 
         cuda.atomic.add(mass_arr, 0, r_full * grid_dx * grid_dz)
         cuda.atomic.add(te_arr, 0, (ke + ie) * grid_dx * grid_dz)
 
+
 @cuda.jit
 def update_fluid_state_kernel(state_init, state_out, tend, nx, nz, hs, dt_arg):
     ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z
@@ -332,7 +416,10 @@ def update_fluid_state_kernel(state_init, state_out, tend, nx, nz, hs, dt_arg):
     if i_idx < nx and k_idx < nz and ll < NUM_VARS:
         state_idx_k = k_idx + hs
         state_idx_i = i_idx + hs
-        state_out[ll, state_idx_k, state_idx_i] = state_init[ll, state_idx_k, state_idx_i] + dt_arg * tend[ll, k_idx, i_idx]
+        state_out[ll, state_idx_k, state_idx_i] = (
+            state_init[ll, state_idx_k, state_idx_i] + dt_arg * tend[ll, k_idx, i_idx]
+        )
+
 
 @cuda.jit
 def pack_send_buf_z_kernel(state, sendbuf_b, sendbuf_t, nx, nz, hs):
@@ -344,6 +431,7 @@ def pack_send_buf_z_kernel(state, sendbuf_b, sendbuf_t, nx, nz, hs):
         sendbuf_b[ll, s_idx, i_idx] = state[ll, hs + s_idx, i_idx + hs]
         sendbuf_t[ll, s_idx, i_idx] = state[ll, nz + s_idx, i_idx + hs]
 
+
 @cuda.jit
 def unpack_recv_buf_z_kernel(state, recvbuf_b, recvbuf_t, nx, nz, hs):
     ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z
@@ -354,6 +442,7 @@ def unpack_recv_buf_z_kernel(state, recvbuf_b, recvbuf_t, nx, nz, hs):
         state[ll, s_idx, i_idx + hs] = recvbuf_b[ll, s_idx, i_idx]
         state[ll, nz + hs + s_idx, i_idx + hs] = recvbuf_t[ll, s_idx, i_idx]
 
+
 @cuda.jit
 def unpack_recv_buf_z_bottom_kernel(state, recvbuf_b, nx, nz, hs):
     ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z
@@ -363,6 +452,7 @@ def unpack_recv_buf_z_bottom_kernel(state, recvbuf_b, nx, nz, hs):
     if s_idx < hs and i_idx < nx and ll < NUM_VARS:
         state[ll, s_idx, i_idx + hs] = recvbuf_b[ll, s_idx, i_idx]
 
+
 @cuda.jit
 def unpack_recv_buf_z_top_kernel(state, recvbuf_t, nx, nz, hs):
     ll = cuda.blockIdx.z * cuda.blockDim.z + cuda.threadIdx.z
@@ -371,4 +461,3 @@ def unpack_recv_buf_z_top_kernel(state, recvbuf_t, nx, nz, hs):
 
     if s_idx < hs and i_idx < nx and ll < NUM_VARS:
         state[ll, nz + hs + s_idx, i_idx + hs] = recvbuf_t[ll, s_idx, i_idx]
-
diff --git a/examples/miniapps/MiniWeather/miniweather.py b/examples/miniapps/MiniWeather/miniweather.py
index 95191b2c..5dfb89c6 100644
--- a/examples/miniapps/MiniWeather/miniweather.py
+++ b/examples/miniapps/MiniWeather/miniweather.py
@@ -7,25 +7,55 @@
 from charm4py import charm, Chare, Array, Future, Reducer, coro, Channel
 
 from constants import (
-    pi, grav, cp, cv, rd, p0, C0, gamm, xlen, zlen, hv_beta, cfl, max_speed, hs,
-    sten_size, NUM_VARS, ID_DENS, ID_UMOM, ID_WMOM, ID_RHOT, DIR_X, DIR_Z,
-    DATA_SPEC_COLLISION, DATA_SPEC_THERMAL, DATA_SPEC_MOUNTAIN,
-    DATA_SPEC_TURBULENCE, DATA_SPEC_DENSITY_CURRENT, DATA_SPEC_INJECTION,
-    nqpoints, qpoints, qweights
+    C0,
+    gamm,
+    xlen,
+    zlen,
+    hv_beta,
+    cfl,
+    max_speed,
+    hs,
+    NUM_VARS,
+    ID_DENS,
+    ID_UMOM,
+    ID_WMOM,
+    ID_RHOT,
+    DIR_X,
+    DIR_Z,
+    DATA_SPEC_COLLISION,
+    DATA_SPEC_THERMAL,
+    DATA_SPEC_MOUNTAIN,
+    DATA_SPEC_TURBULENCE,
+    DATA_SPEC_DENSITY_CURRENT,
+    DATA_SPEC_INJECTION,
+    nqpoints,
+    qpoints,
+    qweights,
 )
 from kernels import (
-    hydro_const_theta, hydro_const_bvfreq, sample_ellipse_cosine,
-    collision as collision_init, thermal as thermal_init, mountain_waves as mountain_waves_init, 
-    turbulence as turbulence_init, density_current as density_current_init, injection as injection_init,
-    compute_flux_x_kernel, compute_tend_x_kernel,
-    compute_flux_z_kernel, compute_tend_z_kernel,
-    pack_send_buf_kernel, unpack_recv_buf_kernel,
-    pack_send_buf_z_kernel, unpack_recv_buf_z_kernel,
-    unpack_recv_buf_z_bottom_kernel, unpack_recv_buf_z_top_kernel,
-    update_state_x_kernel, update_state_z_kernel,
-    acc_mass_te_kernel, update_fluid_state_kernel
+    collision as collision_init,
+    thermal as thermal_init,
+    mountain_waves as mountain_waves_init,
+    turbulence as turbulence_init,
+    density_current as density_current_init,
+    injection as injection_init,
+    compute_flux_x_kernel,
+    compute_tend_x_kernel,
+    compute_flux_z_kernel,
+    compute_tend_z_kernel,
+    pack_send_buf_kernel,
+    unpack_recv_buf_kernel,
+    pack_send_buf_z_kernel,
+    unpack_recv_buf_z_kernel,
+    unpack_recv_buf_z_bottom_kernel,
+    unpack_recv_buf_z_top_kernel,
+    update_state_x_kernel,
+    update_state_z_kernel,
+    acc_mass_te_kernel,
+    update_fluid_state_kernel,
 )
 
+
 # Helper for domain decomposition
 def calculate_domain_decomposition_x(chare_idx, num_chares_x, nx_glob):
     nx_local_base = nx_glob // num_chares_x
@@ -34,6 +64,7 @@ def calculate_domain_decomposition_x(chare_idx, num_chares_x, nx_glob):
         raise ValueError("nx_glob must be divisible by num_chares_x")
     return nx_local_base, chare_idx * nx_local_base
 
+
 def calculate_domain_decomposition_z(chare_idx, num_chares_z, nz_glob):
     nz_local_base = nz_glob // num_chares_z
     remainder = nz_glob % num_chares_z
@@ -41,16 +72,17 @@ def calculate_domain_decomposition_z(chare_idx, num_chares_z, nz_glob):
         raise ValueError("nz_glob must be divisible by num_chares_z")
     return nz_local_base, chare_idx * nz_local_base
 
+
 class MiniWeatherChare(Chare):
     def __init__(self, args):
-        args_dict               = args[0]
-        num_chares_x_in         = args[1]
-        num_chares_z_in         = args[2]
-        global_nx_in            = args[3]
-        global_nz_in            = args[4]
-        data_spec_int_in        = args[5]
-        dt_in                   = args[6]
-        initial_etime_in        = args[7]
+        args_dict = args[0]
+        num_chares_x_in = args[1]
+        num_chares_z_in = args[2]
+        global_nx_in = args[3]
+        global_nz_in = args[4]
+        data_spec_int_in = args[5]
+        dt_in = args[6]
+        initial_etime_in = args[7]
 
         self.args = argparse.Namespace(**args_dict)
         # For 2D chare array, thisIndex is a tuple (i, j)
@@ -58,11 +90,11 @@ def __init__(self, args):
         self.chare_idx_z = self.thisIndex[1]
         self.num_chares_x = num_chares_x_in
         self.num_chares_z = num_chares_z_in
-        
+
         self.nx_glob = global_nx_in
         self.nz_glob = global_nz_in
         self.data_spec_int = data_spec_int_in
-        self.dt = dt_in 
+        self.dt = dt_in
         self.etime = initial_etime_in
 
         self.grid_dx = xlen / self.nx_glob
@@ -98,7 +130,7 @@ def __init__(self, args):
         self.d_sendbuf_t = None
         self.d_recvbuf_b = None
         self.d_recvbuf_t = None
-        
+
         self._direction_switch = True
 
         # Channel-based communication attributes
@@ -112,12 +144,16 @@ def __init__(self, args):
         self.initial_te_val = 0.0
 
         self.setup_channels()
-        
+
         if charm.myPe() == 0 and self.chare_idx_x == 0 and self.chare_idx_z == 0:
-            print(f"Chare {self.chare_idx_x}, {self.chare_idx_z} initialized on PE {charm.myPe()}")
+            print(
+                f"Chare {self.chare_idx_x}, {self.chare_idx_z} initialized on PE {charm.myPe()}"
+            )
 
     def setup_channels(self):
-        left_proxy_idx_x = (self.chare_idx_x - 1 + self.num_chares_x) % self.num_chares_x
+        left_proxy_idx_x = (
+            self.chare_idx_x - 1 + self.num_chares_x
+        ) % self.num_chares_x
         left_neighbor_proxy = self.thisProxy[left_proxy_idx_x, self.chare_idx_z]
         self.left_channel = Channel(self, remote=left_neighbor_proxy)
 
@@ -139,13 +175,17 @@ def setup_channels(self):
         else:
             self.top_channel = None
 
-    def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, setup_done_future):
+    def setup_chare_domain(
+        self, local_nx, i_beg_global, local_nz, k_beg_global, setup_done_future
+    ):
         self.nx = local_nx
         self.nz = local_nz
         self.i_beg_global_idx = i_beg_global
         self.k_beg_global_idx = k_beg_global
 
-        self.state_host = np.zeros((NUM_VARS, self.nz + 2 * hs, self.nx + 2 * hs), dtype=np.float64)
+        self.state_host = np.zeros(
+            (NUM_VARS, self.nz + 2 * hs, self.nx + 2 * hs), dtype=np.float64
+        )
         self.hy_dens_cell_host = np.zeros(self.nz + 2 * hs, dtype=np.float64)
         self.hy_dens_theta_cell_host = np.zeros(self.nz + 2 * hs, dtype=np.float64)
         self.hy_dens_int_host = np.zeros(self.nz + 1, dtype=np.float64)
@@ -153,9 +193,12 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set
         self.hy_pressure_int_host = np.zeros(self.nz + 1, dtype=np.float64)
 
         problem_init_map = {
-            DATA_SPEC_COLLISION: collision_init, DATA_SPEC_THERMAL: thermal_init,
-            DATA_SPEC_MOUNTAIN: mountain_waves_init, DATA_SPEC_TURBULENCE: turbulence_init,
-            DATA_SPEC_DENSITY_CURRENT: density_current_init, DATA_SPEC_INJECTION: injection_init,
+            DATA_SPEC_COLLISION: collision_init,
+            DATA_SPEC_THERMAL: thermal_init,
+            DATA_SPEC_MOUNTAIN: mountain_waves_init,
+            DATA_SPEC_TURBULENCE: turbulence_init,
+            DATA_SPEC_DENSITY_CURRENT: density_current_init,
+            DATA_SPEC_INJECTION: injection_init,
         }
         init_routine = problem_init_map[self.data_spec_int]
 
@@ -163,19 +206,35 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set
             for i_loop_idx in range(self.nx + 2 * hs):
                 for kk_quad in range(nqpoints):
                     for ii_quad in range(nqpoints):
-                        x_glob = (self.i_beg_global_idx + i_loop_idx - hs + 0.5) * self.grid_dx + (qpoints[ii_quad] - 0.5) * self.grid_dx
-                        z_glob = (self.k_beg_global_idx + k_loop_idx - hs + 0.5) * self.grid_dz + (qpoints[kk_quad] - 0.5) * self.grid_dz
-                        
+                        x_glob = (
+                            self.i_beg_global_idx + i_loop_idx - hs + 0.5
+                        ) * self.grid_dx + (qpoints[ii_quad] - 0.5) * self.grid_dx
+                        z_glob = (
+                            self.k_beg_global_idx + k_loop_idx - hs + 0.5
+                        ) * self.grid_dz + (qpoints[kk_quad] - 0.5) * self.grid_dz
+
                         r, u, w, t, hr, ht = init_routine(x_glob, z_glob)
 
-                        self.state_host[ID_DENS, k_loop_idx, i_loop_idx] += r * qweights[ii_quad] * qweights[kk_quad]
-                        self.state_host[ID_UMOM, k_loop_idx, i_loop_idx] += (r + hr) * u * qweights[ii_quad] * qweights[kk_quad]
-                        self.state_host[ID_WMOM, k_loop_idx, i_loop_idx] += (r + hr) * w * qweights[ii_quad] * qweights[kk_quad]
-                        self.state_host[ID_RHOT, k_loop_idx, i_loop_idx] += ((r + hr) * (t + ht) - hr * ht) * qweights[ii_quad] * qweights[kk_quad]
-        
+                        self.state_host[ID_DENS, k_loop_idx, i_loop_idx] += (
+                            r * qweights[ii_quad] * qweights[kk_quad]
+                        )
+                        self.state_host[ID_UMOM, k_loop_idx, i_loop_idx] += (
+                            (r + hr) * u * qweights[ii_quad] * qweights[kk_quad]
+                        )
+                        self.state_host[ID_WMOM, k_loop_idx, i_loop_idx] += (
+                            (r + hr) * w * qweights[ii_quad] * qweights[kk_quad]
+                        )
+                        self.state_host[ID_RHOT, k_loop_idx, i_loop_idx] += (
+                            ((r + hr) * (t + ht) - hr * ht)
+                            * qweights[ii_quad]
+                            * qweights[kk_quad]
+                        )
+
         for k_loop_idx in range(self.nz + 2 * hs):
             for kk_quad in range(nqpoints):
-                z_quad_hydro = (self.k_beg_global_idx + k_loop_idx - hs + 0.5) * self.grid_dz + (qpoints[kk_quad] - 0.5) * self.grid_dz
+                z_quad_hydro = (
+                    self.k_beg_global_idx + k_loop_idx - hs + 0.5
+                ) * self.grid_dz + (qpoints[kk_quad] - 0.5) * self.grid_dz
                 _r, _u, _w, _t, hr, ht = init_routine(0.0, z_quad_hydro)
                 self.hy_dens_cell_host[k_loop_idx] += hr * qweights[kk_quad]
                 self.hy_dens_theta_cell_host[k_loop_idx] += hr * ht * qweights[kk_quad]
@@ -185,7 +244,7 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set
             _r, _u, _w, _t, hr, ht = init_routine(0.0, z_interface)
             self.hy_dens_int_host[k_loop_idx] = hr
             self.hy_dens_theta_int_host[k_loop_idx] = hr * ht
-            self.hy_pressure_int_host[k_loop_idx] = C0 * ((hr * ht)**gamm)
+            self.hy_pressure_int_host[k_loop_idx] = C0 * ((hr * ht) ** gamm)
 
         self.d_state = cuda.to_device(self.state_host)
         self.d_state_tmp = cuda.to_device(self.state_host)
@@ -200,7 +259,7 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set
         self.d_flux = cuda.device_array(shape=flux_shape, dtype=np.float64)
         self.d_tend = cuda.device_array(shape=tend_shape, dtype=np.float64)
 
-        sendrecv_shape = (NUM_VARS, self.nz, hs) 
+        sendrecv_shape = (NUM_VARS, self.nz, hs)
         self.d_sendbuf_l = cuda.device_array(shape=sendrecv_shape, dtype=np.float64)
         self.d_sendbuf_r = cuda.device_array(shape=sendrecv_shape, dtype=np.float64)
         self.d_recvbuf_l = cuda.device_array(shape=sendrecv_shape, dtype=np.float64)
@@ -215,7 +274,7 @@ def setup_chare_domain(self, local_nx, i_beg_global, local_nz, k_beg_global, set
         local_mass, local_te = self._reductions()
         self.initial_mass_val = local_mass
         self.initial_te_val = local_te
-        
+
         self.reduce(setup_done_future, [local_mass, local_te], Reducer.sum)
 
     def _reductions(self):
@@ -223,13 +282,23 @@ def _reductions(self):
         d_te_val = cuda.to_device(np.zeros(1, dtype=np.float64))
 
         threadsperblock = (16, 16, 1)
-        blockspergrid = (math.ceil(self.nx / threadsperblock[0]),
-                         math.ceil(self.nz / threadsperblock[1]), 
-                         1)
+        blockspergrid = (
+            math.ceil(self.nx / threadsperblock[0]),
+            math.ceil(self.nz / threadsperblock[1]),
+            1,
+        )
 
         acc_mass_te_kernel[blockspergrid, threadsperblock](
-            d_mass_val, d_te_val, self.d_state, self.d_hy_dens_cell, self.d_hy_dens_theta_cell,
-            self.nx, self.nz, hs, self.grid_dx, self.grid_dz
+            d_mass_val,
+            d_te_val,
+            self.d_state,
+            self.d_hy_dens_cell,
+            self.d_hy_dens_theta_cell,
+            self.nx,
+            self.nz,
+            hs,
+            self.grid_dx,
+            self.grid_dz,
         )
         mass_host = d_mass_val.copy_to_host()
         te_host = d_te_val.copy_to_host()
@@ -237,10 +306,12 @@ def _reductions(self):
 
     @coro
     def _set_halo_values_x(self, d_state_forcing):
-        threadsperblock_buffer = (16, 16, 1) 
-        blockspergrid_buffer = (math.ceil(hs / threadsperblock_buffer[0]),
-                                math.ceil(self.nz / threadsperblock_buffer[1]),
-                                NUM_VARS)
+        threadsperblock_buffer = (16, 16, 1)
+        blockspergrid_buffer = (
+            math.ceil(hs / threadsperblock_buffer[0]),
+            math.ceil(self.nz / threadsperblock_buffer[1]),
+            NUM_VARS,
+        )
         pack_send_buf_kernel[blockspergrid_buffer, threadsperblock_buffer](
             d_state_forcing, self.d_sendbuf_l, self.d_sendbuf_r, self.nx, self.nz, hs
         )
@@ -251,10 +322,10 @@ def _set_halo_values_x(self, d_state_forcing):
 
         self.left_channel.send(data_to_send_left)
         self.right_channel.send(data_to_send_right)
-        
+
         data_for_my_d_recvbuf_l = self.left_channel.recv()
         data_for_my_d_recvbuf_r = self.right_channel.recv()
-        
+
         self.d_recvbuf_l.copy_to_device(data_for_my_d_recvbuf_l)
         self.d_recvbuf_r.copy_to_device(data_for_my_d_recvbuf_r)
 
@@ -264,64 +335,91 @@ def _set_halo_values_x(self, d_state_forcing):
 
         if self.data_spec_int == DATA_SPEC_INJECTION and self.chare_idx_x == 0:
             threadsperblock_inj = (16, 16, 1)
-            blockspergrid_inj = (math.ceil(hs / threadsperblock_inj[0]),
-                                 math.ceil(self.nz / threadsperblock_inj[1]),
-                                 1)
+            blockspergrid_inj = (
+                math.ceil(hs / threadsperblock_inj[0]),
+                math.ceil(self.nz / threadsperblock_inj[1]),
+                1,
+            )
             update_state_x_kernel[blockspergrid_inj, threadsperblock_inj](
-                self.d_state, self.d_hy_dens_cell, self.d_hy_dens_theta_cell, 
-                self.nx, self.nz, hs, self.k_beg_global_idx, self.grid_dz
+                self.d_state,
+                self.d_hy_dens_cell,
+                self.d_hy_dens_theta_cell,
+                self.nx,
+                self.nz,
+                hs,
+                self.k_beg_global_idx,
+                self.grid_dz,
             )
         cuda.synchronize()
 
     @coro
     def _set_halo_values_z(self, d_state_forcing):
-        threadsperblock_buffer = (16, 16, 1) 
-        blockspergrid_buffer = (math.ceil(self.nx / threadsperblock_buffer[0]),
-                                math.ceil(hs / threadsperblock_buffer[1]),
-                                NUM_VARS)
+        threadsperblock_buffer = (16, 16, 1)
+        blockspergrid_buffer = (
+            math.ceil(self.nx / threadsperblock_buffer[0]),
+            math.ceil(hs / threadsperblock_buffer[1]),
+            NUM_VARS,
+        )
         pack_send_buf_z_kernel[blockspergrid_buffer, threadsperblock_buffer](
             d_state_forcing, self.d_sendbuf_b, self.d_sendbuf_t, self.nx, self.nz, hs
         )
         cuda.synchronize()
 
-        if self.k_beg_global_idx == 0 or self.k_beg_global_idx + self.nz == self.nz_glob:
+        if (
+            self.k_beg_global_idx == 0
+            or self.k_beg_global_idx + self.nz == self.nz_glob
+        ):
             mnt_width = xlen / 8.0
-            threadsperblock_update_z = (16, 16, 1) 
-            blockspergrid_x = math.ceil((self.nx + 2 * hs) / threadsperblock_update_z[0]) 
+            threadsperblock_update_z = (16, 16, 1)
+            blockspergrid_x = math.ceil(
+                (self.nx + 2 * hs) / threadsperblock_update_z[0]
+            )
             blockspergrid_y = math.ceil(NUM_VARS / threadsperblock_update_z[1])
             blockspergrid_update_z = (blockspergrid_x, blockspergrid_y, 1)
 
             update_state_z_kernel[blockspergrid_update_z, threadsperblock_update_z](
-                d_state_forcing, self.data_spec_int, 
-                self.i_beg_global_idx, self.nx, self.nz, hs,
-                self.grid_dx, mnt_width, self.k_beg_global_idx, self.nz_glob
+                d_state_forcing,
+                self.data_spec_int,
+                self.i_beg_global_idx,
+                self.nx,
+                self.nz,
+                hs,
+                self.grid_dx,
+                mnt_width,
+                self.k_beg_global_idx,
+                self.nz_glob,
             )
         cuda.synchronize()
 
         if self.bottom_channel is not None:
             data_to_send_bottom = self.d_sendbuf_b.copy_to_host()
             self.bottom_channel.send(data_to_send_bottom)
-        
+
         if self.top_channel is not None:
             data_to_send_top = self.d_sendbuf_t.copy_to_host()
             self.top_channel.send(data_to_send_top)
-        
+
         if self.bottom_channel is not None:
             data_for_my_d_recvbuf_b = self.bottom_channel.recv()
             self.d_recvbuf_b.copy_to_device(data_for_my_d_recvbuf_b)
-        
+
         if self.top_channel is not None:
             data_for_my_d_recvbuf_t = self.top_channel.recv()
             self.d_recvbuf_t.copy_to_device(data_for_my_d_recvbuf_t)
 
         if self.bottom_channel is not None and self.top_channel is not None:
             unpack_recv_buf_z_kernel[blockspergrid_buffer, threadsperblock_buffer](
-                d_state_forcing, self.d_recvbuf_b, self.d_recvbuf_t, self.nx, self.nz, hs
+                d_state_forcing,
+                self.d_recvbuf_b,
+                self.d_recvbuf_t,
+                self.nx,
+                self.nz,
+                hs,
             )
         elif self.bottom_channel is not None:
-            unpack_recv_buf_z_bottom_kernel[blockspergrid_buffer, threadsperblock_buffer](
-                d_state_forcing, self.d_recvbuf_b, self.nx, self.nz, hs
-            )
+            unpack_recv_buf_z_bottom_kernel[
+                blockspergrid_buffer, threadsperblock_buffer
+            ](d_state_forcing, self.d_recvbuf_b, self.nx, self.nz, hs)
         elif self.top_channel is not None:
             unpack_recv_buf_z_top_kernel[blockspergrid_buffer, threadsperblock_buffer](
                 d_state_forcing, self.d_recvbuf_t, self.nx, self.nz, hs
@@ -331,17 +429,29 @@ def _set_halo_values_z(self, d_state_forcing):
 
     def _compute_tendencies_x(self, dt_arg_for_hv_coef, d_state_forcing):
         threadsperblock_flux = (16, 16, 1)
-        blockspergrid_flux_x = (math.ceil((self.nx + 1) / threadsperblock_flux[0]), 
-                                  math.ceil(self.nz / threadsperblock_flux[1]), 1)
+        blockspergrid_flux_x = (
+            math.ceil((self.nx + 1) / threadsperblock_flux[0]),
+            math.ceil(self.nz / threadsperblock_flux[1]),
+            1,
+        )
         hv_coef = -hv_beta * self.grid_dx / (16.0 * dt_arg_for_hv_coef)
         compute_flux_x_kernel[blockspergrid_flux_x, threadsperblock_flux](
-            d_state_forcing, self.d_flux, self.d_hy_dens_cell, self.d_hy_dens_theta_cell,
-            hv_coef, self.nx, self.nz, hs
+            d_state_forcing,
+            self.d_flux,
+            self.d_hy_dens_cell,
+            self.d_hy_dens_theta_cell,
+            hv_coef,
+            self.nx,
+            self.nz,
+            hs,
         )
 
-        threadsperblock_tend = (16, 16, 1) 
-        blockspergrid_tend_x = (math.ceil(self.nx / threadsperblock_tend[0]),
-                                  math.ceil(self.nz / threadsperblock_tend[1]), NUM_VARS)
+        threadsperblock_tend = (16, 16, 1)
+        blockspergrid_tend_x = (
+            math.ceil(self.nx / threadsperblock_tend[0]),
+            math.ceil(self.nz / threadsperblock_tend[1]),
+            NUM_VARS,
+        )
         compute_tend_x_kernel[blockspergrid_tend_x, threadsperblock_tend](
             self.d_flux, self.d_tend, self.nx, self.nz, self.grid_dx
         )
@@ -350,33 +460,59 @@ def _compute_tendencies_x(self, dt_arg_for_hv_coef, d_state_forcing):
     def _compute_tendencies_z(self, dt_arg_for_hv_coef, d_state_forcing):
         hv_coef = -hv_beta * self.grid_dz / (16.0 * dt_arg_for_hv_coef)
         threadsperblock_flux = (16, 16, 1)
-        blockspergrid_flux_z = (math.ceil(self.nx / threadsperblock_flux[0]),
-                                  math.ceil((self.nz + 1) / threadsperblock_flux[1]), 1)
+        blockspergrid_flux_z = (
+            math.ceil(self.nx / threadsperblock_flux[0]),
+            math.ceil((self.nz + 1) / threadsperblock_flux[1]),
+            1,
+        )
         compute_flux_z_kernel[blockspergrid_flux_z, threadsperblock_flux](
-            d_state_forcing, self.d_flux, self.d_hy_dens_int, self.d_hy_pressure_int, self.d_hy_dens_theta_int,
-            hv_coef, self.nx, self.nz, hs, self.k_beg_global_idx, self.nz_glob
+            d_state_forcing,
+            self.d_flux,
+            self.d_hy_dens_int,
+            self.d_hy_pressure_int,
+            self.d_hy_dens_theta_int,
+            hv_coef,
+            self.nx,
+            self.nz,
+            hs,
+            self.k_beg_global_idx,
+            self.nz_glob,
         )
 
         threadsperblock_tend = (16, 16, 1)
-        blockspergrid_tend_z = (math.ceil(self.nx / threadsperblock_tend[0]),
-                                  math.ceil(self.nz / threadsperblock_tend[1]), NUM_VARS)
+        blockspergrid_tend_z = (
+            math.ceil(self.nx / threadsperblock_tend[0]),
+            math.ceil(self.nz / threadsperblock_tend[1]),
+            NUM_VARS,
+        )
         compute_tend_z_kernel[blockspergrid_tend_z, threadsperblock_tend](
-            d_state_forcing, self.d_flux, self.d_tend, self.nx, self.nz, hs, self.grid_dz
+            d_state_forcing,
+            self.d_flux,
+            self.d_tend,
+            self.nx,
+            self.nz,
+            hs,
+            self.grid_dz,
         )
         cuda.synchronize()
 
     @coro
-    def _semi_discrete_step(self, dt_arg, current_dir, d_state_init, d_state_forcing, d_state_out):
+    def _semi_discrete_step(
+        self, dt_arg, current_dir, d_state_init, d_state_forcing, d_state_out
+    ):
         if current_dir == DIR_X:
             self._set_halo_values_x(d_state_forcing)
             self._compute_tendencies_x(dt_arg, d_state_forcing)
         elif current_dir == DIR_Z:
             self._set_halo_values_z(d_state_forcing)
             self._compute_tendencies_z(dt_arg, d_state_forcing)
-        
+
         threadsperblock_update = (16, 16, 1)
-        blockspergrid_update = (math.ceil(self.nx / threadsperblock_update[0]),
-                                  math.ceil(self.nz / threadsperblock_update[1]), NUM_VARS)
+        blockspergrid_update = (
+            math.ceil(self.nx / threadsperblock_update[0]),
+            math.ceil(self.nz / threadsperblock_update[1]),
+            NUM_VARS,
+        )
         update_fluid_state_kernel[blockspergrid_update, threadsperblock_update](
             d_state_init, d_state_out, self.d_tend, self.nx, self.nz, hs, dt_arg
         )
@@ -389,19 +525,43 @@ def _perform_timestep(self, dt_full_step):
         dt_rk_stage3 = dt_full_step / 1.0
 
         if self._direction_switch:
-            self._semi_discrete_step(dt_rk_stage1, DIR_X, self.d_state, self.d_state,     self.d_state_tmp)
-            self._semi_discrete_step(dt_rk_stage2, DIR_X, self.d_state, self.d_state_tmp, self.d_state_tmp)
-            self._semi_discrete_step(dt_rk_stage3, DIR_X, self.d_state, self.d_state_tmp, self.d_state)
-            self._semi_discrete_step(dt_rk_stage1, DIR_Z, self.d_state, self.d_state,     self.d_state_tmp)
-            self._semi_discrete_step(dt_rk_stage2, DIR_Z, self.d_state, self.d_state_tmp, self.d_state_tmp)
-            self._semi_discrete_step(dt_rk_stage3, DIR_Z, self.d_state, self.d_state_tmp, self.d_state)
+            self._semi_discrete_step(
+                dt_rk_stage1, DIR_X, self.d_state, self.d_state, self.d_state_tmp
+            )
+            self._semi_discrete_step(
+                dt_rk_stage2, DIR_X, self.d_state, self.d_state_tmp, self.d_state_tmp
+            )
+            self._semi_discrete_step(
+                dt_rk_stage3, DIR_X, self.d_state, self.d_state_tmp, self.d_state
+            )
+            self._semi_discrete_step(
+                dt_rk_stage1, DIR_Z, self.d_state, self.d_state, self.d_state_tmp
+            )
+            self._semi_discrete_step(
+                dt_rk_stage2, DIR_Z, self.d_state, self.d_state_tmp, self.d_state_tmp
+            )
+            self._semi_discrete_step(
+                dt_rk_stage3, DIR_Z, self.d_state, self.d_state_tmp, self.d_state
+            )
         else:
-            self._semi_discrete_step(dt_rk_stage1, DIR_Z, self.d_state, self.d_state,     self.d_state_tmp)
-            self._semi_discrete_step(dt_rk_stage2, DIR_Z, self.d_state, self.d_state_tmp, self.d_state_tmp)
-            self._semi_discrete_step(dt_rk_stage3, DIR_Z, self.d_state, self.d_state_tmp, self.d_state)
-            self._semi_discrete_step(dt_rk_stage1, DIR_X, self.d_state, self.d_state,     self.d_state_tmp)
-            self._semi_discrete_step(dt_rk_stage2, DIR_X, self.d_state, self.d_state_tmp, self.d_state_tmp)
-            self._semi_discrete_step(dt_rk_stage3, DIR_X, self.d_state, self.d_state_tmp, self.d_state)
+            self._semi_discrete_step(
+                dt_rk_stage1, DIR_Z, self.d_state, self.d_state, self.d_state_tmp
+            )
+            self._semi_discrete_step(
+                dt_rk_stage2, DIR_Z, self.d_state, self.d_state_tmp, self.d_state_tmp
+            )
+            self._semi_discrete_step(
+                dt_rk_stage3, DIR_Z, self.d_state, self.d_state_tmp, self.d_state
+            )
+            self._semi_discrete_step(
+                dt_rk_stage1, DIR_X, self.d_state, self.d_state, self.d_state_tmp
+            )
+            self._semi_discrete_step(
+                dt_rk_stage2, DIR_X, self.d_state, self.d_state_tmp, self.d_state_tmp
+            )
+            self._semi_discrete_step(
+                dt_rk_stage3, DIR_X, self.d_state, self.d_state_tmp, self.d_state
+            )
 
         self._direction_switch = not self._direction_switch
 
@@ -409,24 +569,24 @@ def _perform_timestep(self, dt_full_step):
     # Main simulation loop
     def start_main_loop(self, all_chares_done_future):
         current_sim_time_target = self.args.sim_time
-        
+
         chare_loop_start_time = time.time()
-        
+
         n_iters = 0
         while self.etime < current_sim_time_target and n_iters < self.args.max_iters:
             actual_dt = self.dt
             if self.etime + self.dt > current_sim_time_target:
                 actual_dt = current_sim_time_target - self.etime
-            
+
             self._perform_timestep(actual_dt)
-            
+
             self.etime += actual_dt
             n_iters += 1
 
             if self.args.output_freq > 0 and n_iters % self.args.output_freq == 0:
-                cuda.synchronize() 
+                cuda.synchronize()
                 state_host_output_local = self.d_state.copy_to_host()
-                
+
                 if hs > 0:
                     state_ext = state_host_output_local[:, hs:-hs, hs:-hs]
                 else:
@@ -443,58 +603,134 @@ def start_main_loop(self, all_chares_done_future):
                 denom = hy_dens_cell_local[:, None] + dens
                 uwnd = state_ext[ID_UMOM, :, :] / denom
                 wwnd = state_ext[ID_WMOM, :, :] / denom
-                theta = (state_ext[ID_RHOT, :, :] + hy_dens_theta_cell_local[:, None]) / denom \
-                        - (hy_dens_theta_cell_local / hy_dens_cell_local)[:, None]
+                theta = (
+                    state_ext[ID_RHOT, :, :] + hy_dens_theta_cell_local[:, None]
+                ) / denom - (hy_dens_theta_cell_local / hy_dens_cell_local)[:, None]
 
                 norm_state = np.stack([dens, uwnd, wwnd, theta], axis=0)
-                output_filename = os.path.join(self.args.output_dir, f"data_iter_{n_iters:06d}_chare_{self.chare_idx_x}_{self.chare_idx_z}.npz")
-                np.savez(output_filename,
-                         state=norm_state,
-                         etime=self.etime,
-                         chare_nx=self.nx,
-                         chare_i_beg=self.i_beg_global_idx,
-                         chare_nz=self.nz,
-                         chare_k_beg=self.k_beg_global_idx)
+                output_filename = os.path.join(
+                    self.args.output_dir,
+                    f"data_iter_{n_iters:06d}_chare_{self.chare_idx_x}_{self.chare_idx_z}.npz",
+                )
+                np.savez(
+                    output_filename,
+                    state=norm_state,
+                    etime=self.etime,
+                    chare_nx=self.nx,
+                    chare_i_beg=self.i_beg_global_idx,
+                    chare_nz=self.nz,
+                    chare_k_beg=self.k_beg_global_idx,
+                )
                 if self.chare_idx_x == 0 and self.chare_idx_z == 0:
-                    print(f"Iter: {n_iters}, Chare 0,0 output data to {output_filename} pattern at SimTime: {self.etime:.4f}s")
-            
-            if self.chare_idx_x == 0 and self.chare_idx_z == 0 and (n_iters % 10 == 0 or n_iters == 1 or (self.etime >= current_sim_time_target) or (n_iters == self.args.max_iters)):
-                print(f"Chare 0,0 - Iter: {n_iters:5d}, Sim Time: {self.etime:8.4f}s / {current_sim_time_target:.2f}s, Step dt: {actual_dt:.6f}s")
+                    print(
+                        f"Iter: {n_iters}, Chare 0,0 output data to {output_filename} pattern at SimTime: {self.etime:.4f}s"
+                    )
+
+            if (
+                self.chare_idx_x == 0
+                and self.chare_idx_z == 0
+                and (
+                    n_iters % 10 == 0
+                    or n_iters == 1
+                    or (self.etime >= current_sim_time_target)
+                    or (n_iters == self.args.max_iters)
+                )
+            ):
+                print(
+                    f"Chare 0,0 - Iter: {n_iters:5d}, Sim Time: {self.etime:8.4f}s / {current_sim_time_target:.2f}s, Step dt: {actual_dt:.6f}s"
+                )
 
         chare_loop_end_time = time.time()
         cuda.synchronize()
-        
+
         if self.chare_idx_x == 0 and self.chare_idx_z == 0:
-             print(f"\nChare 0,0 finished main loop after {n_iters} iterations. Local loop wall time: {chare_loop_end_time - chare_loop_start_time:.3f} s.")
-             print(f"Chare 0,0 final simulation time: {self.etime:.4f}s")
+            print(
+                f"\nChare 0,0 finished main loop after {n_iters} iterations. Local loop wall time: {chare_loop_end_time - chare_loop_start_time:.3f} s."
+            )
+            print(f"Chare 0,0 final simulation time: {self.etime:.4f}s")
 
         final_mass_local, final_te_local = self._reductions()
-        
-        self.reduce(all_chares_done_future, [final_mass_local, final_te_local, self.etime, float(n_iters)], 
-                    Reducer.gather)
+
+        self.reduce(
+            all_chares_done_future,
+            [final_mass_local, final_te_local, self.etime, float(n_iters)],
+            Reducer.gather,
+        )
+
 
 def main_charm_wrapper(charm_args_list):
-    parser = argparse.ArgumentParser(description="MiniWeather Python Numba CUDA Simulation (Charm4Py)")
-    parser.add_argument("--nx_glob", type=int, default=200, help="Number of global cells in x-direction (default: 200)")
-    parser.add_argument("--nz_glob", type=int, default=100, help="Number of global cells in z-direction (default: 100)")
-    parser.add_argument("--sim_time", type=float, default=1.0, help="How many seconds to run the simulation (default: 1.0s)")
-    parser.add_argument("--max_iters", type=int, default=10000, help="Maximum number of iterations (default: 10000)")
-    parser.add_argument("--output_freq", type=int, default=0, help="Frequency of outputting data in iterations (0 for no output, default: 0)")
-    parser.add_argument("--output_dir", type=str, default="output_data_charm", help="Directory to save output files (default: output_data_charm)")
-    
+    parser = argparse.ArgumentParser(
+        description="MiniWeather Python Numba CUDA Simulation (Charm4Py)"
+    )
+    parser.add_argument(
+        "--nx_glob",
+        type=int,
+        default=200,
+        help="Number of global cells in x-direction (default: 200)",
+    )
+    parser.add_argument(
+        "--nz_glob",
+        type=int,
+        default=100,
+        help="Number of global cells in z-direction (default: 100)",
+    )
+    parser.add_argument(
+        "--sim_time",
+        type=float,
+        default=1.0,
+        help="How many seconds to run the simulation (default: 1.0s)",
+    )
+    parser.add_argument(
+        "--max_iters",
+        type=int,
+        default=10000,
+        help="Maximum number of iterations (default: 10000)",
+    )
+    parser.add_argument(
+        "--output_freq",
+        type=int,
+        default=0,
+        help="Frequency of outputting data in iterations (0 for no output, default: 0)",
+    )
+    parser.add_argument(
+        "--output_dir",
+        type=str,
+        default="output_data_charm",
+        help="Directory to save output files (default: output_data_charm)",
+    )
+
     data_spec_choices_map = {
-        DATA_SPEC_COLLISION: "collision", DATA_SPEC_THERMAL: "thermal",
-        DATA_SPEC_MOUNTAIN: "mountain_waves", DATA_SPEC_TURBULENCE: "turbulence",
-        DATA_SPEC_DENSITY_CURRENT: "density_current", DATA_SPEC_INJECTION: "injection"
+        DATA_SPEC_COLLISION: "collision",
+        DATA_SPEC_THERMAL: "thermal",
+        DATA_SPEC_MOUNTAIN: "mountain_waves",
+        DATA_SPEC_TURBULENCE: "turbulence",
+        DATA_SPEC_DENSITY_CURRENT: "density_current",
+        DATA_SPEC_INJECTION: "injection",
     }
-    default_data_spec_name = data_spec_choices_map.get(DATA_SPEC_THERMAL, str(DATA_SPEC_THERMAL))
-    parser.add_argument("--data_spec", type=str, default=default_data_spec_name,
-                        choices=list(data_spec_choices_map.values()),
-                        help=f"Data specification name (default: {default_data_spec_name})")
-    
-    parser.add_argument("--num_chares_x", type=int, default=1, help="Number of chares in X-direction for domain decomposition (default: 1)")
-    parser.add_argument("--num_chares_z", type=int, default=1, help="Number of chares in Z-direction for domain decomposition (default: 1)")
-    
+    default_data_spec_name = data_spec_choices_map.get(
+        DATA_SPEC_THERMAL, str(DATA_SPEC_THERMAL)
+    )
+    parser.add_argument(
+        "--data_spec",
+        type=str,
+        default=default_data_spec_name,
+        choices=list(data_spec_choices_map.values()),
+        help=f"Data specification name (default: {default_data_spec_name})",
+    )
+
+    parser.add_argument(
+        "--num_chares_x",
+        type=int,
+        default=1,
+        help="Number of chares in X-direction for domain decomposition (default: 1)",
+    )
+    parser.add_argument(
+        "--num_chares_z",
+        type=int,
+        default=1,
+        help="Number of chares in Z-direction for domain decomposition (default: 1)",
+    )
+
     args = parser.parse_args(charm_args_list[1:])
 
     data_spec_int = None
@@ -502,12 +738,14 @@ def main_charm_wrapper(charm_args_list):
         if name == args.data_spec:
             data_spec_int = val
             break
-    
+
     if charm.myPe() == 0:
-        print(f"Running MiniWeather (Charm4Py) with: "
-              f"nx_glob={args.nx_glob}, nz_glob={args.nz_glob}, num_chares_x={args.num_chares_x}, "
-              f"num_chares_z={args.num_chares_z}, data_spec='{args.data_spec}' (ID: {data_spec_int}), sim_time={args.sim_time:.2f}s, "
-              f"max_iters={args.max_iters}, output_freq={args.output_freq}, output_dir='{args.output_dir}'")
+        print(
+            f"Running MiniWeather (Charm4Py) with: "
+            f"nx_glob={args.nx_glob}, nz_glob={args.nz_glob}, num_chares_x={args.num_chares_x}, "
+            f"num_chares_z={args.num_chares_z}, data_spec='{args.data_spec}' (ID: {data_spec_int}), sim_time={args.sim_time:.2f}s, "
+            f"max_iters={args.max_iters}, output_freq={args.output_freq}, output_dir='{args.output_dir}'"
+        )
 
         if args.output_freq > 0:
             if not os.path.exists(args.output_dir):
@@ -515,7 +753,9 @@ def main_charm_wrapper(charm_args_list):
                     os.makedirs(args.output_dir)
                     print(f"Created output directory: {args.output_dir}")
                 except FileExistsError:
-                    print(f"Output directory already exists or was just created: {args.output_dir}")
+                    print(
+                        f"Output directory already exists or was just created: {args.output_dir}"
+                    )
             else:
                 print(f"Output directory already exists: {args.output_dir}")
 
@@ -523,48 +763,77 @@ def main_charm_wrapper(charm_args_list):
     grid_dz = zlen / args.nz_glob
     initial_dt = min(grid_dx, grid_dz) / max_speed * cfl
     initial_etime = 0.0
-    
+
     num_chares_x = args.num_chares_x
     if num_chares_x > args.nx_glob:
         if charm.myPe() == 0:
-            print(f"Warning: num_chares_x ({num_chares_x}) > nx_glob ({args.nx_glob}). Setting num_chares_x = nx_glob.")
+            print(
+                f"Warning: num_chares_x ({num_chares_x}) > nx_glob ({args.nx_glob}). Setting num_chares_x = nx_glob."
+            )
         num_chares_x = args.nx_glob
         args.num_chares_x = num_chares_x
 
     num_chares_z = args.num_chares_z
     if num_chares_z > args.nz_glob:
         if charm.myPe() == 0:
-            print(f"Warning: num_chares_z ({num_chares_z}) > nz_glob ({args.nz_glob}). Setting num_chares_z = nz_glob.")
+            print(
+                f"Warning: num_chares_z ({num_chares_z}) > nz_glob ({args.nz_glob}). Setting num_chares_z = nz_glob."
+            )
         num_chares_z = args.nz_glob
         args.num_chares_z = num_chares_z
 
-    chare_constructor_args = (vars(args), num_chares_x, num_chares_z, args.nx_glob, args.nz_glob, data_spec_int, initial_dt, initial_etime)
-    
-    chares = Array(MiniWeatherChare, dims=(num_chares_x, num_chares_z), args=[chare_constructor_args])
-    
+    chare_constructor_args = (
+        vars(args),
+        num_chares_x,
+        num_chares_z,
+        args.nx_glob,
+        args.nz_glob,
+        data_spec_int,
+        initial_dt,
+        initial_etime,
+    )
+
+    chares = Array(
+        MiniWeatherChare,
+        dims=(num_chares_x, num_chares_z),
+        args=[chare_constructor_args],
+    )
+
     setup_completion_future = Future()
-    
+
     for i in range(num_chares_x):
         for j in range(num_chares_z):
-            local_nx, i_beg_global = calculate_domain_decomposition_x(i, num_chares_x, args.nx_glob)
-            local_nz, k_beg_global = calculate_domain_decomposition_z(j, num_chares_z, args.nz_glob)
-            chares[i, j].setup_chare_domain(local_nx, i_beg_global, local_nz, k_beg_global, setup_completion_future)
-    
+            local_nx, i_beg_global = calculate_domain_decomposition_x(
+                i, num_chares_x, args.nx_glob
+            )
+            local_nz, k_beg_global = calculate_domain_decomposition_z(
+                j, num_chares_z, args.nz_glob
+            )
+            chares[i, j].setup_chare_domain(
+                local_nx, i_beg_global, local_nz, k_beg_global, setup_completion_future
+            )
+
     initial_reductions_sum = setup_completion_future.get()
     mass0_sum = initial_reductions_sum[0]
     te0_sum = initial_reductions_sum[1]
 
     if charm.myPe() == 0:
-        print(f"Initial Global Mass: {mass0_sum:.6e}, Initial Global Total Energy: {te0_sum:.6e}")
-        print("\nCUDA device array setup and initial reductions complete for all chares.")
-        print(f"Starting main simulation loop up to sim_time: {args.sim_time:.2f}s or max_iters: {args.max_iters}")
+        print(
+            f"Initial Global Mass: {mass0_sum:.6e}, Initial Global Total Energy: {te0_sum:.6e}"
+        )
+        print(
+            "\nCUDA device array setup and initial reductions complete for all chares."
+        )
+        print(
+            f"Starting main simulation loop up to sim_time: {args.sim_time:.2f}s or max_iters: {args.max_iters}"
+        )
 
     main_loop_done_future = Future()
 
     chares.start_main_loop(main_loop_done_future)
 
     gathered_results = main_loop_done_future.get()
-    
+
     total_final_mass = sum(res[0] for res in gathered_results)
     total_final_te = sum(res[1] for res in gathered_results)
     max_etime = 0.0
@@ -573,24 +842,28 @@ def main_charm_wrapper(charm_args_list):
         max_etime = max(res[2] for res in gathered_results)
         max_niters = max(int(res[3]) for res in gathered_results)
 
-
     if charm.myPe() == 0:
         main_loop_wall_time = -1
         print(f"\nAll chares finished main simulation loop (max_iters {max_niters}).")
         print(f"Max final simulation time reached: {max_etime:.4f}s")
 
-        print(f"Final Global Mass:   {total_final_mass:.6e}, Final Global Total Energy:   {total_final_te:.6e}")
+        print(
+            f"Final Global Mass:   {total_final_mass:.6e}, Final Global Total Energy:   {total_final_te:.6e}"
+        )
         if abs(mass0_sum) > 1e-12:
-            print(f"Relative mass change: {(total_final_mass - mass0_sum) / mass0_sum:.6e}")
+            print(
+                f"Relative mass change: {(total_final_mass - mass0_sum) / mass0_sum:.6e}"
+            )
         else:
-            print(f"Relative mass change: (initial mass was near zero)")
+            print("Relative mass change: (initial mass was near zero)")
         if abs(te0_sum) > 1e-12:
             print(f"Relative TE change:   {(total_final_te - te0_sum) / te0_sum:.6e}")
         else:
-            print(f"Relative TE change:   (initial TE was near zero)")
+            print("Relative TE change:   (initial TE was near zero)")
 
         print("\nMiniWeather Charm4Py Numba CUDA simulation finished.")
-    
+
     charm.exit()
 
-charm.start(main_charm_wrapper)
\ No newline at end of file
+
+charm.start(main_charm_wrapper)
diff --git a/examples/mnist/mnist-mpi4py.py b/examples/mnist/mnist-mpi4py.py
index c3d1902a..f6d7d331 100644
--- a/examples/mnist/mnist-mpi4py.py
+++ b/examples/mnist/mnist-mpi4py.py
@@ -9,8 +9,6 @@
 import math
 import random
 import time
-import sys
-from torch.autograd import Variable
 from torchvision import datasets, transforms
 from mpi4py import MPI
 import numpy as np
@@ -19,6 +17,7 @@
 rank = comm.Get_rank()
 nprocs = comm.Get_size()
 
+
 # Dataset partitioning helper
 class Partition(object):
 
@@ -33,6 +32,7 @@ def __getitem__(self, index):
         data_idx = self.index[index]
         return self.data[data_idx]
 
+
 class DataPartitioner(object):
 
     def __init__(self, data, sizes=[0.7, 0.2, 0.1], seed=1234):
@@ -52,6 +52,7 @@ def __init__(self, data, sizes=[0.7, 0.2, 0.1], seed=1234):
     def use(self, partition):
         return Partition(self.data, self.partitions[partition])
 
+
 # Neural network architecture
 class Net(nn.Module):
 
@@ -72,6 +73,7 @@ def forward(self, x):
         x = self.fc2(x)
         return F.log_softmax(x, dim=1)
 
+
 # Worker object (1 per MPI rank)
 class Worker(object):
 
@@ -84,19 +86,20 @@ def __init__(self, num_workers, epochs):
 
     # Partitioning MNIST dataset
     def partition_dataset(self):
-        dataset = datasets.MNIST('./data', train=True, download=True,
-                                 transform=transforms.Compose([
-                                     transforms.ToTensor(),
-                                     transforms.Normalize((0.1307,), (0.3081,))
-                                 ]))
+        dataset = datasets.MNIST(
+            "./data",
+            train=True,
+            download=True,
+            transform=transforms.Compose(
+                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
+            ),
+        )
         size = self.num_workers
         bsz = int(128 / float(size))  # my batch size
         partition_sizes = [1.0 / size for _ in range(size)]
         partition = DataPartitioner(dataset, partition_sizes)
         partition = partition.use(rank)
-        train_set = torch.utils.data.DataLoader(partition,
-                                                batch_size=bsz,
-                                                shuffle=True)
+        train_set = torch.utils.data.DataLoader(partition, batch_size=bsz, shuffle=True)
         return train_set, bsz
 
     # Distributed SGD
@@ -120,17 +123,22 @@ def run(self, device):
                 loss.backward()
                 self.average_gradients(self.model, device)
                 self.optimizer.step()
-            print(f'Rank {rank:4d} | Epoch {self.epoch:4d} | Loss {(epoch_loss / self.num_batches):9.3f} | Time {(time.time() - t0):9.3f}')
+            print(
+                f"Rank {rank:4d} | Epoch {self.epoch:4d} | Loss {(epoch_loss / self.num_batches):9.3f} | Time {(time.time() - t0):9.3f}"
+            )
             self.epoch += 1
 
-        print(f'Rank {rank:4d} training complete, average allreduce time (us): {((self.agg_time / self.time_cnt) * 1000000):9.3f}')
+        print(
+            f"Rank {rank:4d} training complete, average allreduce time (us): {((self.agg_time / self.time_cnt) * 1000000):9.3f}"
+        )
         agg_time_arr = np.array([self.agg_time])
         agg_time_all_arr = np.array([0.0])
         comm.Allreduce(agg_time_arr, agg_time_all_arr, op=MPI.SUM)
         self.agg_time_all = agg_time_all_arr[0]
         if rank == 0:
-            print(f'Rank {rank:4d} all average allreduce time (us): {((self.agg_time_all / self.num_workers / self.time_cnt) * 1000000):9.3f}')
-
+            print(
+                f"Rank {rank:4d} all average allreduce time (us): {((self.agg_time_all / self.num_workers / self.time_cnt) * 1000000):9.3f}"
+            )
 
     # Gradient averaging
     def average_gradients(self, model, device):
@@ -149,15 +157,17 @@ def average_gradients(self, model, device):
 
             # Restore original shape of gradient data
             param.grad.data = torch.from_numpy(recv_data).to(device)
-            param.grad.data = param.grad.data.reshape(data_shape) / float(self.num_workers)
-            
+            param.grad.data = param.grad.data.reshape(data_shape) / float(
+                self.num_workers
+            )
+
 
 def main():
     # Initialize PyTorch on all PEs
     num_threads = 1
     torch.set_num_threads(num_threads)
     torch.manual_seed(1234)
-    print(f'MPI rank {rank} initialized PyTorch with {num_threads} threads')
+    print(f"MPI rank {rank} initialized PyTorch with {num_threads} threads")
 
     if torch.cuda.is_available():
         # if multiple devices are available (running with mpirun, not srun), should assign round-robin
@@ -165,18 +175,21 @@ def main():
         device = torch.device("cuda:" + str(dev_id))
     else:
         device = torch.device("cpu")
-        
+
     # Create workers and start training
     epochs = 6
     workers = Worker(nprocs, epochs)
     t0 = time.time()
-    print(f'Starting MNIST dataset training with {nprocs} MPI processes for {epochs} epochs on device {device}')
+    print(
+        f"Starting MNIST dataset training with {nprocs} MPI processes for {epochs} epochs on device {device}"
+    )
     workers.run(device)
 
     comm.Barrier()
 
     # Training complete
     if rank == 0:
-        print(f'Done. Elapsed time: {(time.time() - t0):9.3f} s')
+        print(f"Done. Elapsed time: {(time.time() - t0):9.3f} s")
+
 
 main()
diff --git a/examples/mnist/mnist.py b/examples/mnist/mnist.py
index bfdb158d..3938f19e 100644
--- a/examples/mnist/mnist.py
+++ b/examples/mnist/mnist.py
@@ -11,13 +11,12 @@
 import random
 import time
 import sys
-from torch.autograd import Variable
 from torchvision import datasets, transforms
 from charm4py import charm, Chare, Group, Array, threaded, Reducer
-import numpy as np
 
 # Add LB command line arguments
-sys.argv += ['+LBOff', '+LBCommOff', '+LBObjOnly']
+sys.argv += ["+LBOff", "+LBCommOff", "+LBObjOnly"]
+
 
 # Dataset partitioning helper
 class Partition(object):
@@ -33,6 +32,7 @@ def __getitem__(self, index):
         data_idx = self.index[index]
         return self.data[data_idx]
 
+
 class DataPartitioner(object):
 
     def __init__(self, data, sizes=[0.7, 0.2, 0.1], seed=1234):
@@ -52,6 +52,7 @@ def __init__(self, data, sizes=[0.7, 0.2, 0.1], seed=1234):
     def use(self, partition):
         return Partition(self.data, self.partitions[partition])
 
+
 # Neural network architecture
 class Net(nn.Module):
 
@@ -72,14 +73,15 @@ def forward(self, x):
         x = self.fc2(x)
         return F.log_softmax(x, dim=1)
 
+
 # Initialize PyTorch on each PE
 class TorchInit(Chare):
 
     def init(self, num_threads):
         torch.set_num_threads(num_threads)
         torch.manual_seed(1234)
-        
-            
+
+
 # Chare array
 class Worker(Chare):
 
@@ -97,25 +99,23 @@ def __init__(self, num_workers, epochs, lb_epochs):
         else:
             # is group element
             self.myrank = self.thisIndex
-            
-            
-
 
     # Partitioning MNIST dataset
     def partition_dataset(self):
-        dataset = datasets.MNIST('./data', train=True, download=True,
-                                 transform=transforms.Compose([
-                                     transforms.ToTensor(),
-                                     transforms.Normalize((0.1307,), (0.3081,))
-                                 ]))
+        dataset = datasets.MNIST(
+            "./data",
+            train=True,
+            download=True,
+            transform=transforms.Compose(
+                [transforms.ToTensor(), transforms.Normalize((0.1307,), (0.3081,))]
+            ),
+        )
         size = self.num_workers
         bsz = int(128 / float(size))  # my batch size
         partition_sizes = [1.0 / size for _ in range(size)]
         partition = DataPartitioner(dataset, partition_sizes)
         partition = partition.use(self.myrank)
-        train_set = torch.utils.data.DataLoader(partition,
-                                                batch_size=bsz,
-                                                shuffle=True)
+        train_set = torch.utils.data.DataLoader(partition, batch_size=bsz, shuffle=True)
         return train_set, bsz
 
     # Distributed SGD
@@ -125,9 +125,9 @@ def run(self, done_future=None):
             # if multiple devices are available (running with charmrun, not srun), should assign round-robin
             device_index = charm.myPe() % torch.cuda.device_count()
             device = torch.device("cuda:" + str(device_index))
-        else:   
+        else:
             device = torch.device("cpu")
-        
+
         if done_future is not None:
             # Starting a new run
             self.done_future = done_future
@@ -136,7 +136,7 @@ def run(self, done_future=None):
             self.optimizer = optim.SGD(self.model.parameters(), lr=0.01, momentum=0.5)
             self.num_batches = math.ceil(len(self.train_set.dataset) / float(bsz))
             self.epoch = 0
-            
+
         while self.epoch < self.epochs:
             if self.epoch == 0:
                 charm.LBTurnInstrumentOn()
@@ -151,17 +151,23 @@ def run(self, done_future=None):
                 loss.backward()
                 self.average_gradients(self.model, device)
                 self.optimizer.step()
-            print(f'Chare {self.thisIndex[0]:4d} | PE {charm.myPe():4d} | Epoch {self.epoch:4d} | Loss {(epoch_loss / self.num_batches):9.3f} | Time {(time.time() - t0):9.3f}')
+            print(
+                f"Chare {self.thisIndex[0]:4d} | PE {charm.myPe():4d} | Epoch {self.epoch:4d} | Loss {(epoch_loss / self.num_batches):9.3f} | Time {(time.time() - t0):9.3f}"
+            )
             self.epoch += 1
             if (self.lb_epochs > 0) and (self.epoch % self.lb_epochs == 0):
                 # Start load balancing
                 self.AtSync()
                 return
 
-        print(f'Chare {self.thisIndex[0]:4d} training complete, average allreduce time (us): {((self.agg_time / self.time_cnt) * 1000000):9.3f}')
+        print(
+            f"Chare {self.thisIndex[0]:4d} training complete, average allreduce time (us): {((self.agg_time / self.time_cnt) * 1000000):9.3f}"
+        )
         self.agg_time_all = self.allreduce(self.agg_time, Reducer.sum).get()
         if self.myrank == 0:
-            print(f'Chare {self.thisIndex[0]:4d} all average allreduce time (us): {((self.agg_time_all / self.num_workers / self.time_cnt) * 1000000):9.3f}')
+            print(
+                f"Chare {self.thisIndex[0]:4d} all average allreduce time (us): {((self.agg_time_all / self.num_workers / self.time_cnt) * 1000000):9.3f}"
+            )
         self.contribute(None, None, self.done_future)
 
     # Gradient averaging
@@ -169,7 +175,7 @@ def average_gradients(self, model, device):
         for param in model.parameters():
             # send param to cpu
             param.grad.data = param.grad.data.cpu()
-            
+
             # Flatten gradient data
             data_shape = param.grad.data.shape
             reshaped_data = param.grad.data.reshape(-1)
@@ -179,38 +185,41 @@ def average_gradients(self, model, device):
             agg_data = self.allreduce(reshaped_data, Reducer.sum).get()
             self.agg_time += time.time() - start_time
             self.time_cnt += 1
-             
+
             # convert numpy array to torch tensor
             agg_data = torch.from_numpy(agg_data)
-            
+
             # Send to device and restore original shape of gradient data
             param.grad.data = agg_data.to(device)
-            param.grad.data = param.grad.data.reshape(data_shape) / float(self.num_workers)
-           
-            
+            param.grad.data = param.grad.data.reshape(data_shape) / float(
+                self.num_workers
+            )
 
     # Return method from load balancing
     def resumeFromSync(self):
         self.thisProxy[self.thisIndex].run()
 
+
 def main(args):
     # Initialize PyTorch on all PEs
     Group(TorchInit).init(1, ret=True).get()
 
-
     # Create chare array and start training
     num_workers = charm.numPes()
     epochs = 6
     lb_epochs = 0
-    workers = Array(Worker, num_workers, args=[num_workers, epochs, lb_epochs], useAtSync=True)
+    workers = Array(
+        Worker, num_workers, args=[num_workers, epochs, lb_epochs], useAtSync=True
+    )
     t0 = time.time()
     done = charm.createFuture()
-    
+
     workers.run(done)
     done.get()
 
     # Training complete
-    print(f'Done. Elapsed time: {(time.time() - t0):9.3f} s')
+    print(f"Done. Elapsed time: {(time.time() - t0):9.3f} s")
     charm.exit()
 
+
 charm.start(main)
diff --git a/examples/multi-module/goodbye.py b/examples/multi-module/goodbye.py
index 00c63769..eefbecc3 100644
--- a/examples/multi-module/goodbye.py
+++ b/examples/multi-module/goodbye.py
@@ -9,7 +9,7 @@ class Goodbye(Chare):
 
     def SayGoodbye(self):
         if charm.myPe() < 10:
-            print('Goodbye from PE', charm.myPe())
+            print("Goodbye from PE", charm.myPe())
         # goodbye chares do an empty reduction. after the reduction completes,
         # the 'done' method of the mainchare will be called.
         # mainProxy is a global of this module, set previously from the mainchare
diff --git a/examples/multi-module/hello.py b/examples/multi-module/hello.py
index d183564c..b22ee711 100644
--- a/examples/multi-module/hello.py
+++ b/examples/multi-module/hello.py
@@ -10,7 +10,7 @@ class Hello(Chare):
 
     def SayHi(self):
         if charm.myPe() < 10:
-            print('Hello from PE', charm.myPe(), 'on', time.strftime('%c'))
+            print("Hello from PE", charm.myPe(), "on", time.strftime("%c"))
         # call SayGoodbye method of the goodbye chare on my PE, bye_chares is
         # a global variable of this module, set previously from the mainchare
         bye_chares[charm.myPe()].SayGoodbye()
diff --git a/examples/multi-module/main.py b/examples/multi-module/main.py
index 360ccc48..2de0be1a 100644
--- a/examples/multi-module/main.py
+++ b/examples/multi-module/main.py
@@ -11,11 +11,13 @@ def __init__(self, args):
         # create Group of chares of type goodbye.Goodbye
         bye_chares = Group(goodbye.Goodbye)
         # add bye_chares proxy to globals of module hello on every process
-        future1 = charm.thisProxy.updateGlobals({'bye_chares': bye_chares},
-                                                module_name='hello', awaitable=True)
+        future1 = charm.thisProxy.updateGlobals(
+            {"bye_chares": bye_chares}, module_name="hello", awaitable=True
+        )
         # add mainchare proxy to globals of module goodbye on every process
-        future2 = charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy},
-                                                module_name='goodbye', awaitable=True)
+        future2 = charm.thisProxy.updateGlobals(
+            {"mainProxy": self.thisProxy}, module_name="goodbye", awaitable=True
+        )
         charm.wait((future1, future2))
         # broadcast a message to the hello chares
         hello_chares.SayHi()
diff --git a/examples/nqueen/nqueen-numba.py b/examples/nqueen/nqueen-numba.py
index 66c4a82c..2629ab55 100644
--- a/examples/nqueen/nqueen-numba.py
+++ b/examples/nqueen/nqueen-numba.py
@@ -46,7 +46,7 @@ class Util(Chare):
 
     def compile(self):
         # trigger compilation by running the function with dummy data (but correct types)
-        queen_seq(NUM_ROWS-1, numpy.full(NUM_ROWS, -1, dtype=numpy.int8))
+        queen_seq(NUM_ROWS - 1, numpy.full(NUM_ROWS, -1, dtype=numpy.int8))
 
     def getSolutionCount(self):
         return solution_count
@@ -62,14 +62,14 @@ def main(args):
     else:
         GRAINSIZE = max(1, NUM_ROWS - 2)
 
-    print('\nUsage: nqueen [numqueens] [grainsize]')
-    print('Number of queens is', NUM_ROWS, ', grainsize is', GRAINSIZE)
+    print("\nUsage: nqueen [numqueens] [grainsize]")
+    print("Number of queens is", NUM_ROWS, ", grainsize is", GRAINSIZE)
 
     # set NUM_ROWS and GRAINSIZE as global variables on every PE
     global_data = {}
-    global_data['NUM_ROWS'] = NUM_ROWS
-    global_data['GRAINSIZE'] = GRAINSIZE
-    global_data['solution_count'] = 0  # to count number of solutions found on each PE
+    global_data["NUM_ROWS"] = NUM_ROWS
+    global_data["GRAINSIZE"] = GRAINSIZE
+    global_data["solution_count"] = 0  # to count number of solutions found on each PE
     charm.thisProxy.updateGlobals(global_data, awaitable=True).get()
 
     # compile numba functions on every PE before starting, to get
@@ -85,7 +85,15 @@ def main(args):
     charm.waitQD()
     elapsed = time() - startTime
     numSolutions = sum(util.getSolutionCount(ret=True).get())
-    print('There are', numSolutions, 'solutions to', NUM_ROWS, 'queens. Time taken:', round(elapsed, 3), 'secs')
+    print(
+        "There are",
+        numSolutions,
+        "solutions to",
+        NUM_ROWS,
+        "queens. Time taken:",
+        round(elapsed, 3),
+        "secs",
+    )
     exit()
 
 
diff --git a/examples/nqueen/nqueen.py b/examples/nqueen/nqueen.py
index 64a1cf75..2c4856b3 100644
--- a/examples/nqueen/nqueen.py
+++ b/examples/nqueen/nqueen.py
@@ -50,25 +50,33 @@ def main(args):
     else:
         GRAINSIZE = max(1, NUM_ROWS - 2)
 
-    print('\nUsage: nqueen [numqueens] [grainsize]')
-    print('Number of queens is', NUM_ROWS, ', grainsize is', GRAINSIZE)
+    print("\nUsage: nqueen [numqueens] [grainsize]")
+    print("Number of queens is", NUM_ROWS, ", grainsize is", GRAINSIZE)
 
     # set NUM_ROWS and GRAINSIZE as global variables on every PE
     global_data = {}
-    global_data['NUM_ROWS'] = NUM_ROWS
-    global_data['GRAINSIZE'] = GRAINSIZE
-    global_data['solution_count'] = 0  # to count number of solutions found on each PE
+    global_data["NUM_ROWS"] = NUM_ROWS
+    global_data["GRAINSIZE"] = GRAINSIZE
+    global_data["solution_count"] = 0  # to count number of solutions found on each PE
     charm.thisProxy.updateGlobals(global_data, awaitable=True).get()
 
     startTime = time()
     # initialize empty solution, solution holds the column number where a queen is placed, for each row
-    solution = array.array('b', [-1] * NUM_ROWS)
+    solution = array.array("b", [-1] * NUM_ROWS)
     queen(0, solution)
     # wait until there is no work being done on any PE (quiescence detection)
     charm.waitQD()
     elapsed = time() - startTime
     numSolutions = sum(Group(Util).getSolutionCount(ret=True).get())
-    print('There are', numSolutions, 'solutions to', NUM_ROWS, 'queens. Time taken:', round(elapsed, 3), 'secs')
+    print(
+        "There are",
+        numSolutions,
+        "solutions to",
+        NUM_ROWS,
+        "queens. Time taken:",
+        round(elapsed, 3),
+        "secs",
+    )
     exit()
 
 
diff --git a/examples/particle/particle.py b/examples/particle/particle.py
index a6b5b432..7e529872 100644
--- a/examples/particle/particle.py
+++ b/examples/particle/particle.py
@@ -9,7 +9,7 @@
 
 # more info about load balancing command-line options here:
 # https://charm.readthedocs.io/en/latest/charm++/manual.html#compiler-and-runtime-options-to-use-load-balancing-module
-sys.argv += ['+LBCommOff', '+LBObjOnly']
+sys.argv += ["+LBCommOff", "+LBObjOnly"]
 
 NUM_ITER = 100
 SIM_BOX_SIZE = 100.0
@@ -21,9 +21,9 @@ def __init__(self, x, y):
         self.coords = [x, y]  # coordinate of this particle in the 2D space
 
     def perturb(self, cellsize):
-        """ randomly move the particle """
+        """randomly move the particle"""
         for i in range(len(self.coords)):
-            self.coords[i] += random.uniform(-cellsize[i]*0.1, cellsize[i]*0.1)
+            self.coords[i] += random.uniform(-cellsize[i] * 0.1, cellsize[i] * 0.1)
             # if particle goes out of bounds of the simulation space, appear on the other side
             if self.coords[i] > SIM_BOX_SIZE:
                 self.coords[i] -= SIM_BOX_SIZE
@@ -42,25 +42,42 @@ def __init__(self, array_dims, max_particles_per_cell_start, sim_done_future):
 
         # create particles in this cell, in random positions
         self.particles = []
-        N = self.getInitialNumParticles(array_dims, max_particles_per_cell_start, cellsize)
-        lo_x = self.thisIndex[0] * cellsize[0]  # x coordinate of lower left corner of my cell
-        lo_y = self.thisIndex[1] * cellsize[1]  # y coordinate of lower left corner of my cell
+        N = self.getInitialNumParticles(
+            array_dims, max_particles_per_cell_start, cellsize
+        )
+        lo_x = (
+            self.thisIndex[0] * cellsize[0]
+        )  # x coordinate of lower left corner of my cell
+        lo_y = (
+            self.thisIndex[1] * cellsize[1]
+        )  # y coordinate of lower left corner of my cell
         for _ in range(N):
-            self.particles.append(Particle(random.uniform(lo_x, lo_x + cellsize[0] - 0.001),
-                                           random.uniform(lo_y, lo_y + cellsize[1] - 0.001)))
+            self.particles.append(
+                Particle(
+                    random.uniform(lo_x, lo_x + cellsize[0] - 0.001),
+                    random.uniform(lo_y, lo_y + cellsize[1] - 0.001),
+                )
+            )
 
         # obtain list of my neighbors in the 2D cell grid, and establish a Channel with each
         self.neighbor_indexes = self.getNbIndexes(array_dims)
-        self.neighbors = [Channel(self, remote=self.thisProxy[idx]) for idx in self.neighbor_indexes]
+        self.neighbors = [
+            Channel(self, remote=self.thisProxy[idx]) for idx in self.neighbor_indexes
+        ]
 
     def getInitialNumParticles(self, dims, max_particles, cellsize):
         # return the number of particles to create on this cell at the start of
         # the simulation. The cells that are closer to the grid center start
         # with max_particles particles, the rest start with 0
         grid_center = (SIM_BOX_SIZE / 2, SIM_BOX_SIZE / 2)
-        cell_center = (self.thisIndex[0] * cellsize[0] + cellsize[0] / 2,
-                       self.thisIndex[1] * cellsize[1] + cellsize[1] / 2)
-        dist = math.sqrt((cell_center[0] - grid_center[0])**2 + (cell_center[1] - grid_center[1])**2)
+        cell_center = (
+            self.thisIndex[0] * cellsize[0] + cellsize[0] / 2,
+            self.thisIndex[1] * cellsize[1] + cellsize[1] / 2,
+        )
+        dist = math.sqrt(
+            (cell_center[0] - grid_center[0]) ** 2
+            + (cell_center[1] - grid_center[1]) ** 2
+        )
         if dist <= SIM_BOX_SIZE / 5:
             return max_particles
         else:
@@ -70,8 +87,8 @@ def getNbIndexes(self, arrayDims):
         # return indexes of neighboring cells (N,NE,E,SE,S,SW,W,NW) with wrap around
         nbs = set()
         x, y = self.thisIndex
-        nb_x_coords = [(x-1)%arrayDims[0], x, (x+1)%arrayDims[0]]
-        nb_y_coords = [(y-1)%arrayDims[1], y, (y+1)%arrayDims[1]]
+        nb_x_coords = [(x - 1) % arrayDims[0], x, (x + 1) % arrayDims[0]]
+        nb_y_coords = [(y - 1) % arrayDims[1], y, (y + 1) % arrayDims[1]]
         for nb_x in nb_x_coords:
             for nb_y in nb_y_coords:
                 if (nb_x, nb_y) != self.thisIndex:
@@ -83,7 +100,7 @@ def getNumParticles(self):
 
     @coro
     def run(self):
-        """ this is the simulation loop of each cell """
+        """this is the simulation loop of each cell"""
         cellsize = self.cellsize
         while self.iteration < NUM_ITER:
             # in each iteration, this cell's particles move randomly. some
@@ -98,12 +115,17 @@ def run(self):
             # directly into a message)
 
             # we are sending an array of particle data to each neighbor
-            outgoingParticles = {nb_idx: array.array('d') for nb_idx in self.neighbor_indexes}
+            outgoingParticles = {
+                nb_idx: array.array("d") for nb_idx in self.neighbor_indexes
+            }
             i = 0
             while i < len(self.particles):
                 p = self.particles[i]
                 p.perturb(cellsize)
-                dest_cell = (int(p.coords[0] / cellsize[0]), int(p.coords[1] / cellsize[1]))
+                dest_cell = (
+                    int(p.coords[0] / cellsize[0]),
+                    int(p.coords[1] / cellsize[1]),
+                )
                 if dest_cell != self.thisIndex:
                     # this particle is moving to a neighboring cell
                     outgoingParticles[dest_cell].extend(p.coords)
@@ -120,13 +142,17 @@ def run(self):
             # yields channels as they become ready (have data to receive)
             for channel in charm.iwait(self.neighbors):
                 incoming = channel.recv()
-                self.particles += [Particle(float(incoming[i]),
-                                            float(incoming[i+1])) for i in range(0, len(incoming), 2)]
+                self.particles += [
+                    Particle(float(incoming[i]), float(incoming[i + 1]))
+                    for i in range(0, len(incoming), 2)
+                ]
 
             if self.iteration % 10 == 0:
                 # reduction to report the current max particles per cell.
                 # this call is asynchronous and doesn't block me
-                self.reduce(self.thisProxy[(0,0)].reportMax, len(self.particles), Reducer.max)
+                self.reduce(
+                    self.thisProxy[(0, 0)].reportMax, len(self.particles), Reducer.max
+                )
 
             if self.iteration % 20 == 0:
                 # tell Charm that this cell is ready for load balancing.
@@ -151,11 +177,13 @@ def resumeFromSync(self):
         self.thisProxy[self.thisIndex].run()
 
     def reportMax(self, max_particles):
-        print('Max particles per cell= ' + str(max_particles))
+        print("Max particles per cell= " + str(max_particles))
 
 
 def main(args):
-    print('\nUsage: particle.py [num_chares_x num_chares_y] [max_particles_per_cell_start]')
+    print(
+        "\nUsage: particle.py [num_chares_x num_chares_y] [max_particles_per_cell_start]"
+    )
     if len(args) >= 3:
         array_dims = (int(args[1]), int(args[2]))
     else:
@@ -165,23 +193,30 @@ def main(args):
     else:
         max_particles_per_cell_start = 10000
 
-    print('\nCell array size:', array_dims[0], 'x', array_dims[1], 'cells')
+    print("\nCell array size:", array_dims[0], "x", array_dims[1], "cells")
 
     # create 2D Cell chare array and start simulation
     sim_done = Future()
-    cells = Array(Cell, array_dims,
-                  args=[array_dims, max_particles_per_cell_start, sim_done],
-                  useAtSync=True)
+    cells = Array(
+        Cell,
+        array_dims,
+        args=[array_dims, max_particles_per_cell_start, sim_done],
+        useAtSync=True,
+    )
     num_particles_per_cell = cells.getNumParticles(ret=True).get()
-    print('Total particles created:', sum(num_particles_per_cell))
-    print('Initial conditions:\n\tmin particles per cell:', min(num_particles_per_cell),
-          '\n\tmax particles per cell:', max(num_particles_per_cell))
-    print('\nStarting simulation')
+    print("Total particles created:", sum(num_particles_per_cell))
+    print(
+        "Initial conditions:\n\tmin particles per cell:",
+        min(num_particles_per_cell),
+        "\n\tmax particles per cell:",
+        max(num_particles_per_cell),
+    )
+    print("\nStarting simulation")
     t0 = time.time()
     cells.run()  # this is a broadcast
     # wait for the simulation to finish
     sim_done.get()
-    print('Particle simulation done, elapsed time=', round(time.time() - t0, 3), 'secs')
+    print("Particle simulation done, elapsed time=", round(time.time() - t0, 3), "secs")
     exit()
 
 
diff --git a/examples/pool/pool_fibonacci.py b/examples/pool/pool_fibonacci.py
index f0153439..32398f23 100644
--- a/examples/pool/pool_fibonacci.py
+++ b/examples/pool/pool_fibonacci.py
@@ -2,14 +2,17 @@
 
 # Recursive Parallel Fibonacci
 
+
 @coro
 def fib(n):
     if n < 2:
         return n
-    return sum(charm.pool.map(fib, [n-1, n-2]))
+    return sum(charm.pool.map(fib, [n - 1, n - 2]))
+
 
 def main(args):
-    print('fibonacci(13)=', fib(13))
+    print("fibonacci(13)=", fib(13))
     exit()
 
+
 charm.start(main)
diff --git a/examples/pool/pool_simple.py b/examples/pool/pool_simple.py
index 0adc7dd4..2914530a 100644
--- a/examples/pool/pool_simple.py
+++ b/examples/pool/pool_simple.py
@@ -4,9 +4,11 @@
 def square(x):
     return x**2
 
+
 def twice(x):
     return 2 * x
 
+
 def main(args):
     ray.init()
     results = charm.pool.map_async(square, [4], chunksize=1, multi_future=True)
@@ -15,7 +17,8 @@ def main(args):
     for x in results_twice:
         print(x.get())
 
-    #print(result)  # prints [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
+    # print(result)  # prints [0, 1, 4, 9, 16, 25, 36, 49, 64, 81]
     exit()
 
+
 charm.start(main)
diff --git a/examples/ray/batch_prediction.py b/examples/ray/batch_prediction.py
index 1869fc41..61387a96 100644
--- a/examples/ray/batch_prediction.py
+++ b/examples/ray/batch_prediction.py
@@ -26,13 +26,14 @@ def make_prediction(model, shard_path):
     # Here we just return the size about the result in this example.
     return len(result)
 
+
 def main(args):
     ray.init()
     # 12 files, one for each remote task.
     input_files = [
-            f"s3://anonymous@air-example-data/ursa-labs-taxi-data/downsampled_2009_full_year_data.parquet"
-            f"/fe41422b01c04169af2a65a83b753e0f_{i:06d}.parquet"
-            for i in range(12)
+        f"s3://anonymous@air-example-data/ursa-labs-taxi-data/downsampled_2009_full_year_data.parquet"
+        f"/fe41422b01c04169af2a65a83b753e0f_{i:06d}.parquet"
+        for i in range(12)
     ]
 
     # ray.put() the model just once to local object store, and then pass the
@@ -58,5 +59,6 @@ def main(args):
 
     exit()
 
-if __name__ == '__main__':
-    charm.start(main)
\ No newline at end of file
+
+if __name__ == "__main__":
+    charm.start(main)
diff --git a/examples/ray/fib.py b/examples/ray/fib.py
index 84f3ef22..65dce862 100644
--- a/examples/ray/fib.py
+++ b/examples/ray/fib.py
@@ -12,6 +12,7 @@
 
 # ray version
 
+
 @ray.remote
 def fib(n):
     if n < 2:
@@ -21,22 +22,23 @@ def fib(n):
         # (tasks can execute on any PE). map will block here for the result of
         # fib(n-1) and fib(n-2), which is why we mark fib as a coroutine
         # return sum(charm.pool.map(fib, [n-1, n-2]))
-        result1 = fib.remote(n-1)
-        result2 = fib.remote(n-2)
-        return ray.get(result1)+ray.get(result2)
+        result1 = fib.remote(n - 1)
+        result2 = fib.remote(n - 2)
+        return ray.get(result1) + ray.get(result2)
 
 
 def main(args):
     ray.init()
-    print('\nUsage: fib.py [n]')
+    print("\nUsage: fib.py [n]")
     n = 12
     if len(args) > 1:
         n = int(args[1])
-    print('Calculating fibonacci of N=' + str(n))
+    print("Calculating fibonacci of N=" + str(n))
     t0 = time.time()
     result = fib.remote(n)
-    print('Result is', ray.get(result), 'elapsed=', round(time.time() - t0, 3))
-    #charm.thisProxy.stop_profiling()
+    print("Result is", ray.get(result), "elapsed=", round(time.time() - t0, 3))
+    # charm.thisProxy.stop_profiling()
     exit()
 
+
 charm.start(main)
diff --git a/examples/ray/mandelbrot/mandelbrot.py b/examples/ray/mandelbrot/mandelbrot.py
index 0fb52923..d514a11f 100644
--- a/examples/ray/mandelbrot/mandelbrot.py
+++ b/examples/ray/mandelbrot/mandelbrot.py
@@ -3,6 +3,7 @@
 import matplotlib.pyplot as plt
 import os
 
+
 # Compute whether a point is in the Mandelbrot set
 def mandelbrot_fast(re, im, max_iter):
     zr = zi = 0.0
@@ -15,6 +16,7 @@ def mandelbrot_fast(re, im, max_iter):
         zr = zr2 - zi2 + re
     return max_iter
 
+
 # Remote task to compute a tile
 @ray.remote
 def compute_tile(x_start, x_end, y_start, y_end, width, height, max_iter):
@@ -26,13 +28,18 @@ def compute_tile(x_start, x_end, y_start, y_end, width, height, max_iter):
             tile[y - y_start, x - x_start] = mandelbrot_fast(re, im, max_iter)
     return tile
 
-def generate_mandelbrot_image_optimized(width=12000, height=8000, max_iter=200, tile_size=1000, max_pending=1000):
+
+def generate_mandelbrot_image_optimized(
+    width=12000, height=8000, max_iter=200, tile_size=1000, max_pending=1000
+):
     # Pre-create the empty file with the correct size
     total_bytes = 2 * width * height  # 2 bytes per pixel (uint16)
     with open("output/mandelbrot_large.dat", "wb") as f:
         f.seek(total_bytes - 1)
-        f.write(b'\0')
-    result_image = np.memmap("output/mandelbrot_large.dat", dtype=np.uint16, mode='w+', shape=(height, width))
+        f.write(b"\0")
+    result_image = np.memmap(
+        "output/mandelbrot_large.dat", dtype=np.uint16, mode="w+", shape=(height, width)
+    )
     pending = []
 
     for y in range(0, height, tile_size):
@@ -45,11 +52,11 @@ def generate_mandelbrot_image_optimized(width=12000, height=8000, max_iter=200,
             if len(pending) >= max_pending:
                 (x0, y0), tile = pending.pop(0)
                 tile = ray.get(tile)
-                result_image[y0:y0+tile.shape[0], x0:x0+tile.shape[1]] = tile
+                result_image[y0 : y0 + tile.shape[0], x0 : x0 + tile.shape[1]] = tile
 
     for (x0, y0), tile_ref in pending:
         tile = ray.get(tile_ref)
-        result_image[y0:y0+tile.shape[0], x0:x0+tile.shape[1]] = tile
+        result_image[y0 : y0 + tile.shape[0], x0 : x0 + tile.shape[1]] = tile
 
     return result_image
 
@@ -59,13 +66,19 @@ def main(args):
     os.makedirs(os.path.dirname(output_path), exist_ok=True)
     ray.init()
     # Run the benchmark
-    image = generate_mandelbrot_image_optimized(width=int(args[1]), height=int(args[2]), max_iter=int(args[3]), tile_size=int(args[4]))
+    image = generate_mandelbrot_image_optimized(
+        width=int(args[1]),
+        height=int(args[2]),
+        max_iter=int(args[3]),
+        tile_size=int(args[4]),
+    )
     # Optional: show the result
-    plt.imshow(image, cmap='hot')
+    plt.imshow(image, cmap="hot")
     plt.title("Mandelbrot Set (Ray)")
-    plt.axis('off')
-    plt.savefig("mandelbrot_ray.png", dpi=300, bbox_inches='tight')
-    os.remove('output/mandelbrot_large.dat')
+    plt.axis("off")
+    plt.savefig("mandelbrot_ray.png", dpi=300, bbox_inches="tight")
+    os.remove("output/mandelbrot_large.dat")
     charm.exit()
 
+
 charm.start(main)
diff --git a/examples/ray/model_selection.py b/examples/ray/model_selection.py
index 384d54d4..1279e3ba 100644
--- a/examples/ray/model_selection.py
+++ b/examples/ray/model_selection.py
@@ -8,7 +8,7 @@
 import torch.optim as optim
 from torchvision import datasets, transforms
 
-#import ray
+# import ray
 from charm4py import charm, ray
 
 
@@ -44,6 +44,7 @@ def get_data_loaders(batch_size):
     )
     return train_loader, test_loader
 
+
 class ConvNet(nn.Module):
     """Simple two layer Convolutional Neural Network."""
 
@@ -96,6 +97,7 @@ def test(model, test_loader, device=torch.device("cpu")):
 
     return correct / total
 
+
 @ray.remote
 def evaluate_hyperparameters(config):
     model = ConvNet()
@@ -106,6 +108,7 @@ def evaluate_hyperparameters(config):
     train(model, optimizer, train_loader)
     return test(model, test_loader)
 
+
 def main(args):
     ray.init()
 
@@ -138,18 +141,18 @@ def main(args):
 
         hyperparameters = hyperparameters_mapping[result_id]
         accuracy = ray.get(result_id)
-        #print(
+        # print(
         #    """We achieve accuracy {:.3}% with
         #    learning_rate: {:.2}
         #    batch_size: {}
         #    momentum: {:.2}
-        #""".format(
+        # """.format(
         #        100 * accuracy,
         #        hyperparameters["learning_rate"],
         #        hyperparameters["batch_size"],
         #        hyperparameters["momentum"],
         #    )
-        #)
+        # )
         if accuracy > best_accuracy:
             best_hyperparameters = hyperparameters
             best_accuracy = accuracy
@@ -172,4 +175,5 @@ def main(args):
 
     exit()
 
+
 charm.start(main)
diff --git a/examples/ray/parameter_server.py b/examples/ray/parameter_server.py
index b59d5158..a7a7043f 100644
--- a/examples/ray/parameter_server.py
+++ b/examples/ray/parameter_server.py
@@ -52,6 +52,7 @@ def evaluate(model, test_loader):
             correct += (predicted == target).sum().item()
     return 100.0 * correct / total
 
+
 class ConvNet(nn.Module):
     """Small ConvNet for MNIST."""
 
@@ -84,6 +85,7 @@ def set_gradients(self, gradients):
             if g is not None:
                 p.grad = torch.from_numpy(g)
 
+
 @ray.remote
 class ParameterServer(object):
     def __init__(self, lr):
@@ -102,6 +104,7 @@ def apply_gradients(self, *gradients):
     def get_weights(self):
         return self.model.get_weights()
 
+
 @ray.remote
 class DataWorker(object):
     def __init__(self):
@@ -138,7 +141,9 @@ def sync_train(args):
 
     current_weights = ps.get_weights.remote()
     for i in range(iterations):
-        gradients = [worker.compute_gradients.remote(current_weights) for worker in workers]
+        gradients = [
+            worker.compute_gradients.remote(current_weights) for worker in workers
+        ]
         # Calculate update after all gradients are available.
         current_weights = ps.apply_gradients.remote(*gradients)
 
@@ -152,7 +157,8 @@ def sync_train(args):
     print("Final accuracy is {:.1f}.".format(accuracy))
     exit()
     # Clean up Ray resources and processes before the next example.
-    #ray.shutdown()
+    # ray.shutdown()
+
 
 def async_train(args):
     ray.init()
@@ -191,8 +197,9 @@ def async_train(args):
     print("Final accuracy is {:.1f}.".format(accuracy))
     exit()
 
-if __name__ == '__main__':
-    if sys.argv[1] == 'sync':
+
+if __name__ == "__main__":
+    if sys.argv[1] == "sync":
         charm.start(sync_train)
-    elif sys.argv[1] == 'async':
+    elif sys.argv[1] == "async":
         charm.start(async_train)
diff --git a/examples/ray/simple.py b/examples/ray/simple.py
index 800f6b9a..1befb172 100644
--- a/examples/ray/simple.py
+++ b/examples/ray/simple.py
@@ -1,4 +1,4 @@
-from charm4py import charm, coro, Chare, Array, ray
+from charm4py import charm, ray
 from time import sleep
 import numpy as np
 
@@ -12,10 +12,17 @@ def add_task(a, b):
     print("Add task", a, b)
     return a + b
 
+
 @ray.remote
 class Compute(object):
     def __init__(self, arg):
-        print('Hello from MyChare instance in processor', charm.myPe(), 'index', self.thisIndex, arg)
+        print(
+            "Hello from MyChare instance in processor",
+            charm.myPe(),
+            "index",
+            self.thisIndex,
+            arg,
+        )
 
     def add(self, a, b):
         sleep(2)
@@ -27,13 +34,13 @@ def main(args):
     ray.init()
     # create 3 instances of MyChare, distributed among cores by the runtime
     arr = [Compute.remote(i) for i in range(4)]
-    
+
     obj1 = np.arange(100)
     obj2 = np.arange(100)
     a = ray.put(obj1)
     b = ray.put(obj2)
-    c = arr[0].add.remote(1, 2) # fut id 0
-    d = arr[1].add.remote(3, c) # fut id 1
+    c = arr[0].add.remote(1, 2)  # fut id 0
+    d = arr[1].add.remote(3, c)  # fut id 1
     e = arr[2].add.remote(2, d)
     f = arr[3].add.remote(c, 4)
     g = arr[3].add.remote(a, b)
diff --git a/examples/simple/chares.py b/examples/simple/chares.py
index 6048ed34..13143de3 100644
--- a/examples/simple/chares.py
+++ b/examples/simple/chares.py
@@ -4,7 +4,7 @@
 class MyChare(Chare):
 
     def __init__(self):
-        print('Hello from MyChare instance in processor', charm.myPe())
+        print("Hello from MyChare instance in processor", charm.myPe())
 
 
 def main(args):
diff --git a/examples/simple/hello_world.py b/examples/simple/hello_world.py
index 32872ec9..bdea82e3 100644
--- a/examples/simple/hello_world.py
+++ b/examples/simple/hello_world.py
@@ -4,7 +4,7 @@
 class Hello(Chare):
 
     def SayHi(self):
-        print('Hello World from element', self.thisIndex)
+        print("Hello World from element", self.thisIndex)
 
 
 def main(args):
diff --git a/examples/simple/start.py b/examples/simple/start.py
index 28af1d7f..333a6bc4 100644
--- a/examples/simple/start.py
+++ b/examples/simple/start.py
@@ -2,8 +2,8 @@
 
 
 def main(args):
-    print('Charm program started on processor', charm.myPe())
-    print('Running on', charm.numPes(), 'processors')
+    print("Charm program started on processor", charm.myPe())
+    print("Running on", charm.numPes(), "processors")
     exit()
 
 
diff --git a/examples/sssp/sssp.py b/examples/sssp/sssp.py
index 7dc854e6..8b7df866 100644
--- a/examples/sssp/sssp.py
+++ b/examples/sssp/sssp.py
@@ -1,46 +1,60 @@
-from charm4py import charm, Chare, Array, coro, Channel, Future, Reducer
+from charm4py import charm, Chare, Array, Future, Reducer
 import random
 import time
 
+
 class SsspChares(Chare):
 
     def __init__(self):
-        self.local_graph = [] #[vertex_index, edge_list, distance]
+        self.local_graph = []  # [vertex_index, edge_list, distance]
         self.partition_indices = []
         self.start_vertex = 0
         self.num_local_vertices = 0
         self.my_index = charm.myPe()
-    
+
     def get_partition(self, edge_list, partition_indices, callback):
         self.partition_indices = partition_indices
         self.start_vertex = partition_indices[self.my_index]
-        self.num_local_vertices = partition_indices[self.my_index + 1] - partition_indices[self.my_index]
-        self.local_graph = [[self.start_vertex + i, [], float('inf')] for i in range(self.num_local_vertices)]
+        self.num_local_vertices = (
+            partition_indices[self.my_index + 1] - partition_indices[self.my_index]
+        )
+        self.local_graph = [
+            [self.start_vertex + i, [], float("inf")]
+            for i in range(self.num_local_vertices)
+        ]
         for i in range(len(edge_list)):
-            self.local_graph[edge_list[i][0] - self.start_vertex][1].append((edge_list[i][1], edge_list[i][2]))
+            self.local_graph[edge_list[i][0] - self.start_vertex][1].append(
+                (edge_list[i][1], edge_list[i][2])
+            )
         self.reduce(callback, None, Reducer.nop)
-    
+
     def calculate_destination(self, vertex_index):
-        for i in range(len(self.partition_indices)-1):
-            if vertex_index >= self.partition_indices[i] and vertex_index < self.partition_indices[i+1]:
+        for i in range(len(self.partition_indices) - 1):
+            if (
+                vertex_index >= self.partition_indices[i]
+                and vertex_index < self.partition_indices[i + 1]
+            ):
                 return i
-        return len(self.partition_indices)-1
-    
+        return len(self.partition_indices) - 1
+
     def update_distance(self, update):
-        local_index = update[0]-self.start_vertex
+        local_index = update[0] - self.start_vertex
         if update[1] < self.local_graph[local_index][2]:
-            self.local_graph[update[0]-self.start_vertex][2] = update[1]
+            self.local_graph[update[0] - self.start_vertex][2] = update[1]
             for i in range(len(self.local_graph[local_index][1])):
                 dest_vertex = self.local_graph[local_index][1][i][0]
                 dest_partition = self.calculate_destination(dest_vertex)
-                cost = self.local_graph[local_index][2] + self.local_graph[local_index][1][i][1]
+                cost = (
+                    self.local_graph[local_index][2]
+                    + self.local_graph[local_index][1][i][1]
+                )
                 new_update = (dest_vertex, cost)
                 self.thisProxy[dest_partition].update_distance(new_update)
-    
+
     def print_results(self, callback):
         max_local_cost = 0.0
         for i in range(len(self.local_graph)):
-            #print("Final cost of vertex", self.local_graph[i][0], ":", self.local_graph[i][2])
+            # print("Final cost of vertex", self.local_graph[i][0], ":", self.local_graph[i][2])
             if self.local_graph[i][2] > max_local_cost:
                 max_local_cost = self.local_graph[i][2]
         self.reduce(callback, max_local_cost, Reducer.max)
@@ -50,32 +64,34 @@ class Main(Chare):
 
     def __init__(self, args):
         if len(args) != 5:
-            print("Wrong number of arguments. Usage: sssp.py <num_vertices> <num_edges> <random_seed> <source_vertex>")
+            print(
+                "Wrong number of arguments. Usage: sssp.py <num_vertices> <num_edges> <random_seed> <source_vertex>"
+            )
             exit()
-        #define parameters
+        # define parameters
         self.num_vertices = int(args[1])
         self.num_edges = int(args[2])
         self.random_seed = int(args[3])
         self.source_vertex = int(args[4])
-        if self.source_vertex < 0 or self.source_vertex > self.num_vertices-1:
+        if self.source_vertex < 0 or self.source_vertex > self.num_vertices - 1:
             print("Source vertex out of range")
             exit()
-        #generate edges randomly and sort them by edge source
+        # generate edges randomly and sort them by edge source
         begin_generation = time.time()
         random.seed(self.random_seed)
         self.edge_list = []
         for i in range(self.num_edges):
-            edge_source = random.randint(0, self.num_vertices-1)
-            edge_dest = random.randint(0, self.num_vertices-1)
-            while edge_source==edge_dest:
-                edge_dest = random.randint(0, self.num_vertices-1)
+            edge_source = random.randint(0, self.num_vertices - 1)
+            edge_dest = random.randint(0, self.num_vertices - 1)
+            while edge_source == edge_dest:
+                edge_dest = random.randint(0, self.num_vertices - 1)
             edge_weight = random.random()
             self.edge_list.append((edge_source, edge_dest, edge_weight))
         self.edge_list.sort(key=lambda a: a[0])
-        #initiate worker array
+        # initiate worker array
         num_partitions = charm.numPes()
         self.workers = Array(SsspChares, num_partitions)
-        #split edges by pe
+        # split edges by pe
         send_lists = [[] for _ in range(num_partitions)]
         avg_partition_size = self.num_edges // num_partitions
         for i in range(len(self.edge_list)):
@@ -83,14 +99,20 @@ def __init__(self, args):
             if partition_num >= num_partitions:
                 partition_num = num_partitions - 1
             send_lists[partition_num].append(self.edge_list[i])
-        #move edges to keep vertices intact
+        # move edges to keep vertices intact
         for i in range(1, len(send_lists)):
-            if len(send_lists[i-1])!=0 and send_lists[i-1][-1][0]==send_lists[i][0][0]:
-                last_previous_vertex = send_lists[i-1][-1][0]
-                while len(send_lists[i]) > 0 and send_lists[i][0][0] == last_previous_vertex:
+            if (
+                len(send_lists[i - 1]) != 0
+                and send_lists[i - 1][-1][0] == send_lists[i][0][0]
+            ):
+                last_previous_vertex = send_lists[i - 1][-1][0]
+                while (
+                    len(send_lists[i]) > 0
+                    and send_lists[i][0][0] == last_previous_vertex
+                ):
                     edge_to_move = send_lists[i].pop(0)
-                    send_lists[i-1].append(edge_to_move)
-        #define partition indices
+                    send_lists[i - 1].append(edge_to_move)
+        # define partition indices
         partition_indices = []
         for i in range(len(send_lists)):
             if len(send_lists[i]) > 0:
@@ -99,21 +121,24 @@ def __init__(self, args):
                 partition_indices.append(partition_indices[-1])
         partition_indices.append(self.num_vertices)
         generation_length = time.time() - begin_generation
-        #send information to pes
+        # send information to pes
         f = Future()
         for i in range(num_partitions):
             self.workers[i].get_partition(send_lists[i], partition_indices, f)
         f.get()
-        #find partition of start vertex
+        # find partition of start vertex
         source_partition = 0
-        for i in range(len(partition_indices)-1):
-            if self.source_vertex >= partition_indices[i] and self.source_vertex < partition_indices[i+1]:
+        for i in range(len(partition_indices) - 1):
+            if (
+                self.source_vertex >= partition_indices[i]
+                and self.source_vertex < partition_indices[i + 1]
+            ):
                 source_partition = i
                 break
         begin_algo = time.time()
         self.workers[source_partition].update_distance((self.source_vertex, 0.0))
         charm.waitQD()
-        algo_length = time.time()-begin_algo
+        algo_length = time.time() - begin_algo
         final_stats = Future()
         self.workers.print_results(final_stats)
         global_max = final_stats.get()
@@ -122,8 +147,5 @@ def __init__(self, args):
         print("Algorithm runtime:", algo_length)
         exit()
 
-    
-
-
 
 charm.start(Main)
diff --git a/examples/wave2d/wave2d.py b/examples/wave2d/wave2d.py
index f1e96140..895d3d29 100644
--- a/examples/wave2d/wave2d.py
+++ b/examples/wave2d/wave2d.py
@@ -1,4 +1,3 @@
-
 # This program solves the 2-d wave equation over a grid, displaying pretty results.
 # See README.rst for more information.
 
@@ -8,12 +7,14 @@
 import numpy as np
 import numba
 import random
+
 try:
     import tkinter
     from PIL import Image, ImageTk, ImageDraw
 except ImportError:
     import sys
-    sys.argv += ['--NO-RENDER']
+
+    sys.argv += ["--NO-RENDER"]
 
 
 IMAGE_WIDTH, IMAGE_HEIGHT = 800, 699
@@ -29,20 +30,26 @@ class Main(Chare):
     def __init__(self, args):
         self.RENDER = True
         try:
-            args.remove('--NO-RENDER')
+            args.remove("--NO-RENDER")
             self.RENDER = False
         except ValueError:
             pass
 
-        print('\nUsage: wave2d.py [num_iterations] [max_framerate])')
+        print("\nUsage: wave2d.py [num_iterations] [max_framerate])")
         global NUM_ITERATIONS, MAX_FRAMERATE
         if len(args) > 1:
             NUM_ITERATIONS = int(args[1])
         if len(args) > 2:
             MAX_FRAMERATE = int(args[2])
 
-        print('Running wave2d on', charm.numPes(), 'processors for', NUM_ITERATIONS, 'iterations')
-        print('Max framerate is', MAX_FRAMERATE, 'frames per second')
+        print(
+            "Running wave2d on",
+            charm.numPes(),
+            "processors for",
+            NUM_ITERATIONS,
+            "iterations",
+        )
+        print("Max framerate is", MAX_FRAMERATE, "frames per second")
 
         self.count = 0  # tracks from how many workers I have received a subimage for this iteration
         programStartTime = frameStartTime = time.time()
@@ -54,7 +61,7 @@ def __init__(self, args):
 
         if self.RENDER:
             tk = tkinter.Tk()
-            self.frame = Image.new('RGB', (IMAGE_WIDTH, IMAGE_HEIGHT))
+            self.frame = Image.new("RGB", (IMAGE_WIDTH, IMAGE_HEIGHT))
             img = ImageTk.PhotoImage(self.frame)
             label_image = tkinter.Label(tk, image=img)
             label_image.pack()
@@ -64,14 +71,14 @@ def __init__(self, args):
             self.frameReady.get()  # wait for the next frame
             if MAX_FRAMERATE > 0:
                 elapsed = time.time() - frameStartTime
-                if elapsed < 1/MAX_FRAMERATE:
+                if elapsed < 1 / MAX_FRAMERATE:
                     # enforce framerate
-                    charm.sleep(1/MAX_FRAMERATE - elapsed)
+                    charm.sleep(1 / MAX_FRAMERATE - elapsed)
             if self.RENDER:
-                fps = round(1/(time.time() - frameStartTime))
+                fps = round(1 / (time.time() - frameStartTime))
                 # draw frames per second value on image
                 d = ImageDraw.Draw(self.frame)
-                d.text((10,10), str(fps) + ' fps', fill=(0,0,0,255))
+                d.text((10, 10), str(fps) + " fps", fill=(0, 0, 0, 255))
                 img = ImageTk.PhotoImage(self.frame)
                 label_image.configure(image=img)
                 label_image.image = img
@@ -79,19 +86,19 @@ def __init__(self, args):
                 tk.update()
 
             # loop simulation every 1000 iterations
-            reset = (i % 1000 == 0)
+            reset = i % 1000 == 0
             frameStartTime = time.time()
             array.resume(reset)  # tell workers to resume
             self.frameReady = Future()
 
-        print('Program Done!, Total time=', time.time() - programStartTime)
+        print("Program Done!, Total time=", time.time() - programStartTime)
         exit()
 
     # every worker calls this method to deposit their subimage
     def depositSubImage(self, data, pos, img_size):
         self.count += 1
         if self.RENDER:
-            self.frame.paste(Image.frombytes('RGB', img_size, data), box=pos)
+            self.frame.paste(Image.frombytes("RGB", img_size, data), box=pos)
         if self.count == CHARE_ARRAY_WIDTH * CHARE_ARRAY_HEIGHT:
             # received image data from all chares
             self.count = 0
@@ -103,18 +110,25 @@ class Wave(Chare):
     def setInitialConditions(self):
         # setup some initial pressure pertubations for timesteps t-1 and t
         self.pressure_new = np.zeros((self.myheight, self.mywidth))  # time t+1
-        self.pressure     = np.zeros((self.myheight, self.mywidth))  # time t
+        self.pressure = np.zeros((self.myheight, self.mywidth))  # time t
         self.pressure_old = np.zeros((self.myheight, self.mywidth))  # time t-1
-        init_pressure(NUM_INITIAL_PERTURBATIONS, IMAGE_WIDTH, IMAGE_HEIGHT,
-                      self.mywidth, self.myheight, self.thisIndex,
-                      self.pressure, self.pressure_old)
+        init_pressure(
+            NUM_INITIAL_PERTURBATIONS,
+            IMAGE_WIDTH,
+            IMAGE_HEIGHT,
+            self.mywidth,
+            self.myheight,
+            self.thisIndex,
+            self.pressure,
+            self.pressure_old,
+        )
 
     def resume(self, reset=False):
         self.resumeFuture(reset)
 
     @coro
     def work(self, mainProxy):
-        """ this is the main simulation loop for each chare """
+        """this is the main simulation loop for each chare"""
 
         # size of my rectangular portion of the image
         self.mywidth = IMAGE_WIDTH // CHARE_ARRAY_WIDTH
@@ -124,25 +138,25 @@ def work(self, mainProxy):
         i = self.thisIndex
         X, Y = CHARE_ARRAY_WIDTH, CHARE_ARRAY_HEIGHT
         # establish a Channel with neighbor chares in the 2D grid
-        left = Channel(self, remote=self.thisProxy[(i[0]-1)%X, i[1]])
-        right = Channel(self, remote=self.thisProxy[(i[0]+1)%X, i[1]])
-        top = Channel(self, remote=self.thisProxy[i[0], (i[1]-1)%Y])
-        bottom = Channel(self, remote=self.thisProxy[i[0], (i[1]+1)%Y])
+        left = Channel(self, remote=self.thisProxy[(i[0] - 1) % X, i[1]])
+        right = Channel(self, remote=self.thisProxy[(i[0] + 1) % X, i[1]])
+        top = Channel(self, remote=self.thisProxy[i[0], (i[1] - 1) % Y])
+        bottom = Channel(self, remote=self.thisProxy[i[0], (i[1] + 1) % Y])
 
         width, height = self.mywidth, self.myheight
         # coordinate where my portion of the image is located
         sx = self.thisIndex[0] * width
         sy = self.thisIndex[1] * height
         # data will store my portion of the image
-        data = np.zeros(width*height*3, dtype=np.uint8)
+        data = np.zeros(width * height * 3, dtype=np.uint8)
         buffers = [None] * 4
 
         # run simulation now
         while True:
-            top_edge = self.pressure[[0],:].reshape(width)
-            bottom_edge = self.pressure[[-1],:].reshape(width)
-            left_edge = self.pressure[:,[0]].reshape(height)
-            right_edge = self.pressure[:,[-1]].reshape(height)
+            top_edge = self.pressure[[0], :].reshape(width)
+            bottom_edge = self.pressure[[-1], :].reshape(width)
+            left_edge = self.pressure[:, [0]].reshape(height)
+            right_edge = self.pressure[:, [-1]].reshape(height)
 
             # send ghost values to neighbors
             left.send(RIGHT, left_edge)
@@ -156,12 +170,24 @@ def work(self, mainProxy):
                 side, ghost_values = channel.recv()
                 buffers[side] = ghost_values
 
-            check_and_compute(height, width,
-                              buffers[LEFT], buffers[RIGHT], buffers[UP], buffers[DOWN],
-                              self.pressure, self.pressure_old, self.pressure_new)
+            check_and_compute(
+                height,
+                width,
+                buffers[LEFT],
+                buffers[RIGHT],
+                buffers[UP],
+                buffers[DOWN],
+                self.pressure,
+                self.pressure_old,
+                self.pressure_new,
+            )
 
             # advance to next step by shifting the data back one step in time
-            self.pressure_old, self.pressure, self.pressure_new = self.pressure, self.pressure_new, self.pressure_old
+            self.pressure_old, self.pressure, self.pressure_new = (
+                self.pressure,
+                self.pressure_new,
+                self.pressure_old,
+            )
 
             # draw my part of the image, plus a nice 1 pixel border along my
             # right/bottom boundary
@@ -176,31 +202,42 @@ def work(self, mainProxy):
 
 
 @numba.jit(nopython=True, cache=False)
-def check_and_compute(h, w, left, right, up, down,
-                      pressure, pressure_old, pressure_new):
+def check_and_compute(
+    h, w, left, right, up, down, pressure, pressure_old, pressure_new
+):
     for i in range(h):
         for j in range(w):
             # current time's pressures for neighboring array locations
-            if j == 0: L = left[i]
-            else: L = pressure[i,j-1]
-
-            if j == w-1: R = right[i]
-            else: R = pressure[i,j+1]
-
-            if i == 0: U = up[j]
-            else: U = pressure[i-1,j]
-
-            if i == h-1: D = down[j]
-            else: D = pressure[i+1,j]
+            if j == 0:
+                L = left[i]
+            else:
+                L = pressure[i, j - 1]
+
+            if j == w - 1:
+                R = right[i]
+            else:
+                R = pressure[i, j + 1]
+
+            if i == 0:
+                U = up[j]
+            else:
+                U = pressure[i - 1, j]
+
+            if i == h - 1:
+                D = down[j]
+            else:
+                D = pressure[i + 1, j]
 
             # current time's pressure for this array location
-            curr = pressure[i,j]
+            curr = pressure[i, j]
 
             # previous time's pressure for this array location
-            old = pressure_old[i,j]
+            old = pressure_old[i, j]
 
             # compute the future time's pressure for this array location
-            pressure_new[i,j] = 0.4*0.4*(L+R+U+D - 4.0*curr)-old+2.0*curr
+            pressure_new[i, j] = (
+                0.4 * 0.4 * (L + R + U + D - 4.0 * curr) - old + 2.0 * curr
+            )
 
 
 @numba.jit(nopython=True, cache=False)
@@ -209,23 +246,25 @@ def fill_subimage(data, w, h, pressure):
     # Each RGB component is a uint8 that can have 256 possible values
     for i in range(h):
         for j in range(w):
-            p = int(pressure[i,j])
-            if p > 255: p = 255    # Keep values in valid range
-            if p < -255: p = -255  # Keep values in valid range
-            pos = 3*(i*w+j)
+            p = int(pressure[i, j])
+            if p > 255:
+                p = 255  # Keep values in valid range
+            if p < -255:
+                p = -255  # Keep values in valid range
+            pos = 3 * (i * w + j)
             if p > 0:  # Positive values are red
-                data[pos:pos+3] = (255, 255-p, 255-p)
+                data[pos : pos + 3] = (255, 255 - p, 255 - p)
             else:  # Negative values are blue
-                data[pos:pos+3] = (255+p, 255+p, 255)
+                data[pos : pos + 3] = (255 + p, 255 + p, 255)
 
     # Draw a green border on right and bottom of this chare array's pixel buffer.
     # This will overwrite some pressure values at these pixels.
     for i in range(h):
-        pos = 3*(i*w+w-1)
-        data[pos:pos+3] = (0, 255, 0)
+        pos = 3 * (i * w + w - 1)
+        data[pos : pos + 3] = (0, 255, 0)
     for i in range(w):
-        pos = 3*((h-1)*w+i)
-        data[pos:pos+3] = (0, 255, 0)
+        pos = 3 * ((h - 1) * w + i)
+        data[pos : pos + 3] = (0, 255, 0)
 
 
 @numba.jit(nopython=True, cache=False)
@@ -234,20 +273,24 @@ def init_pressure(numInitialPerturbations, W, H, w, h, elemIdx, pressure, pressu
     random.seed(6)
     for s in range(numInitialPerturbations):
         # determine where to place a circle within the interior of the 2D domain
-        radius = 20 + random.randint(0,32767) % 30
-        xcenter = radius + random.randint(0,32767) % (W - 2*radius)
-        ycenter = radius + random.randint(0,32767) % (H - 2*radius)
+        radius = 20 + random.randint(0, 32767) % 30
+        xcenter = radius + random.randint(0, 32767) % (W - 2 * radius)
+        ycenter = radius + random.randint(0, 32767) % (H - 2 * radius)
         # draw the circle
         for i in range(h):
             for j in range(w):
                 # the coordinate in the global data array (not just in this chare's portion)
-                globalx = elemIdx[0]*w + j
-                globaly = elemIdx[1]*h + i
-                distanceToCenter = math.sqrt((globalx-xcenter)**2 + (globaly-ycenter)**2)
+                globalx = elemIdx[0] * w + j
+                globaly = elemIdx[1] * h + i
+                distanceToCenter = math.sqrt(
+                    (globalx - xcenter) ** 2 + (globaly - ycenter) ** 2
+                )
                 if distanceToCenter < radius:
-                    rscaled = (distanceToCenter/radius)*3.0*3.14159/2.0  # ranges from 0 to 3pi/2
+                    rscaled = (
+                        (distanceToCenter / radius) * 3.0 * 3.14159 / 2.0
+                    )  # ranges from 0 to 3pi/2
                     t = 700.0 * math.cos(rscaled)  # range won't exceed -700 to 700
-                    pressure[i,j] = pressure_old[i,j] = t
+                    pressure[i, j] = pressure_old[i, j] = t
 
 
 charm.start(Main)
diff --git a/setup.py b/setup.py
index f0cda728..38dd6d50 100644
--- a/setup.py
+++ b/setup.py
@@ -1,6 +1,5 @@
 import sys
 import os
-import re
 import shutil
 import platform
 import subprocess
@@ -14,6 +13,7 @@
 import distutils
 
 import Cython.Compiler.Options
+
 Cython.Compiler.Options.annotate = True
 
 build_mpi = False
@@ -22,30 +22,29 @@
 
 def get_build_machine():
     machine = platform.machine()
-    if machine == 'arm64' or machine == 'aarch64':
-        return 'arm8'
+    if machine == "arm64" or machine == "aarch64":
+        return "arm8"
     return machine
 
+
 def get_archflag_machine():
     machine = platform.machine()
-    if machine == 'arm64' or machine == 'aarch64':
-        return 'arm64'
+    if machine == "arm64" or machine == "aarch64":
+        return "arm64"
     return machine
 
+
 def get_build_os():
     os = platform.system()
     return os.lower()
 
 
 def get_build_network_type(build_mpi):
-    return 'netlrts' if not build_mpi else 'mpi'
+    return "netlrts" if not build_mpi else "mpi"
 
 
 def get_build_triple(build_mpi):
-    return (get_build_machine(),
-            get_build_os(),
-            get_build_network_type(build_mpi)
-            )
+    return (get_build_machine(), get_build_os(), get_build_network_type(build_mpi))
 
 
 machine = get_build_machine()
@@ -53,44 +52,49 @@ def get_build_triple(build_mpi):
 
 
 libcharm_filename2 = None
-if system == 'windows' or system.startswith('cygwin'):
-    libcharm_filename = 'charm.dll'
-    libcharm_filename2 = 'charm.lib'
-    charmrun_filename = 'charmrun.exe'
-elif system == 'darwin':
-    os.environ['ARCHFLAGS'] = f'-arch {get_archflag_machine()}'
-    libcharm_filename = 'libcharm.dylib'
-    charmrun_filename = 'charmrun'
-    if 'CPPFLAGS' in os.environ:
-        os.environ['CPPFLAGS'] += ' -Wno-error=implicit-function-declaration' # needed because some functions used by charm4py are not exported by charm.
+if system == "windows" or system.startswith("cygwin"):
+    libcharm_filename = "charm.dll"
+    libcharm_filename2 = "charm.lib"
+    charmrun_filename = "charmrun.exe"
+elif system == "darwin":
+    os.environ["ARCHFLAGS"] = f"-arch {get_archflag_machine()}"
+    libcharm_filename = "libcharm.dylib"
+    charmrun_filename = "charmrun"
+    if "CPPFLAGS" in os.environ:
+        os.environ[
+            "CPPFLAGS"
+        ] += " -Wno-error=implicit-function-declaration"  # needed because some functions used by charm4py are not exported by charm.
     else:
-        os.environ['CPPFLAGS'] = '-Wno-error=implicit-function-declaration '
+        os.environ["CPPFLAGS"] = "-Wno-error=implicit-function-declaration "
 else:  # Linux
-    libcharm_filename = 'libcharm.so'
-    charmrun_filename = 'charmrun'
+    libcharm_filename = "libcharm.so"
+    charmrun_filename = "charmrun"
 
 
 try:
-    charm4py_version = subprocess.check_output(['git', 'describe']).rstrip().decode().split('-')[0]
-    if charm4py_version.startswith('v'):
+    charm4py_version = (
+        subprocess.check_output(["git", "describe"]).rstrip().decode().split("-")[0]
+    )
+    if charm4py_version.startswith("v"):
         charm4py_version = charm4py_version[1:]
-    with open(os.path.join('charm4py', '_version.py'), 'w') as f:
+    with open(os.path.join("charm4py", "_version.py"), "w") as f:
         f.write("version='" + charm4py_version + "'\n")
 except:
     try:
-        os.environ['PYTHONPATH'] = os.getcwd()
-        os.environ['CHARM_NOLOAD'] = '1'
+        os.environ["PYTHONPATH"] = os.getcwd()
+        os.environ["CHARM_NOLOAD"] = "1"
         from charm4py import _version
+
         charm4py_version = _version.version
     except:
-        raise DistutilsSetupError('Could not determine Charm4py version')
+        raise DistutilsSetupError("Could not determine Charm4py version")
 
 
 def charm_built(charm_src_dir):
-    library_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename)
+    library_path = os.path.join(charm_src_dir, "charm", "lib", libcharm_filename)
     if not os.path.exists(library_path):
         return False
-    charmrun_path = os.path.join(charm_src_dir, 'charm', 'bin', charmrun_filename)
+    charmrun_path = os.path.join(charm_src_dir, "charm", "bin", charmrun_filename)
     if not os.path.exists(charmrun_path):
         return False
     return True
@@ -98,111 +102,151 @@ def charm_built(charm_src_dir):
 
 def check_libcharm_version(charm_src_dir):
     import ctypes
-    library_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename)
+
+    library_path = os.path.join(charm_src_dir, "charm", "lib", libcharm_filename)
     lib = ctypes.CDLL(library_path)
-    with open(os.path.join(os.getcwd(), 'charm4py', 'libcharm_version'), 'r') as f:
-        req_version = tuple(int(n) for n in f.read().split('.'))
+    with open(os.path.join(os.getcwd(), "charm4py", "libcharm_version"), "r") as f:
+        req_version = tuple(int(n) for n in f.read().split("."))
     commit_id_str = ctypes.c_char_p.in_dll(lib, "CmiCommitID").value.decode()
-    version = [int(n) for n in commit_id_str.split('-')[0][1:].split('.')]
+    version = [int(n) for n in commit_id_str.split("-")[0][1:].split(".")]
     try:
-        version = tuple(version + [int(commit_id_str.split('-')[1])])
+        version = tuple(version + [int(commit_id_str.split("-")[1])])
     except:
         version = tuple(version + [0])
     if version < req_version:
-        req_str = '.'.join([str(n) for n in req_version])
-        cur_str = '.'.join([str(n) for n in version])
-        raise DistutilsSetupError('Charm++ version >= ' + req_str + ' required. '
-                                  'Existing version is ' + cur_str)
+        req_str = ".".join([str(n) for n in req_version])
+        cur_str = ".".join([str(n) for n in version])
+        raise DistutilsSetupError(
+            "Charm++ version >= " + req_str + " required. "
+            "Existing version is " + cur_str
+        )
 
 
 def build_libcharm(charm_src_dir, build_dir):
 
     lib_output_dirs = []
     charmrun_output_dirs = []
-    lib_output_dirs.append(os.path.join(build_dir, 'charm4py', '.libs'))
-    lib_output_dirs.append(os.path.join(os.getcwd(), 'charm4py', '.libs'))
-    charmrun_output_dirs.append(os.path.join(build_dir, 'charmrun'))
-    charmrun_output_dirs.append(os.path.join(os.getcwd(), 'charmrun'))
-    for output_dir in (lib_output_dirs + charmrun_output_dirs):
+    lib_output_dirs.append(os.path.join(build_dir, "charm4py", ".libs"))
+    lib_output_dirs.append(os.path.join(os.getcwd(), "charm4py", ".libs"))
+    charmrun_output_dirs.append(os.path.join(build_dir, "charmrun"))
+    charmrun_output_dirs.append(os.path.join(os.getcwd(), "charmrun"))
+    for output_dir in lib_output_dirs + charmrun_output_dirs:
         distutils.dir_util.mkpath(output_dir)
 
     if not os.path.exists(charm_src_dir) or not os.path.isdir(charm_src_dir):
-        raise DistutilsSetupError('charm sources dir ' + charm_src_dir + ' not found')
+        raise DistutilsSetupError("charm sources dir " + charm_src_dir + " not found")
 
     if not charm_built(charm_src_dir):
 
-        if system == 'windows' or system.startswith('cygwin'):
-            raise DistutilsSetupError('Building charm++ from setup.py not currently supported on Windows.'
-                                      ' Please download a Charm4py binary wheel (64-bit Python required)')
+        if system == "windows" or system.startswith("cygwin"):
+            raise DistutilsSetupError(
+                "Building charm++ from setup.py not currently supported on Windows."
+                " Please download a Charm4py binary wheel (64-bit Python required)"
+            )
 
-        if os.path.exists(os.path.join(charm_src_dir, 'charm.tar.gz')):
-            log.info('Uncompressing charm.tar.gz...')
-            cmd = ['tar', 'xf', 'charm.tar.gz']
+        if os.path.exists(os.path.join(charm_src_dir, "charm.tar.gz")):
+            log.info("Uncompressing charm.tar.gz...")
+            cmd = ["tar", "xf", "charm.tar.gz"]
             p = subprocess.Popen(cmd, cwd=charm_src_dir, shell=False)
             rc = p.wait()
             if rc != 0:
-                raise DistutilsSetupError('An error occured while building charm library')
+                raise DistutilsSetupError(
+                    "An error occured while building charm library"
+                )
 
         # divide by 2 to not hog the system. On systems with hyperthreading, this will likely
         # result in using same # cores as physical cores (therefore not all the logical cores)
         import multiprocessing
-        build_num_cores = max(int(os.environ.get('CHARM_BUILD_PROCESSES', multiprocessing.cpu_count() // 2)), 1)
-        extra_build_opts = os.environ.get('CHARM_EXTRA_BUILD_OPTS', '')
+
+        build_num_cores = max(
+            int(
+                os.environ.get(
+                    "CHARM_BUILD_PROCESSES", multiprocessing.cpu_count() // 2
+                )
+            ),
+            1,
+        )
+        extra_build_opts = os.environ.get("CHARM_EXTRA_BUILD_OPTS", "")
 
         if enable_tracing:
-         extra_build_opts += " --enable-tracing "
-        
+            extra_build_opts += " --enable-tracing "
+
         target_machine, os_target, target_layer = get_build_triple(build_mpi)
 
-        build_triple = f'{target_layer}-{os_target}-{target_machine}'
-        cmd = f'./build charm4py {build_triple} -j{build_num_cores} --with-production {extra_build_opts}'
+        build_triple = f"{target_layer}-{os_target}-{target_machine}"
+        cmd = f"./build charm4py {build_triple} -j{build_num_cores} --with-production {extra_build_opts}"
         print(cmd)
 
-        p = subprocess.Popen(cmd.rstrip().split(' '),
-                             cwd=os.path.join(charm_src_dir, 'charm'),
-                             shell=False)
+        p = subprocess.Popen(
+            cmd.rstrip().split(" "),
+            cwd=os.path.join(charm_src_dir, "charm"),
+            shell=False,
+        )
         rc = p.wait()
         if rc != 0:
-            raise DistutilsSetupError('An error occured while building charm library')
+            raise DistutilsSetupError("An error occured while building charm library")
 
-        if system == 'darwin':
-            old_file_path = os.path.join(charm_src_dir, 'charm', 'lib', 'libcharm.dylib')
-            new_file_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename)
+        if system == "darwin":
+            old_file_path = os.path.join(
+                charm_src_dir, "charm", "lib", "libcharm.dylib"
+            )
+            new_file_path = os.path.join(
+                charm_src_dir, "charm", "lib", libcharm_filename
+            )
             shutil.move(old_file_path, new_file_path)
-            cmd = ['install_name_tool', '-id', '@rpath/../.libs/' + libcharm_filename, new_file_path]
+            cmd = [
+                "install_name_tool",
+                "-id",
+                "@rpath/../.libs/" + libcharm_filename,
+                new_file_path,
+            ]
             p = subprocess.Popen(cmd, shell=False)
             rc = p.wait()
             if rc != 0:
-                raise DistutilsSetupError('install_name_tool error')
+                raise DistutilsSetupError("install_name_tool error")
 
     # verify that the version of charm++ that was built is same or greater than the
     # one required by charm4py
     check_libcharm_version(charm_src_dir)
 
     # ---- copy libcharm ----
-    lib_src_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename)
+    lib_src_path = os.path.join(charm_src_dir, "charm", "lib", libcharm_filename)
     for output_dir in lib_output_dirs:
-        log.info('copying ' + os.path.relpath(lib_src_path) + ' to ' + os.path.relpath(output_dir))
+        log.info(
+            "copying "
+            + os.path.relpath(lib_src_path)
+            + " to "
+            + os.path.relpath(output_dir)
+        )
         shutil.copy(lib_src_path, output_dir)
     if libcharm_filename2 is not None:
-        lib_src_path = os.path.join(charm_src_dir, 'charm', 'lib', libcharm_filename2)
+        lib_src_path = os.path.join(charm_src_dir, "charm", "lib", libcharm_filename2)
         for output_dir in lib_output_dirs:
-            log.info('copying ' + os.path.relpath(lib_src_path) + ' to ' + os.path.relpath(output_dir))
+            log.info(
+                "copying "
+                + os.path.relpath(lib_src_path)
+                + " to "
+                + os.path.relpath(output_dir)
+            )
             shutil.copy(lib_src_path, output_dir)
 
-
     # ---- copy charmrun ----
-    charmrun_src_path = os.path.join(charm_src_dir, 'charm', 'bin', charmrun_filename)
+    charmrun_src_path = os.path.join(charm_src_dir, "charm", "bin", charmrun_filename)
     for output_dir in charmrun_output_dirs:
-        log.info('copying ' + os.path.relpath(charmrun_src_path) + ' to ' + os.path.relpath(output_dir))
+        log.info(
+            "copying "
+            + os.path.relpath(charmrun_src_path)
+            + " to "
+            + os.path.relpath(output_dir)
+        )
         shutil.copy(charmrun_src_path, output_dir)
 
 
 class custom_install(install, object):
 
     user_options = install.user_options + [
-        ('mpi', None, 'Build libcharm with MPI'),
-        ('enable-tracing', None, 'Build libcharm with tracing enabled')
+        ("mpi", None, "Build libcharm with MPI"),
+        ("enable-tracing", None, "Build libcharm with tracing enabled"),
     ]
 
     def initialize_options(self):
@@ -227,8 +271,8 @@ def run(self):
 class custom_build_py(build_py, object):
 
     user_options = build_py.user_options + [
-        ('mpi', None, 'Build libcharm with MPI'),
-        ('enable-tracing', None, 'Build libcharm with tracing enabled')
+        ("mpi", None, "Build libcharm with MPI"),
+        ("enable-tracing", None, "Build libcharm with tracing enabled"),
     ]
 
     def initialize_options(self):
@@ -247,16 +291,19 @@ def finalize_options(self):
 
     def run(self):
         if not self.dry_run:
-            build_libcharm(os.path.join(os.getcwd(), 'charm_src'), self.build_lib)
-            shutil.copy(os.path.join(os.getcwd(), 'LICENSE'), os.path.join(self.build_lib, 'charm4py'))
+            build_libcharm(os.path.join(os.getcwd(), "charm_src"), self.build_lib)
+            shutil.copy(
+                os.path.join(os.getcwd(), "LICENSE"),
+                os.path.join(self.build_lib, "charm4py"),
+            )
         super(custom_build_py, self).run()
 
 
 class custom_build_ext(build_ext, object):
 
     user_options = build_ext.user_options + [
-        ('mpi', None, 'Build libcharm with MPI'),
-        ('enable-tracing', None, 'Build libcharm with tracing enabled')
+        ("mpi", None, "Build libcharm with MPI"),
+        ("enable-tracing", None, "Build libcharm with tracing enabled"),
     ]
 
     def initialize_options(self):
@@ -276,18 +323,18 @@ def finalize_options(self):
 
     def run(self):
         if not self.dry_run:
-            build_libcharm(os.path.join(os.getcwd(), 'charm_src'), self.build_lib)
+            build_libcharm(os.path.join(os.getcwd(), "charm_src"), self.build_lib)
         super(custom_build_ext, self).run()
 
+
 class _renameInstalled(_install_lib):
     def __init__(self, *args, **kwargs):
         _install_lib.__init__(self, *args, **kwargs)
 
-    
     def install(self):
         log.info("Renaming libraries")
         outfiles = _install_lib.install(self)
-        '''
+        """
         for file in outfiles:
             if "c_object_store" in file and system == "darwin":
                 direc = os.path.dirname(file)
@@ -307,66 +354,78 @@ def install(self):
                 install_name_command += "/charmlib_cython.*.so"
                 log.info(install_name_command)
                 os.system(install_name_command)
-        '''
+        """
         return outfiles
 
 
-
 extensions = []
 py_impl = platform.python_implementation()
 
 
-
 log.info("Check sys version info")
 if sys.version_info[0] >= 3:
     log.info("Defining cython args")
     # compile C-extension module (from cython)
     from Cython.Build import cythonize
+
     my_include_dirs = []
     haveNumpy = False
     try:
         import numpy
+
         haveNumpy = True
         my_include_dirs.append(numpy.get_include())
     except:
-        log.warn('WARNING: Building charmlib C-extension module without numpy support (numpy not found or import failed)')
+        log.warn(
+            "WARNING: Building charmlib C-extension module without numpy support (numpy not found or import failed)"
+        )
 
     extra_link_args = []
-    if os.name != 'nt':
-        if system == 'darwin':
-            extra_link_args=["-Wl,-rpath,@loader_path/../.libs"]
+    if os.name != "nt":
+        if system == "darwin":
+            extra_link_args = ["-Wl,-rpath,@loader_path/../.libs"]
         else:
-            extra_link_args=["-Wl,-rpath,$ORIGIN/../.libs"]
+            extra_link_args = ["-Wl,-rpath,$ORIGIN/../.libs"]
 
     cobject_extra_args = []
     log.info("Extra object args for object store")
-    if os.name != 'nt':
-        if system == 'darwin':
-            cobject_extra_args=["-Wl,-rpath,@loader_path/.libs"]
+    if os.name != "nt":
+        if system == "darwin":
+            cobject_extra_args = ["-Wl,-rpath,@loader_path/.libs"]
         else:
-            cobject_extra_args=["-Wl,-rpath,$ORIGIN/.libs"]
-            
-    cudaBuild = os.environ.get('CHARM_EXTRA_BUILD_OPTS', '').find('cuda') != -1
-    
-    extensions.extend(cythonize(setuptools.Extension('charm4py.charmlib.charmlib_cython',
-                            sources=['charm4py/charmlib/charmlib_cython.pyx'],
-                            include_dirs=['charm_src/charm/include'] + my_include_dirs,
-                            library_dirs=[os.path.join(os.getcwd(), 'charm4py', '.libs')],
-                            libraries=["charm"],
-                            extra_compile_args=[],
-                            extra_link_args=extra_link_args,
-                            ), compile_time_env={'HAVE_NUMPY': haveNumpy,
-                                                 'HAVE_CUDA_BUILD': cudaBuild}))
-
-    extensions.extend(cythonize(setuptools.Extension('charm4py.c_object_store',
-                            sources=['charm4py/c_object_store.pyx'],
-                            include_dirs=['charm_src/charm/include'] + my_include_dirs,
-                            library_dirs=[os.path.join(os.getcwd(), 'charm4py', '.libs')],
-                            libraries=["charm"],
-                            extra_compile_args=[],
-                            extra_link_args=cobject_extra_args,
-                            ), compile_time_env={'HAVE_NUMPY': haveNumpy,
-                                                 'HAVE_CUDA_BUILD': cudaBuild}))
+            cobject_extra_args = ["-Wl,-rpath,$ORIGIN/.libs"]
+
+    cudaBuild = os.environ.get("CHARM_EXTRA_BUILD_OPTS", "").find("cuda") != -1
+
+    extensions.extend(
+        cythonize(
+            setuptools.Extension(
+                "charm4py.charmlib.charmlib_cython",
+                sources=["charm4py/charmlib/charmlib_cython.pyx"],
+                include_dirs=["charm_src/charm/include"] + my_include_dirs,
+                library_dirs=[os.path.join(os.getcwd(), "charm4py", ".libs")],
+                libraries=["charm"],
+                extra_compile_args=[],
+                extra_link_args=extra_link_args,
+            ),
+            compile_time_env={"HAVE_NUMPY": haveNumpy, "HAVE_CUDA_BUILD": cudaBuild},
+        )
+    )
+
+    extensions.extend(
+        cythonize(
+            setuptools.Extension(
+                "charm4py.c_object_store",
+                sources=["charm4py/c_object_store.pyx"],
+                include_dirs=["charm_src/charm/include"] + my_include_dirs,
+                library_dirs=[os.path.join(os.getcwd(), "charm4py", ".libs")],
+                libraries=["charm"],
+                extra_compile_args=[],
+                extra_link_args=cobject_extra_args,
+            ),
+            compile_time_env={"HAVE_NUMPY": haveNumpy, "HAVE_CUDA_BUILD": cudaBuild},
+        )
+    )
 
 
 additional_setup_keywords = {}
@@ -374,12 +433,14 @@ def install(self):
     version=charm4py_version,
     packages=setuptools.find_packages(),
     package_data={
-        'charm4py': ['libcharm_version'],
+        "charm4py": ["libcharm_version"],
     },
     ext_modules=extensions,
-    cmdclass = {'build_py': custom_build_py,
-                'build_ext': custom_build_ext,
-                'install': custom_install,
-                'install_lib': _renameInstalled,},
-    **additional_setup_keywords
+    cmdclass={
+        "build_py": custom_build_py,
+        "build_ext": custom_build_ext,
+        "install": custom_install,
+        "install_lib": _renameInstalled,
+    },
+    **additional_setup_keywords,
 )
diff --git a/tests/array_maps/test1.py b/tests/array_maps/test1.py
index 8a780adf..a5e782c5 100644
--- a/tests/array_maps/test1.py
+++ b/tests/array_maps/test1.py
@@ -15,7 +15,7 @@ def procNum(self, index):
 class MyChare(Chare):
 
     def __init__(self, last):
-        assert charm.myPe() == index_to_pe(self.thisIndex), 'ArrayMap failed'
+        assert charm.myPe() == index_to_pe(self.thisIndex), "ArrayMap failed"
         if last:
             self.contribute(None, None, charm.thisProxy[0].exit)
 
diff --git a/tests/benchmark/pingpong.py b/tests/benchmark/pingpong.py
index cb6060c2..3f1dc0c9 100644
--- a/tests/benchmark/pingpong.py
+++ b/tests/benchmark/pingpong.py
@@ -1,6 +1,7 @@
 from charm4py import charm, Chare, Array, coro, Future
 from time import time
-#import numpy as np
+
+# import numpy as np
 
 PAYLOAD = 100  # number of bytes
 NITER = 10000
@@ -18,7 +19,7 @@ def __init__(self):
     def start(self, done_future, threaded=False):
         self.done_future = done_future
         self.iter = 0
-        #data = np.zeros(PAYLOAD, dtype='int8')
+        # data = np.zeros(PAYLOAD, dtype='int8')
         data = 3
         self.startTime = time()
         if threaded:
@@ -48,7 +49,7 @@ def recv_th(self, data):
 
 def main(args):
     threaded = False
-    if len(args) > 1 and args[1] == '-t':
+    if len(args) > 1 and args[1] == "-t":
         threaded = True
     pings = Array(Ping, 2)
     charm.awaitCreation(pings)
diff --git a/tests/callbacks/callbacks.py b/tests/callbacks/callbacks.py
index 9bc81190..5784c042 100644
--- a/tests/callbacks/callbacks.py
+++ b/tests/callbacks/callbacks.py
@@ -24,12 +24,12 @@ def __init__(self, main):
         self.main = main
 
     def getResult(self, result):
-        #print('[' + str(charm.myPe()) + '] got result:', result)
+        # print('[' + str(charm.myPe()) + '] got result:', result)
         assert result == (charm.numPes() * (charm.numPes() - 1)) // 2
         self.main.workDone(self.thisIndex[0])
 
     def getResultBroadcast(self, result):
-        #print('[' + str(charm.myPe()) + '] got result:', result)
+        # print('[' + str(charm.myPe()) + '] got result:', result)
         assert result == (charm.numPes() * (charm.numPes() - 1)) // 2
         self.contribute(1, Reducer.sum, self.main.workDone)
 
@@ -45,15 +45,15 @@ def __init__(self, args):
         controllers = Array(Controller, charm.numPes())
         receivers = Array(CallbackReceiver, charm.numPes(), args=[self.thisProxy])
         workers.work(receivers[1].getResult)
-        self.wait('self.done == 1')
+        self.wait("self.done == 1")
         self.done = -1
 
         controllers[1].start(workers, receivers[2].getResult)
-        self.wait('self.done == 2')
+        self.wait("self.done == 2")
         self.done = -1
 
         controllers[2].start(workers, receivers.getResultBroadcast)
-        self.wait('self.done == ' + str(charm.numPes()))
+        self.wait("self.done == " + str(charm.numPes()))
         self.done = -1
 
         f = Future()
diff --git a/tests/callbacks/schedule_cb.py b/tests/callbacks/schedule_cb.py
index b5f1fab4..74ebd824 100644
--- a/tests/callbacks/schedule_cb.py
+++ b/tests/callbacks/schedule_cb.py
@@ -14,15 +14,16 @@ def start(self):
         charm.scheduleCallableAfter(self.thisProxy[self.thisIndex].next, 1, [-1])
 
     def next(self, from_elem):
-        print(self.thisIndex, 'time=', time() - self.t0, 'from=', from_elem)
+        print(self.thisIndex, "time=", time() - self.t0, "from=", from_elem)
         assert from_elem == self.thisIndex[0] - 1
         assert time() - self.t0 > self.thisIndex[0] + 0.9
         if self.thisIndex[0] == NUM_CHARES - 1:
-            print('DONE')
+            print("DONE")
             exit()
         else:
-            charm.scheduleCallableAfter(self.thisProxy[self.thisIndex[0] + 1].next,
-                                        1, [self.thisIndex[0]])
+            charm.scheduleCallableAfter(
+                self.thisProxy[self.thisIndex[0] + 1].next, 1, [self.thisIndex[0]]
+            )
 
 
 def main(args):
diff --git a/tests/channels/test1.py b/tests/channels/test1.py
index 72fe6878..258c0ef8 100644
--- a/tests/channels/test1.py
+++ b/tests/channels/test1.py
@@ -10,16 +10,16 @@ def __init__(self, id):
     def work(self, mainProxy, other, done_fut):
         me = self.thisProxy[self.thisIndex]
         ch = Channel(self, remote=mainProxy)
-        ch.send('hello from ' + str(self.id))
+        ch.send("hello from " + str(self.id))
 
         ch = Channel(self, remote=me)
-        ch.send('self ping', me)
-        assert ch.recv() == ('self ping', me)
+        ch.send("self ping", me)
+        assert ch.recv() == ("self ping", me)
 
         ch = Channel(self, remote=other)
-        ch.send(('hi from ' + str(self.id), me))
+        ch.send(("hi from " + str(self.id), me))
         data = ch.recv()
-        assert data[0] == 'hi from ' + str((self.id + 1) % 2)
+        assert data[0] == "hi from " + str((self.id + 1) % 2)
         assert data[1] == other
         done_fut()
 
@@ -35,8 +35,8 @@ def __init__(self, args):
         chare1.work(self.thisProxy, chare0, done_fut)
         ch0 = Channel(self, remote=chare0)
         ch1 = Channel(self, remote=chare1)
-        assert ch0.recv() == 'hello from 0'
-        assert ch1.recv() == 'hello from 1'
+        assert ch0.recv() == "hello from 0"
+        assert ch1.recv() == "hello from 1"
         done_fut.get()
         exit()
 
diff --git a/tests/channels/test2.py b/tests/channels/test2.py
index d7f769ef..0c317c73 100644
--- a/tests/channels/test2.py
+++ b/tests/channels/test2.py
@@ -60,11 +60,11 @@ def main(args):
         for idx in range(P):
             chares.append(collection[idx])
 
-    for collection, numelems in ((a1, P*8), (a2, P*10), (a3, P*4), (a4, P)):
+    for collection, numelems in ((a1, P * 8), (a2, P * 10), (a3, P * 4), (a4, P)):
         for idx in range(numelems):
             chares.append(collection[idx])
 
-    print('There are', len(chares), 'chares')
+    print("There are", len(chares), "chares")
 
     # establish random channels between chares
     global gchannels
@@ -79,19 +79,24 @@ def main(args):
                 num_self_channels += 1
             gchannels[level][a].append(b)
             gchannels[level][b].append(a)
-    charm.thisProxy.updateGlobals({'gchannels': gchannels}, awaitable=True).get()
+    charm.thisProxy.updateGlobals({"gchannels": gchannels}, awaitable=True).get()
 
     done_fut = Future(8 * NUM_LEVELS)  # wait for 8 collections to finish 3 levels
     for collection in (g1, g2, g3, g4, a1, a2, a3, a4):
         collection.setup(awaitable=True).get()
-    print(NUM_CHANNELS * NUM_LEVELS, 'channels set up,', num_self_channels, 'self channels')
+    print(
+        NUM_CHANNELS * NUM_LEVELS,
+        "channels set up,",
+        num_self_channels,
+        "self channels",
+    )
     for collection in (g1, g2, g3, g4, a1, a2, a3, a4):
         for lvl in range(NUM_LEVELS):
             collection.work(lvl, done_fut)
 
     msgs = sum(done_fut.get())
     assert msgs == sum(LEVELS_NUM_ITER[:NUM_LEVELS]) * NUM_CHANNELS * 2
-    print('total msgs received by chares=', msgs)
+    print("total msgs received by chares=", msgs)
     exit()
 
 
diff --git a/tests/channels/test_numpy.py b/tests/channels/test_numpy.py
index 26c0a783..e583e68f 100644
--- a/tests/channels/test_numpy.py
+++ b/tests/channels/test_numpy.py
@@ -12,9 +12,9 @@ def work(self, mainProxy, done_fut):
         ch = Channel(self, remote=mainProxy)
         for i in range(NUM_ITER):
             array1, array2, array3 = ch.recv()
-            np.testing.assert_array_equal(array1, np.arange(100, dtype='int64') + i)
-            np.testing.assert_array_equal(array2, np.arange(50, dtype='int64') + i)
-            np.testing.assert_array_equal(array3, np.arange(70, dtype='int64') + i)
+            np.testing.assert_array_equal(array1, np.arange(100, dtype="int64") + i)
+            np.testing.assert_array_equal(array2, np.arange(50, dtype="int64") + i)
+            np.testing.assert_array_equal(array3, np.arange(70, dtype="int64") + i)
         done_fut()
 
 
@@ -26,9 +26,9 @@ def __init__(self, args):
         done_fut = Future()
         chare.work(self.thisProxy, done_fut)
         for i in range(NUM_ITER):
-            array1 = np.arange(100, dtype='int64') + i
-            array2 = np.arange(50, dtype='int64') + i
-            array3 = np.arange(70, dtype='int64') + i
+            array1 = np.arange(100, dtype="int64") + i
+            array2 = np.arange(50, dtype="int64") + i
+            array3 = np.arange(70, dtype="int64") + i
             ch.send(array1, array2, array3)
         done_fut.get()
         exit()
diff --git a/tests/charm_remote.py b/tests/charm_remote.py
index 6105409d..f0f280f2 100644
--- a/tests/charm_remote.py
+++ b/tests/charm_remote.py
@@ -15,8 +15,10 @@ def start(self):
         pe = charm.myPe() - 1
         if pe == -1:
             pe = 0
-        charm.thisProxy[pe].exec('global MY_GLOBAL; MY_GLOBAL = 7262', __name__, awaitable=True).get()
-        assert charm.thisProxy[pe].eval('MY_GLOBAL', __name__, ret=True).get() == 7262
+        charm.thisProxy[pe].exec(
+            "global MY_GLOBAL; MY_GLOBAL = 7262", __name__, awaitable=True
+        ).get()
+        assert charm.thisProxy[pe].eval("MY_GLOBAL", __name__, ret=True).get() == 7262
 
         Group(Test)
 
diff --git a/tests/collections/proxies_same_name.py b/tests/collections/proxies_same_name.py
index b10ccbf0..b9faf03b 100644
--- a/tests/collections/proxies_same_name.py
+++ b/tests/collections/proxies_same_name.py
@@ -26,9 +26,9 @@ def main(args):
     tester1 = Chare(Test, onPE=2)
     tester2 = Chare(proxies_same_name_aux.Test, onPE=1)
     charm.awaitCreation(g2, g1, tester2, tester1)
-    tester1.test(g2, 'check2', awaitable=True).get()
-    tester2.test(g1, 'check1', awaitable=True).get()
+    tester1.test(g2, "check2", awaitable=True).get()
+    tester2.test(g1, "check1", awaitable=True).get()
     exit()
 
 
-charm.start(main, modules=['proxies_same_name_aux'])
+charm.start(main, modules=["proxies_same_name_aux"])
diff --git a/tests/collections/proxy_eq.py b/tests/collections/proxy_eq.py
index d5129da9..b0a04556 100644
--- a/tests/collections/proxy_eq.py
+++ b/tests/collections/proxy_eq.py
@@ -26,11 +26,15 @@ def __init__(self, args):
         assert g1 == g1[2].getProxy(ret=True).get()
         assert g1[2] == g1[2].getProxy(elem=True, ret=True).get()
         assert g1[2].getProxy(ret=True).get() == g1[3].getProxy(ret=True).get()
-        assert g1[2].getProxy(True, ret=True).get() != g1[3].getProxy(True, ret=True).get()
+        assert (
+            g1[2].getProxy(True, ret=True).get() != g1[3].getProxy(True, ret=True).get()
+        )
 
         assert g1 != g2
         assert g1[2].getProxy(ret=True).get() != g2[2].getProxy(ret=True).get()
-        assert g1[2].getProxy(True, ret=True).get() != g2[2].getProxy(True, ret=True).get()
+        assert (
+            g1[2].getProxy(True, ret=True).get() != g2[2].getProxy(True, ret=True).get()
+        )
 
         assert g1 != a
         assert a == a
diff --git a/tests/collections/test.py b/tests/collections/test.py
index 928b1f0f..98ffe068 100644
--- a/tests/collections/test.py
+++ b/tests/collections/test.py
@@ -13,7 +13,7 @@ def __init__(self):
         else:
             myIndex = self.thisIndex
         if charm.numPes() <= 20 or myIndex == 0:
-            print('Test', self.thisIndex, 'created')
+            print("Test", self.thisIndex, "created")
 
     def work(self, main):
         self.contribute(1, Reducer.sum, main.done)
@@ -31,7 +31,7 @@ def done(self, result):
         self.countReductions += 1
         if self.countReductions == 2:
             assert self.count == (charm.numPes() + charm.numPes() * CHARES_PER_PE)
-            print('Program done')
+            print("Program done")
             exit()
 
 
diff --git a/tests/dcopy/test_dcopy.py b/tests/dcopy/test_dcopy.py
index 83ebd9d0..144940c8 100644
--- a/tests/dcopy/test_dcopy.py
+++ b/tests/dcopy/test_dcopy.py
@@ -17,7 +17,9 @@
 class Main(Chare):
 
     def __init__(self, args):
-        charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy}, '__main__', awaitable=True).get()
+        charm.thisProxy.updateGlobals(
+            {"mainProxy": self.thisProxy}, "__main__", awaitable=True
+        ).get()
         self.testProxy = Array(Test, charm.numPes() * CHARES_PER_PE)
 
     def start(self):
@@ -27,10 +29,10 @@ def start(self):
 
     def iterationComplete(self):
         if self.iterations % 10 == 0:
-            print('Iteration', self.iterations, 'complete')
+            print("Iteration", self.iterations, "complete")
         self.iterations += 1
         if self.iterations == MAX_ITER:
-            print('Program done. Total time =', time.time() - self.startTime)
+            print("Program done. Total time =", time.time() - self.startTime)
             charm.printStats()
             exit()
         else:
@@ -40,11 +42,11 @@ def iterationComplete(self):
 class Test(Chare):
 
     def __init__(self):
-        self.x = numpy.arange(DATA_LEN, dtype='float64')
+        self.x = numpy.arange(DATA_LEN, dtype="float64")
         y = self.x * (self.thisIndex[0] + 1)
 
         self.S1 = y.tobytes()
-        self.S2 = array.array('d', y)
+        self.S2 = array.array("d", y)
         self.S3 = y
 
         self.msgsRcvd = 0
@@ -72,10 +74,10 @@ def recvData(self, src, d1, d2, d3):
 
         desired = self.x * (src[0] + 1)
 
-        v1 = numpy.frombuffer(d1, dtype='float64')
+        v1 = numpy.frombuffer(d1, dtype="float64")
         assert_allclose(v1, desired, atol=1e-07)
 
-        v2 = numpy.array(d2, dtype='float64')
+        v2 = numpy.array(d2, dtype="float64")
         assert_allclose(v2, desired, atol=1e-07)
 
         assert_allclose(d3, desired, atol=1e-07)
diff --git a/tests/entry_methods/bcast_globals.py b/tests/entry_methods/bcast_globals.py
index ab5b1b3f..ceb43182 100644
--- a/tests/entry_methods/bcast_globals.py
+++ b/tests/entry_methods/bcast_globals.py
@@ -22,10 +22,12 @@ def main(args):
     done = charm.Future()
 
     main_globals = {}
-    main_globals['group1_proxy'] = g1
-    main_globals['group2_proxy'] = g2
-    main_globals['done_future'] = done
-    charm.thisProxy.updateGlobals(main_globals, module_name='__main__', awaitable=True).get()
+    main_globals["group1_proxy"] = g1
+    main_globals["group2_proxy"] = g2
+    main_globals["done_future"] = done
+    charm.thisProxy.updateGlobals(
+        main_globals, module_name="__main__", awaitable=True
+    ).get()
 
     group1_proxy.start()
     done.get()
diff --git a/tests/entry_methods/entrymethod_args_kwargs.py b/tests/entry_methods/entrymethod_args_kwargs.py
index 652cc56b..aaf046b5 100644
--- a/tests/entry_methods/entrymethod_args_kwargs.py
+++ b/tests/entry_methods/entrymethod_args_kwargs.py
@@ -71,7 +71,9 @@ def __init__(self, args):
             else:
                 continue
             collection[single_chare].recv(10, 20, 3000, b=4000, awaitable=True).get()
-            collection[single_chare].recv(b=4000, a=3000, y=20, x=10, awaitable=True).get()
+            collection[single_chare].recv(
+                b=4000, a=3000, y=20, x=10, awaitable=True
+            ).get()
 
         exit()
 
diff --git a/tests/exceptions/pool.py b/tests/exceptions/pool.py
index ea0812ee..95cff659 100644
--- a/tests/exceptions/pool.py
+++ b/tests/exceptions/pool.py
@@ -12,11 +12,11 @@ def __init__(self):
 
 myfunc = None
 
-myfunc_bad_source = '''
+myfunc_bad_source = """
 def myfunc(x):
     raise MyException
     return x**2
-'''
+"""
 
 myfunc_good_source = """
 def myfunc(x):
@@ -42,15 +42,26 @@ def main(args):
                         try:
                             if func is None:
                                 tasks = [(myfunc, i) for i in range(num_tasks)]
-                                result = charm.pool.submit_async(tasks, multi_future=multi_future, chunksize=chunk_size)
+                                result = charm.pool.submit_async(
+                                    tasks,
+                                    multi_future=multi_future,
+                                    chunksize=chunk_size,
+                                )
                             else:
                                 tasks = range(num_tasks)
-                                result = charm.pool.map_async(func, tasks, multi_future=multi_future, chunksize=chunk_size)
+                                result = charm.pool.map_async(
+                                    func,
+                                    tasks,
+                                    multi_future=multi_future,
+                                    chunksize=chunk_size,
+                                )
                             if multi_future:
                                 result = [f.get() for f in result]
                             else:
                                 result = result.get()
-                            assert trial == 1 and result == [x**2 for x in range(num_tasks)]
+                            assert trial == 1 and result == [
+                                x**2 for x in range(num_tasks)
+                            ]
                         except MyException:
                             assert trial == 0
     exit()
diff --git a/tests/exceptions/test.py b/tests/exceptions/test.py
index 06e93f27..a5d74466 100644
--- a/tests/exceptions/test.py
+++ b/tests/exceptions/test.py
@@ -14,7 +14,7 @@ def bad(self):
             # this will raise NameError exception
             test[3] = 3
         else:
-            return 'good'
+            return "good"
 
     def allbad(self):
         # this will raise NameError exception
@@ -47,9 +47,9 @@ def main(args):
     for proxy, num_chares in ((g, npes), (a, npes * 8)):
         for i in range(2):
             if i == 0:
-                methods = {'allbad': 'allbad', 'good': 'good', 'bad': 'bad'}
+                methods = {"allbad": "allbad", "good": "good", "bad": "bad"}
             else:
-                methods = {'allbad': 'allbad_th', 'good': 'good_th', 'bad': 'bad_th'}
+                methods = {"allbad": "allbad_th", "good": "good_th", "bad": "bad_th"}
 
             # p2p
             if proxy == g:
@@ -58,34 +58,34 @@ def main(args):
                 bad_idx = (num_chares // 2) + 1
             for _ in range(NUM_ITER):
                 try:
-                    getattr(proxy[bad_idx], methods['bad'])(ret=True).get()
+                    getattr(proxy[bad_idx], methods["bad"])(ret=True).get()
                     assert False
                 except NameError:
-                    retval = getattr(proxy[bad_idx], methods['good'])(ret=True).get()
+                    retval = getattr(proxy[bad_idx], methods["good"])(ret=True).get()
                     assert retval == bad_idx
 
             # bcast awaitable=True
             for _ in range(NUM_ITER):
                 try:
-                    getattr(proxy, methods['allbad'])(awaitable=True).get()
+                    getattr(proxy, methods["allbad"])(awaitable=True).get()
                     assert False
                 except NameError:
                     try:
-                        getattr(proxy, methods['bad'])(awaitable=True).get()
+                        getattr(proxy, methods["bad"])(awaitable=True).get()
                         assert False
                     except NameError:
-                        retval = getattr(proxy, methods['good'])(awaitable=True).get()
+                        retval = getattr(proxy, methods["good"])(awaitable=True).get()
                         assert retval is None
 
             # bcast ret=True (returns list of results)
             for _ in range(NUM_ITER):
-                retvals = getattr(proxy, methods['bad'])(ret=True).get()
+                retvals = getattr(proxy, methods["bad"])(ret=True).get()
                 num_errors = 0
                 for retval in retvals:
                     if isinstance(retval, NameError):
                         num_errors += 1
                     else:
-                        assert retval == 'good'
+                        assert retval == "good"
                 assert num_errors == (num_chares // 2)
     exit()
 
diff --git a/tests/futures/multi_futures.py b/tests/futures/multi_futures.py
index 0d69c11c..75119dba 100644
--- a/tests/futures/multi_futures.py
+++ b/tests/futures/multi_futures.py
@@ -12,9 +12,9 @@ def main(args):
     testProxy.getData(f)
 
     data = f.get()
-    print('[Main] Received data: ' + str(data))
-    assert sorted(data) == list(range(numChares)), 'Multi-futures failed!'
-    print('[Main] All done.')
+    print("[Main] Received data: " + str(data))
+    assert sorted(data) == list(range(numChares)), "Multi-futures failed!"
+    print("[Main] All done.")
     exit()
 
 
diff --git a/tests/futures/test_different_coroutines.py b/tests/futures/test_different_coroutines.py
index 8a7d5e56..95a8e712 100644
--- a/tests/futures/test_different_coroutines.py
+++ b/tests/futures/test_different_coroutines.py
@@ -1,9 +1,11 @@
 from charm4py import charm, Chare, Future, coro
+
 # This test will "fail" if running it results in a timeout, as any
 # return code generated by this program will be 1
 
 TEST_VALUE = 42
 
+
 class TestChare(Chare):
     @coro
     def __init__(self, done_future):
@@ -39,4 +41,5 @@ def main(args):
 
     charm.exit()
 
+
 charm.start(main)
diff --git a/tests/futures/test_futures.py b/tests/futures/test_futures.py
index c39ddaa2..1d495494 100644
--- a/tests/futures/test_futures.py
+++ b/tests/futures/test_futures.py
@@ -12,8 +12,15 @@ def main(args):
     max_f = Future()
     testProxy.getStats((sum_f, min_f, max_f))
 
-    print('[Main] Sum: ' + str(sum_f.get()) + ', Min: ' + str(min_f.get()) + ', Max: ' + str(max_f.get()))
-    print('[Main] All done.')
+    print(
+        "[Main] Sum: "
+        + str(sum_f.get())
+        + ", Min: "
+        + str(min_f.get())
+        + ", Max: "
+        + str(max_f.get())
+    )
+    print("[Main] All done.")
     exit()
 
 
@@ -28,7 +35,7 @@ def getStats(self, futures):
         self.contribute(self.thisIndex[0], Reducer.max, self.thisProxy[0].collectStats)
 
     def collectStats(self, stat_result):
-        assert self.thisIndex[0] == 0, 'Reduction target incorrect!'
+        assert self.thisIndex[0] == 0, "Reduction target incorrect!"
         if stat_result == 0:
             self.min_future.send(stat_result)
         elif stat_result == (charm.numPes() * CHARES_PER_PE) - 1:
diff --git a/tests/migration/chare_migration.py b/tests/migration/chare_migration.py
index 50ca1152..9398d1e8 100644
--- a/tests/migration/chare_migration.py
+++ b/tests/migration/chare_migration.py
@@ -1,4 +1,3 @@
-
 """
 A program to test migration of chares.
 """
@@ -20,7 +19,7 @@ def migrated(self):
         chare has migrated.
         """
         if self.thisIndex == (0,):
-            print(self.thisIndex, 'migrated to PE', charm.myPe())
+            print(self.thisIndex, "migrated to PE", charm.myPe())
         assert charm.myPe() == self.toPe
         self.contribute(None, None, charm.thisProxy[0].exit)
 
@@ -29,14 +28,14 @@ def start(self):
         Invoke the starter code for test.
         """
         if charm.myPe() == 0:
-            print(self.thisIndex, 'on PE', charm.myPe(), 'before migration')
+            print(self.thisIndex, "on PE", charm.myPe(), "before migration")
         self.toPe = (charm.myPe() + 1) % charm.numPes()
         self.thisProxy[self.thisIndex].migrate(self.toPe)
 
 
 def main(args):
     if charm.numPes() == 1:
-        charm.abort('Run program with more than 1 PE')
+        charm.abort("Run program with more than 1 PE")
     array_proxy = Array(Migrate, CHARES_PER_PE * charm.numPes())
     array_proxy.start()
 
diff --git a/tests/migration/test_migrate.py b/tests/migration/test_migrate.py
index 02e04232..2fe756b4 100644
--- a/tests/migration/test_migrate.py
+++ b/tests/migration/test_migrate.py
@@ -10,19 +10,23 @@
 class Test(Chare):
 
     def __init__(self, home_pes_future):
-        assert(not all_created)  # makes sure constructor is only called for creation, not migration
+        assert (
+            not all_created
+        )  # makes sure constructor is only called for creation, not migration
         self.iteration = 0
         self.originalPe = charm.myPe()
-        self.data = numpy.arange(100, dtype='int64') * (self.originalPe + 1)
+        self.data = numpy.arange(100, dtype="int64") * (self.originalPe + 1)
         # notify controllers that array elements are created and pass home PE of every element
         self.contribute(charm.myPe(), Reducer.gather, home_pes_future)
 
     def start(self):
         if self.thisIndex == (0,) and self.iteration % 20 == 0:
-            print('Iteration ' + str(self.iteration))
+            print("Iteration " + str(self.iteration))
         self.check()
-        A = numpy.arange(1000, dtype='float64')
-        work = 1000 * int(round(math.log(charm.myPe() + 1) + 1))  # elements in higher PEs do more work
+        A = numpy.arange(1000, dtype="float64")
+        work = 1000 * int(
+            round(math.log(charm.myPe() + 1) + 1)
+        )  # elements in higher PEs do more work
         for i in range(work):
             A += 1.33
         self.iteration += 1
@@ -36,17 +40,22 @@ def start(self):
     def resumeFromSync(self):
         self.start()
 
-    def check(self):  # check that my attributes haven't changed as a result of migrating
-        assert(self.originalPe == arrayElemHomeMap[self.thisIndex[0]])
-        v = numpy.arange(100, dtype='int64') * (self.originalPe + 1)
+    def check(
+        self,
+    ):  # check that my attributes haven't changed as a result of migrating
+        assert self.originalPe == arrayElemHomeMap[self.thisIndex[0]]
+        v = numpy.arange(100, dtype="int64") * (self.originalPe + 1)
         numpy.testing.assert_allclose(self.data, v)
 
 
 def main(args):
     home_pes = Future()
     array = Array(Test, charm.numPes() * 4, args=[home_pes], useAtSync=True)
-    charm.thisProxy.updateGlobals({'all_created': True, 'arrayElemHomeMap': home_pes.get()},
-                                  '__main__', awaitable=True).get()
+    charm.thisProxy.updateGlobals(
+        {"all_created": True, "arrayElemHomeMap": home_pes.get()},
+        "__main__",
+        awaitable=True,
+    ).get()
     array.start()
 
 
diff --git a/tests/migration/test_nonmigratables.py b/tests/migration/test_nonmigratables.py
index b87b9d74..307f2bf6 100644
--- a/tests/migration/test_nonmigratables.py
+++ b/tests/migration/test_nonmigratables.py
@@ -1,6 +1,7 @@
 from charm4py import charm, Chare, Array
 import sys
-sys.argv += ['+balancer', 'RandCentLB']
+
+sys.argv += ["+balancer", "RandCentLB"]
 
 MAX_ITER = 100
 
diff --git a/tests/pool/pool.py b/tests/pool/pool.py
index b7f26bc5..c145f332 100644
--- a/tests/pool/pool.py
+++ b/tests/pool/pool.py
@@ -36,7 +36,7 @@ def main(args):
             for _ in range(NUM_TRIALS):
                 result = charm.pool.map(func, tasks, chunksize=chunksize)
                 assert result == [func(x) for x in tasks]
-            print('Elapsed=', time() - t0)
+            print("Elapsed=", time() - t0)
 
     # test charm.pool.submit()
     funcs = [square, square_coro, add_val, add_val_coro]
@@ -48,7 +48,7 @@ def main(args):
         for _ in range(NUM_TRIALS):
             result = charm.pool.submit(tasks, chunksize=chunksize)
             assert result == [f(x) for f, x in tasks]
-        print('Elapsed=', time() - t0)
+        print("Elapsed=", time() - t0)
 
     exit()
 
diff --git a/tests/pool/pool_ncores.py b/tests/pool/pool_ncores.py
index c9539b5c..fee72bff 100644
--- a/tests/pool/pool_ncores.py
+++ b/tests/pool/pool_ncores.py
@@ -2,7 +2,7 @@
 
 
 def square(x):
-    return x ** 2
+    return x**2
 
 
 def add_val(x):
diff --git a/tests/qd/qd.py b/tests/qd/qd.py
index 4b5ecccc..9485780c 100644
--- a/tests/qd/qd.py
+++ b/tests/qd/qd.py
@@ -58,7 +58,7 @@ def __init__(self, args):
         assert charm.numPes() > 1
         numChares = charm.numPes() * CHARES_PER_PE
         self.workers = Array(Worker, numChares, args=[numChares])
-        print('WORK_TIME=', WORK_TIME)
+        print("WORK_TIME=", WORK_TIME)
         qdGroupReceivers = Group(QDReceiver, args=[self.thisProxy])
         qdArrayReceivers = Array(QDReceiver, charm.numPes(), args=[self.thisProxy])
         charm.awaitCreation(self.workers, qdGroupReceivers, qdArrayReceivers)
@@ -82,9 +82,9 @@ def testQD(self, callback):
             charm.startQD(callback)
             if isinstance(callback, threads.Future):
                 callback.get()
-                print('QD reached')
+                print("QD reached")
             else:
-                self.wait('self.qdReached')
+                self.wait("self.qdReached")
         else:
             charm.waitQD()
         assert time() - t0 > WORK_TIME
@@ -92,7 +92,7 @@ def testQD(self, callback):
         check_fut.get()
 
     def recvQD(self):
-        print('QD reached')
+        print("QD reached")
         self.qdReached = True
 
 
diff --git a/tests/reductions/allreduce.py b/tests/reductions/allreduce.py
index d060fde5..941d7115 100644
--- a/tests/reductions/allreduce.py
+++ b/tests/reductions/allreduce.py
@@ -41,7 +41,7 @@ def main(args):
 
     for done in wait_alldone:
         done.get()
-    print('DONE')
+    print("DONE")
     exit()
 
 
diff --git a/tests/reductions/array_reduction.py b/tests/reductions/array_reduction.py
index bc2833a2..b4f5b2c1 100644
--- a/tests/reductions/array_reduction.py
+++ b/tests/reductions/array_reduction.py
@@ -6,7 +6,9 @@
 # utility methods for assertions
 def assert_allclose(actual, desired, tol):
     assert len(actual) == len(desired)
-    assert sum([(abs(actual[i] - v) <= tol) for i, v in enumerate(desired)]) == len(actual)
+    assert sum([(abs(actual[i] - v) <= tol) for i, v in enumerate(desired)]) == len(
+        actual
+    )
 
 
 def assert_almost_equal(actual, desired, tol):
@@ -27,53 +29,67 @@ def __init__(self, args):
         nElements = 1
         for x in ARRAY_SIZE:
             nElements *= x
-        print('Running reduction example on ' + str(charm.numPes()) + ' processors for ' + str(nElements) + ' elements, array dims=' + str(ARRAY_SIZE))
+        print(
+            "Running reduction example on "
+            + str(charm.numPes())
+            + " processors for "
+            + str(nElements)
+            + " elements, array dims="
+            + str(ARRAY_SIZE)
+        )
         arrProxy = Array(Test, ARRAY_SIZE)
         groupProxy = Group(TestGroup)
-        charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy, 'arrProxy': arrProxy,
-                                       'groupProxy': groupProxy}, '__main__', awaitable=True).get()
+        charm.thisProxy.updateGlobals(
+            {
+                "mainProxy": self.thisProxy,
+                "arrProxy": arrProxy,
+                "groupProxy": groupProxy,
+            },
+            "__main__",
+            awaitable=True,
+        ).get()
         arrProxy.doReduction()
 
     def done_int(self, reduction_result):
-        assert reduction_result == 420, 'Array-to-singleton sum_int reduction failed'
-        print('[Main] All sum_int contributions done. Test passed')
+        assert reduction_result == 420, "Array-to-singleton sum_int reduction failed"
+        print("[Main] All sum_int contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_nop(self):
-        print('[Main] All nop contributions received. Test passed')
+        print("[Main] All nop contributions received. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_float(self, reduction_result):
         assert_allclose(reduction_result, [101.0, 134.0, 45.0], 1e-03)
-        print('[Main] All sum_float contributions done. Test passed')
+        print("[Main] All sum_float contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_array_to_array(self):
-        print('[Main] All array-to-array contributions done. Test passed')
+        print("[Main] All array-to-array contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_array_to_array_bcast(self):
-        print('[Main] All array-to-array bcast contributions done. Test passed')
+        print("[Main] All array-to-array bcast contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_array_to_group(self):
-        print('[Main] All array-to-group contributions done. Test passed')
+        print("[Main] All array-to-group contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_array_to_group_bcast(self):
-        print('[Main] All array-to-group bcast contributions done. Test passed')
+        print("[Main] All array-to-group bcast contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
@@ -82,25 +98,37 @@ def done_array_to_group_bcast(self):
 class Test(Chare):
 
     def __init__(self):
-        print('Test ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe()))
+        print("Test " + str(self.thisIndex) + " created on PE " + str(charm.myPe()))
 
     def doReduction(self):
-        print('Test element ' + str(self.thisIndex) + ' on PE ' + str(charm.myPe()) + ' is starting its contributions.')
+        print(
+            "Test element "
+            + str(self.thisIndex)
+            + " on PE "
+            + str(charm.myPe())
+            + " is starting its contributions."
+        )
         # test contributing single int back to Main
         self.contribute(42, Reducer.sum, mainProxy.done_int)
         # test contributing list of floats back to main
         num = [10.1, 13.4]
-        self.contribute(num+[float(self.thisIndex[0])], Reducer.sum, mainProxy.done_float)
+        self.contribute(
+            num + [float(self.thisIndex[0])], Reducer.sum, mainProxy.done_float
+        )
         # test nop reduction to main
         self.contribute(None, Reducer.nop, mainProxy.done_nop)
         # test contributing to Test[0]
         self.contribute(4.2, Reducer.sum, self.thisProxy[0].reductionTarget)
         # test contributing to Test (broadcast)
-        self.contribute(numpy.array([4.2, 8.4]), Reducer.sum, self.thisProxy.reductionTargetBcast)
+        self.contribute(
+            numpy.array([4.2, 8.4]), Reducer.sum, self.thisProxy.reductionTargetBcast
+        )
         # test contributing to TestGroup[0]
         self.contribute(4, Reducer.sum, groupProxy[0].reduceFromArray)
         # test contributing to TestGroup (broadcast)
-        self.contribute(array.array('i', [0, 8, 3]), Reducer.sum, groupProxy.reduceFromArrayBcast)
+        self.contribute(
+            array.array("i", [0, 8, 3]), Reducer.sum, groupProxy.reduceFromArrayBcast
+        )
 
     def reductionTarget(self, reduction_result):
         assert self.thisIndex[0] == 0
@@ -115,15 +143,21 @@ def reductionTargetBcast(self, reduction_result):
 class TestGroup(Chare):
 
     def __init__(self):
-        print('TestGroup ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe()))
+        print(
+            "TestGroup " + str(self.thisIndex) + " created on PE " + str(charm.myPe())
+        )
 
     def reduceFromArray(self, reduction_result):
         assert self.thisIndex == 0
-        assert reduction_result == 40, 'Array-to-group sum_int reduction failed.'
+        assert reduction_result == 40, "Array-to-group sum_int reduction failed."
         mainProxy.done_array_to_group()
 
     def reduceFromArrayBcast(self, reduction_result):
-        assert list(reduction_result) == [0, 80, 30], 'Array-to-group bcast sum_int reduction failed.'
+        assert list(reduction_result) == [
+            0,
+            80,
+            30,
+        ], "Array-to-group bcast sum_int reduction failed."
         self.contribute(None, None, mainProxy.done_array_to_group_bcast)
 
 
diff --git a/tests/reductions/bench_reductions.py b/tests/reductions/bench_reductions.py
index 380fa5af..9f324c80 100644
--- a/tests/reductions/bench_reductions.py
+++ b/tests/reductions/bench_reductions.py
@@ -19,8 +19,11 @@ def assert_almost_equal(actual, desired, tol):
 class Main(Chare):
 
     def __init__(self, args):
-        charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy,
-                                       'NUM_CHARES': charm.numPes() * CHARES_PER_PE}, '__main__', awaitable=True).get()
+        charm.thisProxy.updateGlobals(
+            {"mainProxy": self.thisProxy, "NUM_CHARES": charm.numPes() * CHARES_PER_PE},
+            "__main__",
+            awaitable=True,
+        ).get()
         self.arrayProxy = Array(Test, NUM_CHARES)
         self.arrayProxy.run()
         self.startTime = time.time()
@@ -30,7 +33,7 @@ def collectSum(self, result):
         self.arrayProxy.run()
 
     def done(self):
-        print('Program done in', time.time() - self.startTime)
+        print("Program done in", time.time() - self.startTime)
         charm.printStats()
         exit()
 
@@ -38,7 +41,7 @@ def done(self):
 class Test(Chare):
 
     def __init__(self):
-        self.data = numpy.arange(DATA_LEN, dtype='float64')
+        self.data = numpy.arange(DATA_LEN, dtype="float64")
         self.reductions = 0
 
     def run(self):
diff --git a/tests/reductions/custom_reduction.py b/tests/reductions/custom_reduction.py
index 51c19ab8..14a55815 100644
--- a/tests/reductions/custom_reduction.py
+++ b/tests/reductions/custom_reduction.py
@@ -24,37 +24,52 @@ def __init__(self, args):
 
         nDims = 1
         ARRAY_SIZE = [10] * nDims
-        lastIdx = tuple([x-1 for x in ARRAY_SIZE])
+        lastIdx = tuple([x - 1 for x in ARRAY_SIZE])
 
         self.nElements = 1
         for x in ARRAY_SIZE:
             self.nElements *= x
-        print('Running reduction example on ' + str(charm.numPes()) + ' processors for ' + str(self.nElements) + ' elements, array dims=' + str(ARRAY_SIZE))
+        print(
+            "Running reduction example on "
+            + str(charm.numPes())
+            + " processors for "
+            + str(self.nElements)
+            + " elements, array dims="
+            + str(ARRAY_SIZE)
+        )
         arrProxy = Array(Test, ARRAY_SIZE)
-        charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy, 'arrProxy': arrProxy,
-                                       'lastIdx': lastIdx}, '__main__', awaitable=True).get()
+        charm.thisProxy.updateGlobals(
+            {"mainProxy": self.thisProxy, "arrProxy": arrProxy, "lastIdx": lastIdx},
+            "__main__",
+            awaitable=True,
+        ).get()
         arrProxy.doReduction()
 
     def done_charm_builtin(self, result):
-        sum_indices = (self.nElements*(self.nElements-1))/2
-        assert list(result) == [10, sum_indices], 'Built-in Charm sum_int reduction failed'
-        print('[Main] All Charm builtin reductions done. Test passed')
+        sum_indices = (self.nElements * (self.nElements - 1)) / 2
+        assert list(result) == [
+            10,
+            sum_indices,
+        ], "Built-in Charm sum_int reduction failed"
+        print("[Main] All Charm builtin reductions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_python_builtin(self, result):
-        sum_indices = (self.nElements*(self.nElements-1))/2
+        sum_indices = (self.nElements * (self.nElements - 1)) / 2
         assert type(result) == MyObject
-        assert result.value == sum_indices or result.value == 0, 'Built-in Python _sum or _product reduction failed'
-        print('[Main] All Python builtin reductions done. Test passed')
+        assert (
+            result.value == sum_indices or result.value == 0
+        ), "Built-in Python _sum or _product reduction failed"
+        print("[Main] All Python builtin reductions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_python_custom(self, result):
-        assert result == [10, lastIdx[0], 0], 'Custom Python myReduce failed'
-        print('[Main] All Python custom reductions done. Test passed')
+        assert result == [10, lastIdx[0], 0], "Custom Python myReduce failed"
+        print("[Main] All Python custom reductions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
@@ -66,10 +81,10 @@ def __init__(self, n):
         self.value = n
 
     def __add__(self, other):
-        return MyObject(self.value+other.value)
+        return MyObject(self.value + other.value)
 
     def __mul__(self, other):
-        return MyObject(self.value*other.value)
+        return MyObject(self.value * other.value)
 
     def __radd__(self, other):
         if other == 0:
@@ -81,18 +96,24 @@ def __radd__(self, other):
 class Test(Chare):
 
     def __init__(self):
-        print('Test ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe()))
+        print("Test " + str(self.thisIndex) + " created on PE " + str(charm.myPe()))
 
     def doReduction(self):
         # test contributing using built-in Charm reducer
-        self.contribute([1, self.thisIndex[0]], Reducer.sum, mainProxy.done_charm_builtin)
+        self.contribute(
+            [1, self.thisIndex[0]], Reducer.sum, mainProxy.done_charm_builtin
+        )
         a = MyObject(self.thisIndex[0])
         # test contributing using built-in Python reducer
         self.contribute(a, Reducer.sum, mainProxy.done_python_builtin)
         # test product reducer
         self.contribute(a, Reducer.product, mainProxy.done_python_builtin)
         # test contributing using custom Python reducer
-        self.contribute([1, self.thisIndex[0], self.thisIndex[0]], Reducer.myReducer, mainProxy.done_python_custom)
+        self.contribute(
+            [1, self.thisIndex[0], self.thisIndex[0]],
+            Reducer.myReducer,
+            mainProxy.done_python_custom,
+        )
 
 
 charm.start(Main)
diff --git a/tests/reductions/future_reduction.py b/tests/reductions/future_reduction.py
index 6f15a729..bf5c4049 100644
--- a/tests/reductions/future_reduction.py
+++ b/tests/reductions/future_reduction.py
@@ -4,7 +4,7 @@
 
 class Test(Chare):
     def __init__(self, f):
-        data = np.arange(10, dtype='float64')
+        data = np.arange(10, dtype="float64")
         self.contribute(data, Reducer.sum, f)
 
 
@@ -13,8 +13,12 @@ def main(args):
     f2 = Future()
     Group(Test, args=[f1])
     Array(Test, charm.numPes() * 4, args=[f2])
-    np.testing.assert_allclose(f1.get(), np.arange(10, dtype='float64') * charm.numPes())
-    np.testing.assert_allclose(f2.get(), np.arange(10, dtype='float64') * charm.numPes() * 4)
+    np.testing.assert_allclose(
+        f1.get(), np.arange(10, dtype="float64") * charm.numPes()
+    )
+    np.testing.assert_allclose(
+        f2.get(), np.arange(10, dtype="float64") * charm.numPes() * 4
+    )
     exit()
 
 
diff --git a/tests/reductions/group_reduction.py b/tests/reductions/group_reduction.py
index 4811eedf..92e6b675 100644
--- a/tests/reductions/group_reduction.py
+++ b/tests/reductions/group_reduction.py
@@ -4,7 +4,9 @@
 # utility methods for assertions
 def assert_allclose(actual, desired, tol):
     assert len(actual) == len(desired)
-    assert sum([(abs(actual[i] - v) <= tol) for i, v in enumerate(desired)]) == len(actual)
+    assert sum([(abs(actual[i] - v) <= tol) for i, v in enumerate(desired)]) == len(
+        actual
+    )
 
 
 def assert_almost_equal(actual, desired, tol):
@@ -25,57 +27,62 @@ def __init__(self, args):
         nElements = 1
         for x in ARRAY_SIZE:
             nElements *= x
-        print('Running reduction example on ' + str(charm.numPes()) + ' processors')
+        print("Running reduction example on " + str(charm.numPes()) + " processors")
         groupProxy = Group(TestGroup)
         # create an array to test group-to-array reductions
         arrayProxy = Array(TestArray, ARRAY_SIZE)
-        charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy, 'arrayProxy': arrayProxy},
-                                      '__main__', awaitable=True).get()
+        charm.thisProxy.updateGlobals(
+            {"mainProxy": self.thisProxy, "arrayProxy": arrayProxy},
+            "__main__",
+            awaitable=True,
+        ).get()
         groupProxy.doReduction()
 
     def done_int(self, reduction_result):
-        assert reduction_result == 42*charm.numPes(), 'Group-to-singleton sum_int reduction failed'
-        print('[Main] All sum_int contributions done. Test passed')
+        assert (
+            reduction_result == 42 * charm.numPes()
+        ), "Group-to-singleton sum_int reduction failed"
+        print("[Main] All sum_int contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_nop(self):
-        print('[Main] All nop contributions received. Test passed')
+        print("[Main] All nop contributions received. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_float(self, reduction_result):
-        expected_result = [x*charm.numPes() for x in [10.1, 13.4]]
-        indices_sum = (charm.numPes() * (charm.numPes() - 1))/2
+        expected_result = [x * charm.numPes() for x in [10.1, 13.4]]
+        indices_sum = (charm.numPes() * (charm.numPes() - 1)) / 2
         expected_result += [float(indices_sum)]
         assert_allclose(reduction_result, expected_result, 1e-03)
-        print('[Main] All sum_float contributions done. Test passed')
+        print("[Main] All sum_float contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_group_to_array(self):
-        print('[Main] All group-to-array contributions done. Test passed')
+        print("[Main] All group-to-array contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_group_to_array_bcast(self):
-        print('[Main] All group-to-array bcast contributions done. Test passed')
+        print("[Main] All group-to-array bcast contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_group_to_group(self):
-        print('[Main] All group-to-group contributions done. Test passed')
+        print("[Main] All group-to-group contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
 
     def done_group_to_group_bcast(self):
-        print('[Main] All group-to-group bcast contributions done. Test passed')
+        print("[Main] All group-to-group bcast contributions done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
@@ -84,15 +91,23 @@ def done_group_to_group_bcast(self):
 class TestGroup(Chare):
 
     def __init__(self):
-        print('TestGroup ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe()))
+        print(
+            "TestGroup " + str(self.thisIndex) + " created on PE " + str(charm.myPe())
+        )
 
     def doReduction(self):
-        print('TestGroup element on PE ' + str(charm.myPe()) + ' is starting its contributions.')
+        print(
+            "TestGroup element on PE "
+            + str(charm.myPe())
+            + " is starting its contributions."
+        )
         # test contributing single int back to Main
         self.contribute(42, Reducer.sum, mainProxy.done_int)
         # test contributing list of floats back to Main
         num = [10.1, 13.4]
-        self.contribute(num+[float(self.thisIndex)], Reducer.sum, mainProxy.done_float)
+        self.contribute(
+            num + [float(self.thisIndex)], Reducer.sum, mainProxy.done_float
+        )
         # test nop reduction to main
         self.contribute(None, Reducer.nop, mainProxy.done_nop)
         # test contributing to TestArray[0]
@@ -100,32 +115,42 @@ def doReduction(self):
         # test contributing to TestArray (broadcast)
         self.contribute(-4, Reducer.sum, arrayProxy.reduceGroupToArrayBcast)
         # test contributing to TestGroup[0]
-        self.contribute([5, 7, -3, 0], Reducer.sum, self.thisProxy[0].reduceGroupToGroup)
+        self.contribute(
+            [5, 7, -3, 0], Reducer.sum, self.thisProxy[0].reduceGroupToGroup
+        )
         # test contributing to TestGroup (broadcast)
         self.contribute(-4.2, Reducer.sum, self.thisProxy.reduceGroupToGroupBcast)
 
     def reduceGroupToGroup(self, reduction_result):
         assert self.thisIndex == 0
-        assert list(reduction_result) == [charm.numPes()*x for x in [5, 7, -3, 0]], 'Group-to-group reduction failed.'
+        assert list(reduction_result) == [
+            charm.numPes() * x for x in [5, 7, -3, 0]
+        ], "Group-to-group reduction failed."
         mainProxy.done_group_to_group()
 
     def reduceGroupToGroupBcast(self, reduction_result):
-        assert_almost_equal(reduction_result, -4.2*charm.numPes(), 1e-03)
+        assert_almost_equal(reduction_result, -4.2 * charm.numPes(), 1e-03)
         self.contribute(None, None, mainProxy.done_group_to_group_bcast)
 
 
 class TestArray(Chare):
 
     def __init__(self):
-        print('TestArray ' + str(self.thisIndex) + ' created on PE ' + str(charm.myPe()))
+        print(
+            "TestArray " + str(self.thisIndex) + " created on PE " + str(charm.myPe())
+        )
 
     def reduceGroupToArray(self, reduction_result):
         assert self.thisIndex[0] == 0
-        assert_allclose(reduction_result, [charm.numPes()*x for x in [4.2, 13.1]], 1e-03)
+        assert_allclose(
+            reduction_result, [charm.numPes() * x for x in [4.2, 13.1]], 1e-03
+        )
         mainProxy.done_group_to_array()
 
     def reduceGroupToArrayBcast(self, reduction_result):
-        assert reduction_result == -4*charm.numPes(), 'Group-to-array bcast reduction failed.'
+        assert (
+            reduction_result == -4 * charm.numPes()
+        ), "Group-to-array bcast reduction failed."
         self.contribute(None, None, mainProxy.done_group_to_array_bcast)
 
 
diff --git a/tests/reductions/section_reduction.py b/tests/reductions/section_reduction.py
index a1638ffc..86c2ea3d 100644
--- a/tests/reductions/section_reduction.py
+++ b/tests/reductions/section_reduction.py
@@ -51,10 +51,10 @@ def test_op(done, op, vector_size, use_numpy=False):
                 assert list(val1) == list(val2)
         else:
             assert val1 == val2
-        print('[Main] Reduction with Reducer.%s passes.' % get_op_name(op))
+        print("[Main] Reduction with Reducer.%s passes." % get_op_name(op))
         done(True)
     except AssertionError:
-        print('[Main] Reduction with Reducer.%s is not correct.' % get_op_name(op))
+        print("[Main] Reduction with Reducer.%s is not correct." % get_op_name(op))
         done(False)
 
 
@@ -64,7 +64,7 @@ def test_op_logical(done, op, vector_size, use_numpy=False):
         if use_numpy:
             data = np.random.rand(vector_size)
             p = 0.1
-            data = np.random.choice(a=[False, True], size=(vector_size), p=[p, 1-p])
+            data = np.random.choice(a=[False, True], size=(vector_size), p=[p, 1 - p])
         else:
             data = list(map(bool, range(0, vector_size)))
     else:
@@ -82,10 +82,10 @@ def test_op_logical(done, op, vector_size, use_numpy=False):
             assert list(val1) == list(val2)
         else:
             assert val1 == val2
-        print('[Main] Reduction with Reducer.%s passes.' % get_op_name(op))
+        print("[Main] Reduction with Reducer.%s passes." % get_op_name(op))
         done(True)
     except AssertionError:
-        print('[Main] Reduction with Reducer.%s is not correct.' % get_op_name(op))
+        print("[Main] Reduction with Reducer.%s is not correct." % get_op_name(op))
         done(False)
 
 
@@ -139,10 +139,10 @@ def main(args):
     passes = sum(map(lambda x: x.get(), test_futures))
 
     if passes == num_tests:
-        print('All tests passed!')
+        print("All tests passed!")
         exit()
     else:
-        print('ERROR: Not all tests passed.')
+        print("ERROR: Not all tests passed.")
         exit(1)
 
 
diff --git a/tests/reductions/test_gather.py b/tests/reductions/test_gather.py
index f81423db..fa8cd606 100644
--- a/tests/reductions/test_gather.py
+++ b/tests/reductions/test_gather.py
@@ -15,10 +15,19 @@ def __init__(self, args):
         self.nElements = 1
         for x in ARRAY_SIZE:
             self.nElements *= x
-        print('Running gather example on', charm.numPes(), 'processors for', self.nElements, 'elements, array dims=', ARRAY_SIZE)
+        print(
+            "Running gather example on",
+            charm.numPes(),
+            "processors for",
+            self.nElements,
+            "elements, array dims=",
+            ARRAY_SIZE,
+        )
         arrProxy = Array(Test, ARRAY_SIZE)
         grpProxy = Group(TestGroup)
-        charm.thisProxy.updateGlobals({'mainProxy': self.thisProxy}, '__main__', awaitable=True).get()
+        charm.thisProxy.updateGlobals(
+            {"mainProxy": self.thisProxy}, "__main__", awaitable=True
+        ).get()
         arrProxy.doGather()
         grpProxy.doGather()
         red_future = charm.Future()
@@ -28,8 +37,10 @@ def __init__(self, args):
     def done_gather_single(self, result):
         gather_arr_indices = list(range(self.nElements))
         gather_grp_indices = list(range(charm.numPes()))
-        assert result == gather_arr_indices or result == gather_grp_indices, 'Gather single elements failed.'
-        print('[Main] Gather collective for single elements done. Test passed')
+        assert (
+            result == gather_arr_indices or result == gather_grp_indices
+        ), "Gather single elements failed."
+        print("[Main] Gather collective for single elements done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
@@ -37,8 +48,10 @@ def done_gather_single(self, result):
     def done_gather_array(self, result):
         gather_arr_indices = [tuple([i]) for i in range(self.nElements)]
         gather_grp_indices = [[i, 42] for i in range(charm.numPes())]
-        assert result == gather_arr_indices or result == gather_grp_indices, 'Gather arrays failed.'
-        print('[Main] Gather collective for arrays done. Test passed')
+        assert (
+            result == gather_arr_indices or result == gather_grp_indices
+        ), "Gather arrays failed."
+        print("[Main] Gather collective for arrays done. Test passed")
         self.recvdReductions += 1
         if self.recvdReductions >= self.expectedReductions:
             exit()
@@ -47,12 +60,14 @@ def done_gather_array(self, result):
 class Test(Chare):
 
     def __init__(self):
-        print('Test', self.thisIndex, 'created on PE', charm.myPe())
+        print("Test", self.thisIndex, "created on PE", charm.myPe())
 
     def doGather(self, red_future=None):
         if red_future is None:
             # gather single elements
-            self.contribute(self.thisIndex[0], Reducer.gather, mainProxy.done_gather_single)
+            self.contribute(
+                self.thisIndex[0], Reducer.gather, mainProxy.done_gather_single
+            )
             # gather arrays
             self.contribute(self.thisIndex, Reducer.gather, mainProxy.done_gather_array)
         else:
@@ -62,13 +77,15 @@ def doGather(self, red_future=None):
 class TestGroup(Chare):
 
     def __init__(self):
-        print('TestGroup', self.thisIndex, 'created on PE', charm.myPe())
+        print("TestGroup", self.thisIndex, "created on PE", charm.myPe())
 
     def doGather(self):
         # gather single elements
         self.contribute(self.thisIndex, Reducer.gather, mainProxy.done_gather_single)
         # gather arrays
-        self.contribute([self.thisIndex, 42], Reducer.gather, mainProxy.done_gather_array)
+        self.contribute(
+            [self.thisIndex, 42], Reducer.gather, mainProxy.done_gather_array
+        )
 
 
 charm.start(Main)
diff --git a/tests/sections/callbacks.py b/tests/sections/callbacks.py
index b1b10ca7..cff12fe5 100644
--- a/tests/sections/callbacks.py
+++ b/tests/sections/callbacks.py
@@ -45,7 +45,7 @@ def work1(self, cb, secProxy=None):
         self.contribute(3, Reducer.sum, cb, secProxy)
 
     def work2(self, cb, secProxy=None):
-        data = numpy.arange(100, dtype='float64')
+        data = numpy.arange(100, dtype="float64")
         self.contribute(data, Reducer.sum, cb, secProxy)
 
     def work3(self, cb, secProxy=None):
@@ -56,7 +56,7 @@ def work4(self, cb, secProxy=None):
 
     def work5(self, cb, secProxy=None):
         if self.idx == 1:
-            cb('test section callback')
+            cb("test section callback")
 
 
 def main(args):
@@ -89,15 +89,15 @@ def main(args):
         assert f.get() == (numchares // 2)
 
         f = Future()
-        expected = numpy.arange(100, dtype='float64')
+        expected = numpy.arange(100, dtype="float64")
         expected *= numchares
         collection.setTest(f, expected, awaitable=True).get()
         collection.work2(secProxy.recvResult)
         assert f.get() == (numchares // 2)
 
         f = Future()
-        expected = numpy.arange(100, dtype='float64')
-        expected *= (numchares // 2)
+        expected = numpy.arange(100, dtype="float64")
+        expected *= numchares // 2
         secProxy.setTest(f, expected, awaitable=True).get()
         secProxy.work2(secProxy.recvResult, secProxy)
         assert f.get() == (numchares // 2)
@@ -127,7 +127,7 @@ def main(args):
         assert f.get() == (numchares // 2)
 
         f = Future()
-        expected = 'test section callback'
+        expected = "test section callback"
         collection.setTest(f, expected, awaitable=True).get()
         collection.work5(secProxy.recvResult)
         assert f.get() == (numchares // 2)
diff --git a/tests/sections/constrained_groups.py b/tests/sections/constrained_groups.py
index 47a59a2f..401e52d6 100644
--- a/tests/sections/constrained_groups.py
+++ b/tests/sections/constrained_groups.py
@@ -27,7 +27,7 @@ def main(args):
     assert charm.numPes() > 1
     global section_pes
     section_pes = random.sample(range(charm.numPes()), charm.numPes() // 2)
-    charm.thisProxy.updateGlobals({'section_pes': section_pes}, awaitable=True).get()
+    charm.thisProxy.updateGlobals({"section_pes": section_pes}, awaitable=True).get()
     g = Group(Test, onPEs=section_pes, args=[4862])
     assert g[section_pes[0]].test2(ret=True).get() == 34589
     g.test(awaitable=True).get()
diff --git a/tests/sections/multirand-split-combine.py b/tests/sections/multirand-split-combine.py
index 5be476f8..4ede3b1c 100644
--- a/tests/sections/multirand-split-combine.py
+++ b/tests/sections/multirand-split-combine.py
@@ -67,8 +67,8 @@ def addElems(self, elems):
 
     def verify(self, result):
         if set(result) != self.elems:
-            print('self.elems=', self.elems)
-            print('result=', result)
+            print("self.elems=", self.elems)
+            print("result=", result)
             raise Exception
 
     def split(self, N):
@@ -94,7 +94,7 @@ def split(self, N):
                     if (cid, idx) not in insections:
                         insections[(cid, idx)] = []
                     insections[(cid, idx)].append(i)
-        charm.thisProxy.updateGlobals({'insections': insections}, awaitable=True).get()
+        charm.thisProxy.updateGlobals({"insections": insections}, awaitable=True).get()
         assert len(sections) == N
         for section in sections:
             assert len(section) > 0
@@ -106,8 +106,7 @@ def split(self, N):
 
 def partition(elems, N):
     num_elems = len(elems)
-    return [elems[i*num_elems // N: (i+1)*num_elems // N]
-            for i in range(N)]
+    return [elems[i * num_elems // N : (i + 1) * num_elems // N] for i in range(N)]
 
 
 def inSections(obj):
@@ -161,11 +160,16 @@ def main(args):
             c = Collection([], proxy)
             for c_ in cs:
                 c.addElems(c_.elems)
-            assert hasattr(c.proxy, 'section') and c.proxy.issec
+            assert hasattr(c.proxy, "section") and c.proxy.issec
             sections_combined += 1
             collections.append(c)
-    print(len(collections), 'collections created, sections_split=', sections_split,
-          'sections_combined=', sections_combined)
+    print(
+        len(collections),
+        "collections created, sections_split=",
+        sections_split,
+        "sections_combined=",
+        sections_combined,
+    )
 
     if VERBOSE:
         section_sizes = []
@@ -173,11 +177,11 @@ def main(args):
             if c.proxy.issec is not None:
                 section_sizes.append(len(c.elems))
         section_sizes = numpy.array(section_sizes)
-        print(len(section_sizes), 'sections, sizes:')
-        print('min size=', numpy.min(section_sizes))
-        print('median size=', numpy.median(section_sizes))
-        print('mean size=', numpy.mean(section_sizes))
-        print('max size=', numpy.max(section_sizes))
+        print(len(section_sizes), "sections, sizes:")
+        print("min size=", numpy.min(section_sizes))
+        print("median size=", numpy.median(section_sizes))
+        print("mean size=", numpy.mean(section_sizes))
+        print("max size=", numpy.max(section_sizes))
 
     for c in collections:
         if c.proxy.issec:
@@ -187,7 +191,9 @@ def main(args):
 
     for _ in range(NUM_ITER):
         futures = [Future() for _ in range(len(collections))]
-        charm.thisProxy.updateGlobals({'DATA_VERIFY': random.randint(0, 100000)}, awaitable=True).get()
+        charm.thisProxy.updateGlobals(
+            {"DATA_VERIFY": random.randint(0, 100000)}, awaitable=True
+        ).get()
         data = DATA_VERIFY
         for i, c in enumerate(collections):
             sid = None
@@ -198,7 +204,7 @@ def main(args):
             result = futures[i].get()
             collections[i].verify(result)
 
-    print('DONE')
+    print("DONE")
     exit()
 
 
diff --git a/tests/sections/simple.py b/tests/sections/simple.py
index dbaa9b74..e99f6ed3 100644
--- a/tests/sections/simple.py
+++ b/tests/sections/simple.py
@@ -12,7 +12,7 @@ def member(obj):
 class Test(Chare):
 
     def __init__(self):
-        self.insection = (member(self) >= 0)
+        self.insection = member(self) >= 0
 
     def setSecProxy(self, proxy):
         self.secProxy = proxy
@@ -33,7 +33,7 @@ def main(args):
     array3d = Array(Test, (4, 5, 3))
 
     # for each array, create one section using member function to determine section membership
-    for array, size in [(array2d, 8*8), (array3d, 4*5*3)]:
+    for array, size in [(array2d, 8 * 8), (array3d, 4 * 5 * 3)]:
         secProxy = charm.split(array, 1, member)[0]
         array.setSecProxy(secProxy, awaitable=True).get()
         f = Future()
@@ -41,7 +41,7 @@ def main(args):
         assert len(f.get()) < size
 
     # for each array, create one section passing a random list of element indexes (half the size of the array)
-    for array, size in [(array2d, 8*8), (array3d, 4*5*3)]:
+    for array, size in [(array2d, 8 * 8), (array3d, 4 * 5 * 3)]:
         elems = array.getElems(ret=True).get()
         assert len(elems) == size
         section_elems = random.sample(elems, size // 2)
diff --git a/tests/sections/slice.py b/tests/sections/slice.py
index ea0b0107..11669dfb 100644
--- a/tests/sections/slice.py
+++ b/tests/sections/slice.py
@@ -17,14 +17,14 @@ def main(args):
     elems = list(range(0, charm.numPes(), 2))
     assert g[::2].getIdx(ret=True).get() == elems
     assert g[0::2].getIdx_th(ret=True).get() == elems
-    assert g[:charm.numPes():2].getIdx(ret=True).get() == elems
-    assert g[0:charm.numPes()].getIdx_th(ret=True).get() != elems
+    assert g[: charm.numPes() : 2].getIdx(ret=True).get() == elems
+    assert g[0 : charm.numPes()].getIdx_th(ret=True).get() != elems
 
     a1 = Array(Test, (8, 8))
     a2 = Array(Test, 64)
 
     indexes = a1[0:8:2, 1:8:2].getIdx(ret=True).get()
-    assert len(indexes) == 8*8//4
+    assert len(indexes) == 8 * 8 // 4
     for idx in indexes:
         assert len(idx) == 2
         assert idx[0] % 2 == 0
diff --git a/tests/thread_entry_methods/future_bcast.py b/tests/thread_entry_methods/future_bcast.py
index 91ae31ec..0a81fc2b 100644
--- a/tests/thread_entry_methods/future_bcast.py
+++ b/tests/thread_entry_methods/future_bcast.py
@@ -21,7 +21,7 @@ def main(args):
     t0 = time.time()
     a.work(sleepTimes, awaitable=True).get()  # wait for broadcast to complete
     wait_time = time.time() - t0
-    assert(wait_time >= max(sleepTimes))
+    assert wait_time >= max(sleepTimes)
     print(wait_time, max(sleepTimes))
 
     g = Group(Test)
@@ -30,7 +30,7 @@ def main(args):
     t0 = time.time()
     g.work(sleepTimes, awaitable=True).get()  # wait for broadcast to complete
     wait_time = time.time() - t0
-    assert(wait_time >= max(sleepTimes))
+    assert wait_time >= max(sleepTimes)
     print(wait_time, max(sleepTimes))
 
     exit()
diff --git a/tests/thread_entry_methods/test1.py b/tests/thread_entry_methods/test1.py
index 31614ae5..ff4a702b 100644
--- a/tests/thread_entry_methods/test1.py
+++ b/tests/thread_entry_methods/test1.py
@@ -36,7 +36,7 @@ def done(self):
 class Test2(Chare):
 
     def getVal(self):
-        return (73 + charm.myPe())
+        return 73 + charm.myPe()
 
 
 def main(args):
@@ -44,8 +44,9 @@ def main(args):
     # every chare sends to every other so don't want a ton of chares
     numChares = min(charm.numPes() * 8, 32)
     testGroup = Group(Test2)
-    charm.thisProxy.updateGlobals({'numChares': numChares, 'testGroup': testGroup},
-                                  '__main__', awaitable=True).get()
+    charm.thisProxy.updateGlobals(
+        {"numChares": numChares, "testGroup": testGroup}, "__main__", awaitable=True
+    ).get()
     Array(Test, numChares)
 
 
diff --git a/tests/thread_entry_methods/test1_when.py b/tests/thread_entry_methods/test1_when.py
index b9a28064..449d47df 100644
--- a/tests/thread_entry_methods/test1_when.py
+++ b/tests/thread_entry_methods/test1_when.py
@@ -23,10 +23,15 @@ def start(self, pes):
         self.contribute(None, None, self.thisProxy[0].done)
 
     @coro
-    @when('self.iteration == iteration')
+    @when("self.iteration == iteration")
     def getVal(self, iteration):
-        result = 53 * testGroup[charm.myPe()].getVal(ret=True).get() * self.thisIndex[0] * self.iteration
-        #assert result == 53 * (73 + charm.myPe()) * self.thisIndex[0] * self.iteration
+        result = (
+            53
+            * testGroup[charm.myPe()].getVal(ret=True).get()
+            * self.thisIndex[0]
+            * self.iteration
+        )
+        # assert result == 53 * (73 + charm.myPe()) * self.thisIndex[0] * self.iteration
         self.msgsRcvd += 1
         if self.msgsRcvd == numChares:
             self.msgsRcvd = 0
@@ -41,15 +46,16 @@ def done(self):
 class Test2(Chare):
 
     def getVal(self):
-        return (73 + charm.myPe())
+        return 73 + charm.myPe()
 
 
 def main(args):
     global numChares, testGroup
     numChares = min(charm.numPes() * 8, 32)
     testGroup = Group(Test2)
-    charm.thisProxy.updateGlobals({'numChares': numChares, 'testGroup': testGroup},
-                                  '__main__', awaitable=True).get()
+    charm.thisProxy.updateGlobals(
+        {"numChares": numChares, "testGroup": testGroup}, "__main__", awaitable=True
+    ).get()
     Array(Test, numChares)
 
 
diff --git a/tests/thread_entry_methods/test_wait.py b/tests/thread_entry_methods/test_wait.py
index 2e654de5..c18b4c88 100644
--- a/tests/thread_entry_methods/test_wait.py
+++ b/tests/thread_entry_methods/test_wait.py
@@ -30,11 +30,17 @@ def __init__(self, args):
         self.result = 0
         for i in range(NUM_ITER):
             workers.sendVal()
-            self.wait("self.num_responses1 == " + str(num_chares//2) + " and 33 == TEST_GLOBAL")
-            self.wait("self.num_responses2 == " + str(num_chares//2) + " and 47 == ro.X")
-            assert(self.result == num_chares * 237)
-            assert(self.num_responses1 == num_chares//2)
-            assert(self.num_responses2 == num_chares//2)
+            self.wait(
+                "self.num_responses1 == "
+                + str(num_chares // 2)
+                + " and 33 == TEST_GLOBAL"
+            )
+            self.wait(
+                "self.num_responses2 == " + str(num_chares // 2) + " and 47 == ro.X"
+            )
+            assert self.result == num_chares * 237
+            assert self.num_responses1 == num_chares // 2
+            assert self.num_responses2 == num_chares // 2
             self.num_responses1 = self.num_responses2 = 0
             self.result = 0
         charm.printStats()
diff --git a/tests/topo/topo_treeAPI.py b/tests/topo/topo_treeAPI.py
index 4e4a91be..1d312967 100644
--- a/tests/topo/topo_treeAPI.py
+++ b/tests/topo/topo_treeAPI.py
@@ -1,7 +1,7 @@
 from charm4py import charm
 
 
-allPes_check  = []
+allPes_check = []
 evenPes_check = []
 
 
@@ -27,20 +27,24 @@ def main(args):
 
     print("\nWhole topo tree rooted at PE 0")
     printWholeTree(0, 0)
-    assert(len(allPes_check) == charm.numPes() and set(allPes_check) == set(range(charm.numPes())))
+    assert len(allPes_check) == charm.numPes() and set(allPes_check) == set(
+        range(charm.numPes())
+    )
     allPes_check = []
 
     lastPE = charm.numPes() - 1
     if lastPE != 0:
         print("\nWhole topo tree rooted at", lastPE)
         printWholeTree(lastPE, lastPE)
-        assert(len(allPes_check) == charm.numPes() and set(allPes_check) == set(range(charm.numPes())))
+        assert len(allPes_check) == charm.numPes() and set(allPes_check) == set(
+            range(charm.numPes())
+        )
         allPes_check = []
 
     print("\nEven numbered PE tree, rooted at PE 0")
     evenPEs = [pe for pe in range(charm.numPes()) if pe % 2 == 0]
     printEvenNbTree(evenPEs, 0)
-    assert(len(evenPes_check) == len(evenPEs) and set(evenPes_check) == set(evenPEs))
+    assert len(evenPes_check) == len(evenPEs) and set(evenPes_check) == set(evenPEs)
     evenPes_check = []
 
     newRoot = evenPEs[-1]
@@ -48,7 +52,7 @@ def main(args):
         evenPEs.insert(0, evenPEs.pop())  # move root from back to beginning of list
         print("\nEven numbered PE tree, rooted at PE", newRoot)
         printEvenNbTree(evenPEs, newRoot)
-        assert(len(evenPes_check) == len(evenPEs) and set(evenPes_check) == set(evenPEs))
+        assert len(evenPes_check) == len(evenPEs) and set(evenPes_check) == set(evenPEs)
         evenPes_check = []
 
     exit()
diff --git a/tests/when/perf_test.py b/tests/when/perf_test.py
index 78033114..dbfce8b7 100644
--- a/tests/when/perf_test.py
+++ b/tests/when/perf_test.py
@@ -9,15 +9,15 @@
 class Worker(Chare):
 
     def start(self, done_future):
-        self.cur_id    = 0
+        self.cur_id = 0
         self.phase_cnt = 0
         self.done_future = done_future
 
     @when("self.cur_id == id")
     def recv_id(self, id):
-        #if self.thisIndex == 0:
+        # if self.thisIndex == 0:
         #    return self.contribute(None, None, self.done_future)
-        assert(id == self.cur_id)
+        assert id == self.cur_id
         self.phase_cnt += 1
         if self.phase_cnt == PHASE_NUM:
             self.phase_cnt = 0
@@ -33,8 +33,8 @@ def main(args):
     random.seed(45782)
     ids = []
     for i in range(MAX_VALS):
-        #for _ in range(PHASE_NUM):
-            #ids.append(i)
+        # for _ in range(PHASE_NUM):
+        #     ids.append(i)
         ids.append(i)
     random.shuffle(ids)
 
@@ -42,7 +42,7 @@ def main(args):
     g.start(done, awaitable=True).get()
     t0 = time.time()
     for id in ids:
-        #g.recv_id(id)
+        # g.recv_id(id)
         for _ in range(PHASE_NUM):
             g.recv_id(id)
     done.get()
diff --git a/tests/when/stencil.py b/tests/when/stencil.py
index f8eb677b..5bae5c3a 100644
--- a/tests/when/stencil.py
+++ b/tests/when/stencil.py
@@ -25,7 +25,7 @@ def work(self, done_fut):
             self.iter_complete.get()
         self.reduce(done_fut)
 
-    @when('self.iteration == iteration')
+    @when("self.iteration == iteration")
     def recvData(self, iteration, data):
         self.msgs_recvd += 1
         if self.msgs_recvd == len(self.nbs):
diff --git a/tests/when/test_when_syntax.py b/tests/when/test_when_syntax.py
index 3be34a3f..7e597a66 100644
--- a/tests/when/test_when_syntax.py
+++ b/tests/when/test_when_syntax.py
@@ -9,69 +9,70 @@
 
 # NOTE: this is not a parallel program
 
+
 def parseMethodArgs(s):
-    arg_names = re.split(', *', s[1:-1])
+    arg_names = re.split(", *", s[1:-1])
     method_args = {}
     for i in range(1, len(arg_names)):
-        method_args[arg_names[i]] = i-1
+        method_args[arg_names[i]] = i - 1
     return method_args
 
 
 def main(args):
 
-    when_cond = 'self.iterations == iter'
-    method    = '(self, iter, x, y)'
+    when_cond = "self.iterations == iter"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.MsgTagCond)
-    assert cond.attrib_name == 'iterations'
+    assert cond.attrib_name == "iterations"
     assert cond.arg_idx == 0
 
-    when_cond = 'self.x == x'
-    method    = '(self, iter, x, y)'
+    when_cond = "self.x == x"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.MsgTagCond)
-    assert cond.attrib_name == 'x'
+    assert cond.attrib_name == "x"
     assert cond.arg_idx == 1
 
-    when_cond = 'y    ==    self.x  '
-    method    = '(self, iter, x, y)'
+    when_cond = "y    ==    self.x  "
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.MsgTagCond)
-    assert cond.attrib_name == 'x'
+    assert cond.attrib_name == "x"
     assert cond.arg_idx == 2
 
-    when_cond = 'self.x == x + y'
-    method    = '(self, iter, x, y)'
+    when_cond = "self.x == x + y"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.ChareStateMsgCond)
 
-    when_cond = 'x < y'
-    method    = '(self, iter, x, y)'
+    when_cond = "x < y"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.ChareStateMsgCond)
 
-    when_cond = 'y == y'
-    method    = '(self, iter, x, y)'
+    when_cond = "y == y"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.ChareStateMsgCond)
 
-    when_cond = 'iter'
-    method    = '(self, iter, x, y)'
+    when_cond = "iter"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.ChareStateMsgCond)
 
-    when_cond = 'self.x'
-    method    = '(self, iter, x, y)'
+    when_cond = "self.x"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.ChareStateCond)
 
-    when_cond = 'self.x + self.y == 3'
-    method    = '(self, iter, x, y)'
+    when_cond = "self.x + self.y == 3"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.ChareStateCond)
 
-    when_cond = 'self.x > (self.y + 2/3 + self.z + error)'
-    method    = '(self, iter, x, y)'
+    when_cond = "self.x > (self.y + 2/3 + self.z + error)"
+    method = "(self, iter, x, y)"
     cond = wait.parse_cond_str(when_cond, __name__, parseMethodArgs(method))
     assert isinstance(cond, wait.ChareStateCond)
 
diff --git a/tests/when/when_test.py b/tests/when/when_test.py
index 9c57b27c..6f850434 100644
--- a/tests/when/when_test.py
+++ b/tests/when/when_test.py
@@ -7,10 +7,10 @@ class Test(Chare):
 
     def __init__(self, numParticipants):
         self.numParticipants = numParticipants
-        self.msgsRcvd = 0   # for PE 0
-        self.current  = 1   # for PE 0
-        self.msgsSent = 0   # for PEs != 0
-        #print("Group constructed " + str(self.thisIndex))
+        self.msgsRcvd = 0  # for PE 0
+        self.current = 1  # for PE 0
+        self.msgsSent = 0  # for PEs != 0
+        # print("Group constructed " + str(self.thisIndex))
 
     @when("self.current == id")
     def testWhen(self, id, msg):
@@ -26,7 +26,7 @@ def testWhen(self, id, msg):
     def run(self):
         if charm.myPe() == 0 or charm.myPe() > self.numParticipants:
             return
-        #print("Group " + str(self.thisIndex) + " sending msg " + str(self.msgsSent))
+        # print("Group " + str(self.thisIndex) + " sending msg " + str(self.msgsSent))
         self.thisProxy[0].testWhen(charm.myPe(), "hi")
         self.msgsSent += 1
         if self.msgsSent < GRP_TO_SEND:
@@ -36,7 +36,7 @@ def run(self):
 def main(args):
     if charm.numPes() < 3:
         charm.abort("Run program with at least 3 PEs")
-    numParticipants = min(charm.numPes()-1, 31)
+    numParticipants = min(charm.numPes() - 1, 31)
     Group(Test, args=[numParticipants]).run()
 
 
diff --git a/tests/when/when_test2.py b/tests/when/when_test2.py
index d3dd380f..56ebd88f 100644
--- a/tests/when/when_test2.py
+++ b/tests/when/when_test2.py
@@ -18,20 +18,20 @@ def __init__(self, controller):
 
     @when("self.ready and (TEST_WHEN_GLOBAL == 33)")
     def startWork(self, x, y, z):
-        assert(self.ready)
+        assert self.ready
         self.ready = False
         self.thisProxy[self.thisIndex].doWork(x, y, z)
 
     def doWork(self, x, y, z):
-        assert(not self.ready)
+        assert not self.ready
         result = 0
         for _ in range(WORKER_ITERS):
-            result += (x * y * z)
+            result += x * y * z
         self.thisProxy[self.thisIndex].workDone(result)
 
     def workDone(self, result):
-        assert(not self.ready)
-        assert(result == X*Y*Z*WORKER_ITERS)
+        assert not self.ready
+        assert result == X * Y * Z * WORKER_ITERS
         self.ready = True
         self.controller.taskDone()