@@ -88,6 +88,9 @@ def g():
8888# communication speed over compatibility:
8989DEFAULT_PROTOCOL = pickle .HIGHEST_PROTOCOL
9090
91+ # Names of modules whose resources should be treated as dynamic.
92+ _PICKLE_BY_VALUE_MODULES = set ()
93+
9194# Track the provenance of reconstructed dynamic classes to make it possible to
9295# reconstruct instances from the matching singleton class definition when
9396# appropriate and preserve the usual "isinstance" semantics of Python objects.
@@ -124,6 +127,77 @@ def _lookup_class_or_track(class_tracker_id, class_def):
124127 return class_def
125128
126129
def register_pickle_by_value(module):
    """Register a module so its functions and classes are pickled by value.

    Functions and classes that are attributes of an importable module are
    pickled by reference by default: loading them relies on re-importing the
    attribute from the module.

    Once `register_pickle_by_value(module)` has been called, the functions
    and classes of that module are pickled by value instead, so they can be
    loaded in Python processes where the module is not importable.

    This helps when developing a module in a distributed execution
    environment: restarting the client Python process with the new source
    code is enough, without re-installing the module on the worker nodes or
    restarting the workers.

    Note: this feature is considered experimental. See the cloudpickle
    README.md file for more details and limitations.

    Raises:
        ValueError: if `module` is not a module object, or if its name is
            not present in `sys.modules`.
    """
    is_module = isinstance(module, types.ModuleType)
    if not is_module:
        raise ValueError(
            f"Input should be a module object, got {str(module)} instead"
        )

    registered_name = module.__name__
    # cloudpickle may later need to reach any module registered for pickling
    # by value, in order to introspect relative imports inside functions
    # pickled by value (see
    # https://github.com/cloudpipe/cloudpickle/pull/417#issuecomment-873684633).
    # Requiring the module to be in sys.modules at registration time (and
    # assuming it stays there until pickling) is one way to guarantee that
    # access; storing a weakref to the module would be another. The
    # introspection is not implemented yet, but the check is enforced now to
    # avoid a possible breaking change later.
    if registered_name not in sys.modules:
        raise ValueError(
            f"{module} was not imported correctly, have you used an "
            f"`import` statement to access it?"
        )

    # Only the name is stored, not the module object itself.
    _PICKLE_BY_VALUE_MODULES.add(registered_name)
169+
170+
def unregister_pickle_by_value(module):
    """Stop pickling the functions and classes of `module` by value.

    Raises:
        ValueError: if `module` is not a module object, or if its name is
            not currently registered for pickling by value.
    """
    if not isinstance(module, types.ModuleType):
        raise ValueError(
            f"Input should be a module object, got {str(module)} instead"
        )

    registered_name = module.__name__
    if registered_name in _PICKLE_BY_VALUE_MODULES:
        _PICKLE_BY_VALUE_MODULES.remove(registered_name)
        return
    raise ValueError(f"{module} is not registered for pickle by value")
181+
182+
def list_registry_pickle_by_value():
    """Return the names of the modules registered for pickling by value.

    The result is a new set: mutating it does not change the registry.
    """
    return set(_PICKLE_BY_VALUE_MODULES)
185+
186+
def _is_registered_pickle_by_value(module):
    """Tell whether `module` or one of its parent packages is registered.

    The dotted name of `module` is checked against the registry first, then
    each enclosing package name in turn ("a.b.c", then "a.b", then "a").
    Returns True on the first match and False when no name matches.
    """
    candidate = module.__name__
    while True:
        if candidate in _PICKLE_BY_VALUE_MODULES:
            return True
        # Drop the last dotted component to move up to the parent package.
        parent, dot, _ = candidate.rpartition(".")
        if not dot:
            # No dot left: the top-level name has already been checked.
            return False
        candidate = parent
199+
200+
127201def _whichmodule (obj , name ):
128202 """Find the module an object belongs to.
129203
@@ -170,18 +244,35 @@ def _whichmodule(obj, name):
170244 return None
171245
172246
173- def _is_importable (obj , name = None ):
174- """Dispatcher utility to test the importability of various constructs."""
175- if isinstance (obj , types .FunctionType ):
176- return _lookup_module_and_qualname (obj , name = name ) is not None
177- elif issubclass (type (obj ), type ):
178- return _lookup_module_and_qualname (obj , name = name ) is not None
247+ def _should_pickle_by_reference (obj , name = None ):
248+ """Test whether a function or a class should be pickled by reference
249+
250+ Pickling by reference means that the object (typically a function or a
251+ class) is an attribute of a module that is assumed to be importable in the
252+ target Python environment. Loading will therefore rely on importing the
253+ module and then calling `getattr` on it to access the function or class.
254+
255+ Pickling by reference is the only option to pickle functions and classes
256+ in the standard library. In cloudpickle the alternative option is to
257+ pickle by value (for instance for interactively or locally defined
258+ functions and classes or for attributes of modules that have been
259+ explicitly registered to be pickled by value).
260+ """
261+ if isinstance (obj , types .FunctionType ) or issubclass (type (obj ), type ):
262+ module_and_name = _lookup_module_and_qualname (obj , name = name )
263+ if module_and_name is None :
264+ return False
265+ module , name = module_and_name
266+ return not _is_registered_pickle_by_value (module )
267+
179268 elif isinstance (obj , types .ModuleType ):
180269 # We assume that sys.modules is primarily used as a cache mechanism for
181270 # the Python import machinery. Checking if a module has been added in
182- # is sys.modules therefore a cheap and simple heuristic to tell us whether
183- # we can assume that a given module could be imported by name in
184- # another Python process.
271+ # sys.modules is therefore a cheap and simple heuristic to tell us
272+ # whether we can assume that a given module could be imported by name
273+ # in another Python process.
274+ if _is_registered_pickle_by_value (obj ):
275+ return False
185276 return obj .__name__ in sys .modules
186277 else :
187278 raise TypeError (
@@ -839,10 +930,15 @@ def _decompose_typevar(obj):
839930
840931
def _typevar_reduce(obj):
    """Build the reduce tuple used to pickle the TypeVar `obj`.

    Returns `(getattr, (module, name))` when the TypeVar can be looked up
    on a module that is not registered for pickling by value; otherwise
    returns `(_make_typevar, _decompose_typevar(obj))`.
    """
    # The module itself is needed below, so the lookup is done here rather
    # than through _should_pickle_by_reference. The name is passed
    # explicitly via obj.__name__.
    located = _lookup_module_and_qualname(obj, name=obj.__name__)

    pickle_by_value = (
        located is None or _is_registered_pickle_by_value(located[0])
    )
    if pickle_by_value:
        return (_make_typevar, _decompose_typevar(obj))

    return (getattr, located)
847943
848944
0 commit comments