281 lines
9.4 KiB
Python
281 lines
9.4 KiB
Python
|
###############################################################################
|
||
|
# Customizable Pickler with some basic reducers
|
||
|
#
|
||
|
# author: Thomas Moreau
|
||
|
#
|
||
|
# adapted from multiprocessing/reduction.py (17/02/2017)
|
||
|
# * Replace the ForkingPickler with a similar _LokyPickler,
|
||
|
# * Add CustomizableLokyPickler to allow customizing pickling process
|
||
|
# on the fly.
|
||
|
#
|
||
|
import io
|
||
|
import os
|
||
|
import sys
|
||
|
import functools
|
||
|
from multiprocessing import util
|
||
|
import types
|
||
|
try:
|
||
|
# Python 2 compat
|
||
|
from cPickle import loads as pickle_loads
|
||
|
except ImportError:
|
||
|
from pickle import loads as pickle_loads
|
||
|
import copyreg
|
||
|
|
||
|
from pickle import HIGHEST_PROTOCOL
|
||
|
|
||
|
if sys.platform == "win32":
|
||
|
if sys.version_info[:2] > (3, 3):
|
||
|
from multiprocessing.reduction import duplicate
|
||
|
else:
|
||
|
from multiprocessing.forking import duplicate
|
||
|
|
||
|
|
||
|
###############################################################################
|
||
|
# Enable custom pickling in Loky.
|
||
|
# To allow per-instance customization of the pickling process, we use 2
# classes. _ReducerRegistry provides module-level customization, while
# CustomizablePickler allows instance-level custom reducers. Only
# CustomizablePickler should be used directly.
|
||
|
|
||
|
class _ReducerRegistry(object):
    """Module-wide registry mapping types to custom reducer functions.

    Reducers are stored in the class-level ``dispatch_table`` dict, so every
    registration is shared by all users of this class.

    Picklers built on top of this registry use HIGHEST_PROTOCOL by default:
    the pickles are ephemeral data structures exchanged between processes,
    so no backward compatibility is required.
    """

    # Overriding the pure Python pickler is the only way to customize the
    # dispatch table without side effects on Python 2.6 to 3.2. Python 3.3+
    # provides the dispatch_table feature (http://bugs.python.org/issue14166),
    # which makes it possible to use the faster C implementation of the
    # Pickler.
    dispatch_table = {}

    @classmethod
    def register(cls, type, reduce_func):
        """Record ``reduce_func`` as the reducer to use for ``type``."""
        if sys.version_info >= (3,):
            cls.dispatch_table[type] = reduce_func
            return

        # Python 2 pickler dispatching cannot be customized explicitly, so
        # the reducer is wrapped in a closure that takes the pickler as its
        # first argument and forwards the reduced value to ``save_reduce``.
        def dispatcher(pickler, obj):
            reduce_args = reduce_func(obj)
            pickler.save_reduce(obj=obj, *reduce_args)

        cls.dispatch_table[type] = dispatcher
|
||
|
|
||
|
|
||
|
###############################################################################
# Registers extra pickling routines to improve pickling support for loky

# Module-level alias of the registry classmethod: calling ``register(...)``
# stores the reducer in ``_ReducerRegistry.dispatch_table``.
register = _ReducerRegistry.register
|
||
|
|
||
|
|
||
|
# make methods picklable
|
||
|
def _reduce_method(m):
    """Reduce a method object to ``(getattr, (owner, name))``.

    ``name`` is the name of the underlying function (``m.__func__``) and
    ``owner`` is ``m.__self__``, or ``m.__class__`` when ``m.__self__`` is
    None.
    """
    # NOTE(review): when ``__self__`` is None, ``m.__class__`` is the type of
    # the method object itself, not the class that defines the method --
    # confirm this is the intended owner.
    owner = m.__class__ if m.__self__ is None else m.__self__
    return getattr, (owner, m.__func__.__name__)
|
||
|
|
||
|
|
||
|
# Throwaway class whose only purpose is to provide a bound instance method
# (``_C().f``) and a bound classmethod (``_C.h``), so that their runtime
# types can be looked up with ``type(...)``.
class _C:
    def f(self):
        pass

    @classmethod
    def h(cls):
        pass
|
||
|
|
||
|
|
||
|
# Use _reduce_method for the type of a bound instance method and for the type
# of a bound classmethod.
register(type(_C().f), _reduce_method)
register(type(_C.h), _reduce_method)
|
||
|
|
||
|
|
||
|
if not hasattr(sys, "pypy_version_info"):
    # PyPy uses functions instead of method_descriptors and wrapper_descriptors
    def _reduce_method_descriptor(m):
        """Reduce a descriptor to ``(getattr, (m.__objclass__, m.__name__))``."""
        return getattr, (m.__objclass__, m.__name__)

    # Register the reducer for the types of ``list.append`` and
    # ``int.__add__``.
    register(type(list.append), _reduce_method_descriptor)
    register(type(int.__add__), _reduce_method_descriptor)
|
||
|
|
||
|
|
||
|
# Make functools.partial objects picklable
|
||
|
def _reduce_partial(p):
    """Reduce a ``functools.partial`` to ``(_rebuild_partial, state)``.

    ``state`` is the tuple ``(func, args, keywords)``; a falsy ``keywords``
    (e.g. ``None``) is replaced by an empty dict.
    """
    keywords = p.keywords or {}
    state = (p.func, p.args, keywords)
    return _rebuild_partial, state
|
||
|
|
||
|
|
||
|
def _rebuild_partial(func, args, keywords):
    """Build a ``functools.partial`` from ``func``, ``args`` and ``keywords``.

    ``args`` is unpacked as positional arguments and ``keywords`` as keyword
    arguments.
    """
    rebuilt = functools.partial(func, *args, **keywords)
    return rebuilt
|
||
|
|
||
|
|
||
|
# Use _reduce_partial whenever a functools.partial instance is pickled.
register(functools.partial, _reduce_partial)
|
||
|
|
||
|
if sys.platform != "win32":
|
||
|
from ._posix_reduction import _mk_inheritable # noqa: F401
|
||
|
else:
|
||
|
from . import _win_reduction # noqa: F401
|
||
|
|
||
|
# Global variables controlling the pickler behavior.
# DEFAULT_ENV is "cloudpickle" when joblib's vendored cloudpickle can be
# imported, and "pickle" otherwise.
try:
    from joblib.externals import cloudpickle  # noqa: F401
    DEFAULT_ENV = "cloudpickle"
except ImportError:
    # If cloudpickle is not present, fallback to pickle
    DEFAULT_ENV = "pickle"

# The LOKY_PICKLER environment variable, when set, overrides DEFAULT_ENV.
ENV_LOKY_PICKLER = os.environ.get("LOKY_PICKLER", DEFAULT_ENV)
# Currently selected pickler class and its name. Both are None until
# set_loky_pickler() assigns them.
_LokyPickler = None
_loky_pickler_name = None
|
||
|
|
||
|
|
||
|
def set_loky_pickler(loky_pickler=None):
    """Select the pickler class used by ``dump`` and ``dumps``.

    Parameters
    ----------
    loky_pickler : str or None
        ``"cloudpickle"``, or the name of an importable module exposing a
        ``Pickler`` class. ``None`` falls back to ``ENV_LOKY_PICKLER`` and an
        empty string is treated as ``"cloudpickle"``.

    Raises
    ------
    ImportError, AttributeError
        If the named module cannot be imported or has no ``Pickler``
        attribute. The original exception is re-raised with a message
        extended to mention LOKY_PICKLER / set_loky_pickler.

    Notes
    -----
    Rebinds the module globals ``_LokyPickler`` and ``_loky_pickler_name``.
    The call returns early, changing nothing, when the requested name equals
    the currently selected one.
    """
    global _LokyPickler, _loky_pickler_name

    if loky_pickler is None:
        loky_pickler = ENV_LOKY_PICKLER

    loky_pickler_cls = None

    # The default loky_pickler is cloudpickle
    if loky_pickler in ["", None]:
        loky_pickler = "cloudpickle"

    # Nothing to do if the requested pickler is already the active one.
    if loky_pickler == _loky_pickler_name:
        return

    if loky_pickler == "cloudpickle":
        from joblib.externals.cloudpickle import CloudPickler as loky_pickler_cls
    else:
        try:
            from importlib import import_module
            module_pickle = import_module(loky_pickler)
            loky_pickler_cls = module_pickle.Pickler
        except (ImportError, AttributeError) as e:
            # Append a hint about where the pickler name came from, then
            # re-raise the same exception object.
            extra_info = ("\nThis error occurred while setting loky_pickler to"
                          " '{}', as required by the env variable LOKY_PICKLER"
                          " or the function set_loky_pickler."
                          .format(loky_pickler))
            e.args = (e.args[0] + extra_info,) + e.args[1:]
            e.msg = e.args[0]
            raise e

    util.debug("Using '{}' for serialization."
               .format(loky_pickler if loky_pickler else "cloudpickle"))

    class CustomizablePickler(loky_pickler_cls):
        """Pickler subclass combining registry-level and per-instance reducers.
        """

        # Base pickler class, kept so that its MRO can be inspected in
        # _set_dispatch_table.
        _loky_pickler_cls = loky_pickler_cls

        def _set_dispatch_table(self, dispatch_table):
            """Install ``dispatch_table`` on this pickler instance."""
            for ancestor_class in self._loky_pickler_cls.mro():
                dt_attribute = getattr(ancestor_class, "dispatch_table", None)
                if isinstance(dt_attribute, types.MemberDescriptorType):
                    # Ancestor class (typically _pickle.Pickler) has a
                    # member_descriptor for its "dispatch_table" attribute. Use
                    # it to set the dispatch_table as a member instead of a
                    # dynamic attribute in the __dict__ of the instance,
                    # otherwise it will not be taken into account by the C
                    # implementation of the dump method if a subclass defines a
                    # class-level dispatch_table attribute as was done in
                    # cloudpickle 1.6.0:
                    # https://github.com/joblib/loky/pull/260
                    dt_attribute.__set__(self, dispatch_table)
                    break

            # On top of member descriptor set, also use setattr such that code
            # that directly accesses self.dispatch_table gets a consistent view
            # of the same table.
            self.dispatch_table = dispatch_table

        def __init__(self, writer, reducers=None, protocol=HIGHEST_PROTOCOL):
            """Initialize the base pickler, then install the reducers.

            ``reducers`` is an optional mapping from type to reducer function
            that is registered on this instance only.
            """
            loky_pickler_cls.__init__(self, writer, protocol=protocol)
            if reducers is None:
                reducers = {}
            if sys.version_info < (3,):
                # Python 2: work on a per-instance copy of the class-level
                # ``dispatch`` mapping.
                self.dispatch = loky_pickler_cls.dispatch.copy()
                self.dispatch.update(_ReducerRegistry.dispatch_table)
            else:
                if hasattr(self, "dispatch_table"):
                    # Force a copy that we will update without mutating
                    # any class-level defined dispatch_table.
                    loky_dt = dict(self.dispatch_table)
                else:
                    # Use standard reducers as bases
                    loky_dt = copyreg.dispatch_table.copy()

                # Register loky specific reducers
                loky_dt.update(_ReducerRegistry.dispatch_table)

                # Set the new dispatch table, taking care of the fact that we
                # need to use the member_descriptor when we inherit from a
                # subclass of the C implementation of the Pickler base class
                # with a class-level dispatch_table attribute.
                self._set_dispatch_table(loky_dt)

            # Register custom reducers
            for type, reduce_func in reducers.items():
                self.register(type, reduce_func)

        def register(self, type, reduce_func):
            """Attach a reducer function to a given type in the dispatch table.
            """
            if sys.version_info < (3,):
                # Python 2 pickler dispatching is not explicitly customizable.
                # Let us use a closure to workaround this limitation.
                def dispatcher(self, obj):
                    reduced = reduce_func(obj)
                    self.save_reduce(obj=obj, *reduced)
                self.dispatch[type] = dispatcher
            else:
                self.dispatch_table[type] = reduce_func

    # Publish the new pickler class and remember its name.
    _LokyPickler = CustomizablePickler
    _loky_pickler_name = loky_pickler
|
||
|
|
||
|
|
||
|
def get_loky_pickler_name():
    """Return the name of the currently selected loky pickler.

    This is the module-level ``_loky_pickler_name``, i.e. the value last
    stored by ``set_loky_pickler`` (``None`` if it was never set).
    """
    return _loky_pickler_name
|
||
|
|
||
|
|
||
|
def get_loky_pickler():
    """Return the currently selected loky pickler class.

    This is the module-level ``_LokyPickler``, i.e. the class last stored by
    ``set_loky_pickler`` (``None`` if it was never set).
    """
    return _LokyPickler
|
||
|
|
||
|
|
||
|
# Select the default pickler at import time. With no argument,
# set_loky_pickler() uses ENV_LOKY_PICKLER.
set_loky_pickler()
|
||
|
|
||
|
|
||
|
def loads(buf):
    """Unpickle ``buf`` with ``pickle_loads`` and return the object.

    On interpreters older than Python 3.3, an ``io.BytesIO`` argument is first
    converted to its bytes content. Any other input is handed to
    ``pickle_loads`` unchanged.

    As with any unpickling, only call this on data from a trusted source.
    """
    # Compat for python2.7 version
    needs_bytes = sys.version_info < (3, 3) and isinstance(buf, io.BytesIO)
    payload = buf.getvalue() if needs_bytes else buf
    return pickle_loads(payload)
|
||
|
|
||
|
|
||
|
def dump(obj, file, reducers=None, protocol=None):
    """Replacement for pickle.dump() using _LokyPickler.

    A ``_LokyPickler`` is created on ``file`` with the given ``reducers`` and
    ``protocol``, and ``obj`` is dumped through it. ``protocol`` is forwarded
    as-is, including ``None``; what ``None`` means is up to the underlying
    pickler class.
    """
    pickler = _LokyPickler(file, reducers=reducers, protocol=protocol)
    pickler.dump(obj)
|
||
|
|
||
|
|
||
|
def dumps(obj, reducers=None, protocol=None):
    """Pickle ``obj`` in memory via ``dump`` and return the result.

    On interpreters older than Python 3.3 the result is the content of the
    buffer as returned by ``BytesIO.getvalue()``. Otherwise it is the view
    returned by ``BytesIO.getbuffer()``.
    """
    stream = io.BytesIO()
    dump(obj, stream, reducers=reducers, protocol=protocol)
    legacy = sys.version_info < (3, 3)
    return stream.getvalue() if legacy else stream.getbuffer()
|
||
|
|
||
|
|
||
|
# Public API of this module.
__all__ = ["dump", "dumps", "loads", "register", "set_loky_pickler"]

# ``duplicate`` is only imported by this module on Windows, so it is only
# exported there.
if sys.platform == "win32":
    __all__ += ["duplicate"]
|