Bases: Cache
Hybrid HDFStore / Memory cache
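Writing a dataframe to the HDFStore can fail depending on its dtypes (for example, PyTables does not support categorical dtypes); when that happens the value is kept in the in-memory cache instead.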
Source code in pyhdx/web/cache.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
class HybridHDFCache(Cache):
    """
    Hybrid HDFStore / Memory cache.

    Small values live in an in-memory dict. ``pd.DataFrame`` and ``pd.Series``
    values whose reported memory usage exceeds ``bytes_threshold`` are written
    to a ``pd.HDFStore`` opened at ``file_path``. If writing to (or reading
    back from) the store fails, the value is kept in memory instead.

    Lookups check the in-memory dict first and fall back to the store.
    All keys are converted with ``str()`` before use.

    Sometimes there are errors depending on the dtypes of dataframes stored;
    these are handled by the in-memory fallback in ``_store_put``.
    """

    # Path of the HDF5 file backing the on-disk store. An empty value means
    # no store is opened and everything is kept in memory.
    file_path = param.String()

    # Open HDFStore handle, or None when no file_path was given.
    _store = param.ClassSelector(class_=pd.HDFStore)

    # In-memory part of the cache, keyed by str.
    _cache = param.Dict(default={})

    # DataFrames / Series using more bytes than this go to the HDFStore.
    bytes_threshold = param.Integer(default=int(1e8))

    def __init__(self, **params):
        """Initialise parameters and open the HDFStore if a path is set."""
        super().__init__(**params)
        # Truthiness rather than `is not None`: an unset String parameter is
        # an empty string, which is not a usable path.
        if self.file_path:
            self._store = pd.HDFStore(self.file_path)

    def __getitem__(self, item):
        """
        Return the value stored under ``str(item)``.

        The in-memory dict is consulted first, then the HDFStore.

        Raises:
            KeyError: if the key is in neither the memory cache nor the store.
        """
        key = str(item)
        try:
            return self._cache[key]
        except KeyError:
            if self._store is None:
                # No on-disk store to fall back to; report a plain miss.
                raise
            return self._store[key]

    def _store_put(self, key, value):
        """
        Write ``value`` to the HDFStore, falling back to the memory cache.

        The value is read back once after writing; if either the write or the
        read-back fails with one of the handled exceptions, the value is kept
        in memory instead.
        """
        if self._store is None:
            self._cache[key] = value
            return

        try:
            self._store[key] = value
            # Check if reading back the dataframe works
            try:
                _ = self._store[key]
            except AttributeError:
                del self._store[key]
                self._cache[key] = value
                return
        except (
            NotImplementedError,
            TypeError,
        ):  # pytables does not support categorical dtypes
            self._cache[key] = value
            return

        # The store now holds the current value. Drop any older in-memory
        # entry for this key, which would otherwise shadow it on lookup.
        self._cache.pop(key, None)

    def __setitem__(self, key, value):
        """
        Store ``value`` under ``str(key)``.

        DataFrames and Series larger than ``bytes_threshold`` are sent to the
        HDFStore (via ``_store_put``); everything else is kept in memory.
        """
        key = str(key)
        if isinstance(value, pd.DataFrame):
            too_large = value.memory_usage().sum() > self.bytes_threshold
        elif isinstance(value, pd.Series):
            too_large = value.memory_usage() > self.bytes_threshold
        else:
            too_large = False

        if too_large:
            self._store_put(key, value)
        else:
            self._cache[key] = value

    def __contains__(self, item):
        """Return True if ``str(item)`` is in the memory cache or the store."""
        key = str(item)
        if key in self._cache:
            return True
        # HDFStore.keys() yields absolute path names with a leading '/', so a
        # plain membership test against that list misses keys given without
        # the slash. HDFStore's own `in` check accepts both forms.
        return self._store is not None and key in self._store
|