Skip to content

cache

HybridHDFCache

Bases: Cache

Hybrid HDFStore / Memory cache

Writing a dataframe to the HDFStore can fail depending on its dtypes (for example, categorical columns); such values are kept in the in-memory cache instead.

Source code in pyhdx/web/cache.py
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
class HybridHDFCache(Cache):
    """
    Hybrid HDFStore / Memory cache

    ``DataFrame`` and ``Series`` values whose reported memory usage exceeds
    ``bytes_threshold`` are written to a ``pd.HDFStore``; every other value is
    kept in an in-memory dict. Lookups consult the in-memory dict first and
    fall back to the store.

    Sometimes there are errors depending on the dtypes of dataframes stored;
    when writing to (or reading back from) the store fails, the value is kept
    in memory instead.

    """

    # Path of the HDF5 file backing the store.
    file_path = param.String()

    # Open HDFStore, or None when no store has been opened.
    _store = param.ClassSelector(class_=pd.HDFStore)

    # In-memory part of the cache, keyed by str(key).
    _cache = param.Dict(default={})

    # Values using more bytes than this are sent to the HDFStore.
    bytes_threshold = param.Integer(default=int(1e8))

    def __init__(self, **params):
        super().__init__(**params)
        # param.String defaults to an empty string rather than None, so test
        # truthiness: an unset path must not try to open an HDFStore on "".
        if self.file_path:
            self._store = pd.HDFStore(self.file_path)

    def __getitem__(self, item):
        """Return the value stored under ``str(item)``.

        Raises:
            KeyError: if the key is in neither the memory cache nor the store.
        """
        key = str(item)
        try:
            return self._cache[key]
        except KeyError:
            if self._store is None:
                # No backing store: report the miss as a plain KeyError.
                raise
            return self._store[key]

    def _store_put(self, key, value):
        """Write ``value`` to the HDFStore, falling back to the memory cache.

        The value ends up in the memory cache when there is no store, when
        the write raises ``NotImplementedError`` / ``TypeError``, or when
        reading the value back raises ``AttributeError``.
        """
        if self._store is None:
            self._cache[key] = value
            return

        try:
            self._store[key] = value

            # Check if reading back the dataframe works
            try:
                self._store[key]
            except AttributeError:
                del self._store[key]
                self._cache[key] = value
                return

        except (
            NotImplementedError,
            TypeError,
        ):  # pytables does not support categorical dtypes
            self._cache[key] = value
            return

        # The store now holds the current value. Drop any older in-memory
        # entry for this key, because __getitem__ checks the memory cache
        # first and would otherwise keep returning the stale object.
        self._cache.pop(key, None)

    def __setitem__(self, key, value):
        """Store ``value`` under ``str(key)``, choosing the backend by size."""
        key = str(key)
        if isinstance(value, pd.DataFrame) and value.memory_usage().sum() > self.bytes_threshold:
            self._store_put(key, value)
        elif isinstance(value, pd.Series) and value.memory_usage() > self.bytes_threshold:
            self._store_put(key, value)
        else:
            # Memory-cache entries take precedence on lookup, so an older
            # store entry under the same key is shadowed by this value.
            self._cache[key] = value

    def __contains__(self, item):
        """Return True if ``str(item)`` is in the memory cache or the store."""
        key = str(item)
        if key in self._cache:
            return True
        # HDFStore.keys() yields absolute paths ("/key"), so comparing the
        # bare key against that list misses stored items. HDFStore's own
        # membership test accepts the key with or without the leading "/".
        return self._store is not None and key in self._store