Coverage for lib/datamodel/dataobject.py: 98%
170 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-28 07:25 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-28 07:25 +0000
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
4# Hermes : Change Data Capture (CDC) tool from any source(s) to any target
5# Copyright (C) 2023, 2024 INSA Strasbourg
6#
7# This file is part of Hermes.
8#
9# Hermes is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# Hermes is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with Hermes. If not, see <https://www.gnu.org/licenses/>.
23from lib.datamodel.diffobject import DiffObject
24from lib.datamodel.serialization import JSONSerializable
26from jinja2.environment import Template
27from typing import Any
class HermesMergingConflictError(Exception):
    """Raised when two merged objects hold different values for the same
    attribute"""
class DataObject(JSONSerializable):
    """Generic serializable object to create from several external sources

    Subclasses MUST define the following class vars:
    - HERMES_TO_REMOTE_MAPPING
    - HERMES_ATTRIBUTES
    - REMOTE_ATTRIBUTES
    - INTERNALATTRIBUTES
    - SECRETS_ATTRIBUTES
    - LOCAL_ATTRIBUTES
    - CACHEONLY_ATTRIBUTES
    - PRIMARYKEY_ATTRIBUTE

    The class provides
    - data storage
    - json serialization/deserialization
    - full equality/difference operators based on attributes name and content
    - diffFrom() function generating a DiffObject instance
    """

    HERMES_TO_REMOTE_MAPPING: dict[str, Any] = {}
    """Mapping dictionary containing datamodel attributes as key, and datasources fields
    as values, possibly stored in a list. Used by DataObject only on server side"""
    REMOTE_ATTRIBUTES: set[str] | None = None
    """Set containing datamodel fields. Used by DataObject and Datamodel only on server
    side"""
    HERMES_ATTRIBUTES: set[str] | None = None
    """Set containing datamodel attributes fields. Used by DataObject"""
    INTERNALATTRIBUTES: set[str] = {"_trashbin_timestamp"}
    """Set containing internal datamodel fields. Used by DataObject"""
    SECRETS_ATTRIBUTES: set[str] | None = None
    """Set containing password attributes fields. Used by DataObject"""
    LOCAL_ATTRIBUTES: set[str] | None = None
    """Set containing attributes names that won't be sent in events, cached or used for
    diff. Used by DataObject"""
    CACHEONLY_ATTRIBUTES: set[str] | None = None
    """Set containing attributes names that won't be sent in events or used for diff,
    but will be cached. Used by DataObject"""
    PRIMARYKEY_ATTRIBUTE: str | tuple[str, ...] | None = None
    """String or tuple of strings containing datamodel primary key(s) attribute
    name(s)"""
    TOSTRING: Template | None = None
    """Contains a compiled Jinja template for objects repr/string representation if set
    in datamodel, or None to use default one"""

    def __init__(
        self,
        from_remote: dict[str, Any] | None = None,
        from_json_dict: dict[str, Any] | None = None,
        jinjaContextVars: dict[str, Any] | None = None,
    ):
        """Create a new instance, with data coming from json (for deserialization),
        or from remote source.

        If data is from json, no check will be done.
        If data is from remote, every attribute specified in REMOTE_ATTRIBUTES must
        exist in from_remote dict, possibly with a None value to be ignored

        jinjaContextVars may contain additional vars to pass to Jinja render() method
        when called with 'from_remote'. None (the default) means no additional vars.

        Raises AttributeError when neither or both of from_remote and from_json_dict
        are specified.
        """
        super().__init__(jsondataattr="_jsondata")
        self._hash = None

        if from_remote is None and from_json_dict is None:
            err = (
                "Cannot instantiate object from nothing:"
                " you must specify one data source"
            )
            __hermes__.logger.critical(err)
            raise AttributeError(err)

        if from_remote is not None and from_json_dict is not None:
            err = "Cannot instantiate object from multiple data sources at once"
            __hermes__.logger.critical(err)
            raise AttributeError(err)

        if from_remote is not None:
            # Always hand a real dict over, in case a subclass overrides
            # __init_from_remote__ and expects one
            self.__init_from_remote__(
                from_remote, {} if jinjaContextVars is None else jinjaContextVars
            )
        else:
            # The two checks above guarantee from_json_dict is set here
            self.__init_from_json_dict__(from_json_dict)

    def __init_from_remote__(
        self,
        from_remote: dict[str, Any],
        jinjaContextVars: dict[str, Any] | None = None,
    ):
        """Create a new instance from remote source, check that all attributes in
        REMOTE_ATTRIBUTES are set, and ignore those not referenced by
        HERMES_TO_REMOTE_MAPPING.
        Will render Jinja template if any, passing a merged dict of from_remote
        and jinjaContextVars (from_remote values win on key collision).

        Values that are None, an empty list or an empty dict are not stored.

        Raises AttributeError if REMOTE_ATTRIBUTES is not defined, if some required
        attributes are missing from from_remote, or if HERMES_TO_REMOTE_MAPPING
        contains a value that is neither a Template, a str nor a list.
        """
        if self.REMOTE_ATTRIBUTES is None:
            raise AttributeError(
                f"Current class {self.__class__.__name__} can't be instantiated with"
                f" 'from_remote' args as {self.__class__.__name__}.REMOTE_ATTRIBUTES"
                " is not defined"
            )

        missingattrs = self.REMOTE_ATTRIBUTES.difference(from_remote.keys())
        if missingattrs:
            err = (
                "Required attributes are missing from specified from_remote dict:"
                f" {missingattrs}"
            )
            __hermes__.logger.critical(err)
            raise AttributeError(err)

        if jinjaContextVars is None:
            jinjaContextVars = {}

        self._data: dict[str, Any] = {}
        for attr, remoteattr in self.HERMES_TO_REMOTE_MAPPING.items():
            if isinstance(remoteattr, Template):  # May be a compiled Jinja Template
                result = remoteattr.render(jinjaContextVars | from_remote)
                if type(result) is list:
                    result = [v for v in result if v is not None]
                if result is not None and result != [] and result != {}:
                    self._data[attr] = result
            elif type(remoteattr) is str:
                val = from_remote[remoteattr]
                if val is not None and val != [] and val != {}:
                    self._data[attr] = val
            elif type(remoteattr) is list:
                # Several remote fields feed one attribute: keep non-None values only
                values = [
                    v
                    for v in (from_remote[item] for item in remoteattr)
                    if v is not None
                ]
                if values:
                    self._data[attr] = values
            else:
                err = (
                    "Invalid type met in"
                    f" HERMES_TO_REMOTE_MAPPING['{attr}']: {type(remoteattr)}"
                )
                __hermes__.logger.critical(err)
                raise AttributeError(err)

    def __init_from_json_dict__(self, from_json_dict: dict[str, Any]):
        """Create a new instance from json source, without checking anything.
        The dict is shallow-copied"""
        self._data = from_json_dict.copy()

    def __getattribute__(self, attr: str) -> Any:
        """Return attribute from "data" dict if it exists there, otherwise from
        instance"""
        try:
            return super().__getattribute__("_data")[attr]
        except (KeyError, AttributeError, TypeError):
            # KeyError: not a data attribute. AttributeError: "_data" isn't set yet.
            # TypeError: "_data" is not subscriptable, or attr is unhashable.
            return super().__getattribute__(attr)

    def __setattr__(self, attr: str, value: Any):
        """Set attribute in "data" dict (and reset instance hash cache) if attrname
        exists in HERMES_ATTRIBUTES or INTERNALATTRIBUTES. Otherwise set it in
        "standard" python way"""
        # Two membership tests avoid building a throwaway union set on each call
        if attr in self.HERMES_ATTRIBUTES or attr in self.INTERNALATTRIBUTES:
            self._hash = None
            self._data[attr] = value
        else:
            super().__setattr__(attr, value)

    def __delattr__(self, attr: str):
        """Remove attribute from "data" dict (and reset instance hash cache) if attrname
        exists in it. Otherwise remove it in "standard" python way"""
        if attr in self._data:
            self._hash = None
            del self._data[attr]
        else:
            super().__delattr__(attr)

    def __eq__(self, other) -> bool:
        """Equality operator, computed on hash equality"""
        return hash(self) == hash(other)

    def __ne__(self, other) -> bool:
        """Difference operator, computed on hash difference"""
        return hash(self) != hash(other)

    def __lt__(self, other) -> bool:
        """Less than operator, used for sorting. Computed on primary key comparison"""
        return self.getPKey() < other.getPKey()

    @staticmethod
    def _complexhash(data: Any) -> int:
        """Recursive hash of dict, list, tuple, set and standard hashable values.

        Dicts are hashed on their sorted keys and the matching values, lists and
        tuples on their items in order, sets on their items regardless of order.
        Anything else is passed to hash() as is.
        """
        if isinstance(data, dict):
            keys = tuple(sorted(data))
            return hash(
                (
                    hash(keys),
                    hash(tuple([DataObject._complexhash(data[k]) for k in keys])),
                )
            )

        if isinstance(data, set):
            # The iteration order of a set depends on its insertion history, so two
            # equal sets may yield their items in a different order. Sorting the
            # items hashes ensures that equal sets always get the same hash.
            return hash(tuple(sorted(DataObject._complexhash(i) for i in data)))

        if isinstance(data, (list, tuple)):
            return hash(tuple([DataObject._complexhash(i) for i in data]))

        return hash(data)

    def __hash__(self) -> int:
        """Hash operator, compute hash based on attrnames and values, ignoring internal,
        local and cacheonly attributes. As the computation is slow, the value is cached
        until an attribute of "data" dict is set or removed
        """
        if self._hash is None:
            keys = tuple(
                sorted(
                    set(self._data.keys())
                    - self.INTERNALATTRIBUTES
                    - self.LOCAL_ATTRIBUTES
                    - self.CACHEONLY_ATTRIBUTES
                )
            )
            self._hash = self._complexhash(
                (
                    self._complexhash(keys),
                    self._complexhash([self._data[k] for k in keys]),
                )
            )
        return self._hash

    @property
    def _jsondata(self) -> dict[str, Any]:
        """Return serializable data (all data minus LOCAL_ATTRIBUTES and
        SECRETS_ATTRIBUTES), with keys in sorted order"""
        # Computed once here rather than once per key
        excluded = self.LOCAL_ATTRIBUTES | self.SECRETS_ATTRIBUTES
        return {k: self._data[k] for k in sorted(self._data) if k not in excluded}

    def diffFrom(self, other: "DataObject") -> DiffObject:
        """Return DiffObject with differences (attributes names) of current instance
        from another.

        Attributes only found in current instance are reported as added, those only
        found in other as removed, and those found in both with a different value
        as modified. Internal, local and cacheonly attributes of the current
        instance's class are ignored on both sides.
        """
        diff = DiffObject(self, other)

        s = set(
            self._data.keys()
            - self.INTERNALATTRIBUTES
            - self.LOCAL_ATTRIBUTES
            - self.CACHEONLY_ATTRIBUTES
        )
        o = set(
            other._data.keys()
            - self.INTERNALATTRIBUTES
            - self.LOCAL_ATTRIBUTES
            - self.CACHEONLY_ATTRIBUTES
        )
        commonattrs = s & o

        diff.appendRemoved(o - s)
        diff.appendAdded(s - o)

        for k, v in self._data.items():
            if k in commonattrs and DataObject.isDifferent(v, other._data[k]):
                diff.appendModified(k)

        return diff

    @staticmethod
    def isDifferent(a: Any, b: Any) -> bool:
        """Test true difference between two objects: recursive compare of type,
        len and values.

        Values of different types are always different (so 1 differs from True and
        from 1.0). Lists and dicts are compared item by item, anything else with
        the != operator.
        """
        if type(a) is not type(b):
            return True

        if type(a) is list:
            return len(a) != len(b) or any(
                DataObject.isDifferent(x, y) for x, y in zip(a, b)
            )

        if type(a) is dict:
            return a.keys() != b.keys() or any(
                DataObject.isDifferent(v, b[k]) for k, v in a.items()
            )

        return a != b

    def toNative(self) -> dict[str, Any]:
        """Return complete data dict. This is the internal dict itself, not a copy"""
        return self._data

    def toEvent(self) -> dict[str, Any]:
        """Return data to send in Event (all data minus LOCAL_ATTRIBUTES and
        CACHEONLY_ATTRIBUTES)"""
        return {
            k: self._data[k]
            for k in self._data.keys()
            - self.LOCAL_ATTRIBUTES
            - self.CACHEONLY_ATTRIBUTES
        }

    def mergeWith(self, other: "DataObject", raiseExceptionOnConflict=False):
        """Merge data of current instance with another.

        Attributes of other that are not set on current instance are copied to it.
        When an attribute exists on both with different values, the value of current
        instance is kept and the conflict is logged, unless raiseExceptionOnConflict
        is True: HermesMergingConflictError is raised then.
        """
        for k, v in other._data.items():
            if not hasattr(self, k):
                # Attribute wasn't set, so set it from other's value
                setattr(self, k, v)
            elif self.isDifferent(v, getattr(self, k)):
                # Attribute was set, and had another value than other's
                err = (
                    f"Merging conflict. Attribute '{k}' exist on both objects with"
                    f" differents values ({repr(self)}:"
                    f" '{getattr(self, k)}' / {repr(other)}: '{v}')"
                )
                if raiseExceptionOnConflict:
                    raise HermesMergingConflictError(err)
                __hermes__.logger.debug(f"{err}. The first one is kept")
            # else: attributes have same value

    def getPKey(self) -> Any:
        """Return primary key value: a tuple of values if PRIMARYKEY_ATTRIBUTE is a
        tuple, the single value otherwise"""
        if type(self.PRIMARYKEY_ATTRIBUTE) is tuple:
            return tuple([getattr(self, key) for key in self.PRIMARYKEY_ATTRIBUTE])
        return getattr(self, self.PRIMARYKEY_ATTRIBUTE)

    def getType(self) -> str:
        """Return current class name"""
        return self.__class__.__name__

    def __repr__(self) -> str:
        """String representation of current instance: the rendered TOSTRING template
        if one is set, otherwise the class name and primary key"""
        if isinstance(self.TOSTRING, Template):
            return self.TOSTRING.render(self._data)
        return f"<{self.getType()}[{self.getPKey()}]>"

    def __str__(self) -> str:
        """Multiline string representation of current instance, with data it contains.
        Values of SECRETS_ATTRIBUTES are masked, only their type is shown"""
        lines = [repr(self)]
        for attr in sorted(self._data):
            value = getattr(self, attr)
            if attr in self.SECRETS_ATTRIBUTES:
                lines.append(f" - {attr}: <SECRET_VALUE({type(value)})>")
            else:
                lines.append(f" - {attr}: {repr(value)}")
        return "\n".join(lines)