Coverage for lib/datamodel/dataobject.py: 98%

170 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-07-28 07:25 +0000

1#!/usr/bin/env python3 

2# -*- coding: utf-8 -*- 

3 

4# Hermes : Change Data Capture (CDC) tool from any source(s) to any target 

5# Copyright (C) 2023, 2024 INSA Strasbourg 

6# 

7# This file is part of Hermes. 

8# 

9# Hermes is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# Hermes is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with Hermes. If not, see <https://www.gnu.org/licenses/>. 

21 

22 

23from lib.datamodel.diffobject import DiffObject 

24from lib.datamodel.serialization import JSONSerializable 

25 

26from jinja2.environment import Template 

27from typing import Any 

28 

29 

class HermesMergingConflictError(Exception):
    """Signal a merge conflict between two objects.

    Raised when an attribute exists on both objects being merged but holds
    different values on each of them.
    """

33 

34 

class DataObject(JSONSerializable):
    """Generic serializable object to create from several external sources

    Subclasses MUST define the following class vars:
    - HERMES_TO_REMOTE_MAPPING
    - HERMES_ATTRIBUTES
    - REMOTE_ATTRIBUTES
    - INTERNALATTRIBUTES
    - SECRETS_ATTRIBUTES
    - LOCAL_ATTRIBUTES
    - CACHEONLY_ATTRIBUTES
    - PRIMARYKEY_ATTRIBUTE

    The class provides
    - data storage
    - json serialization/deserialization
    - full equality/difference operators based on attributes name and content
    - diffFrom() function generating DiffObject

    Attribute values live in the instance "_data" dict. Attribute access is
    redirected to that dict by __getattribute__/__setattr__/__delattr__, so a
    data attribute takes precedence over a regular instance or class attribute
    bearing the same name.
    """

    HERMES_TO_REMOTE_MAPPING: dict[str, Any] = {}
    """Mapping dictionary containing datamodel attributes as key, and datasources fields
    as values, eventually stored in a list. Used by DataObject only on server side"""
    REMOTE_ATTRIBUTES: set[str] | None = None
    """Set containing datamodel fields. Used by DataObject and Datamodel only on server
    side"""
    HERMES_ATTRIBUTES: set[str] | None = None
    """Set containing datamodel attributes fields. Used by DataObject"""
    INTERNALATTRIBUTES: set[str] = {"_trashbin_timestamp"}
    """Set containing internal datamodel fields. Used by DataObject"""
    SECRETS_ATTRIBUTES: set[str] | None = None
    """Set containing password attributes fields. Used by DataObject"""
    LOCAL_ATTRIBUTES: set[str] | None = None
    """Set containing attributes names that won't be sent in events, cached or used for
    diff. Used by DataObject"""
    CACHEONLY_ATTRIBUTES: set[str] | None = None
    """Set containing attributes names that won't be sent in events or used for diff,
    but will be cached. Used by DataObject"""
    PRIMARYKEY_ATTRIBUTE: str | tuple[str, ...] | None = None
    """String or tuple of strings containing datamodel primary key(s) attribute
    name(s)"""
    TOSTRING: Template | None = None
    """Contains a compiled Jinja template for objects repr/string representation if set
    in datamodel, or None to use default one"""

    def __init__(
        self,
        from_remote: dict[str, Any] | None = None,
        from_json_dict: dict[str, Any] | None = None,
        jinjaContextVars: dict[str, Any] | None = None,
    ):
        """Create a new instance, with data coming from json (for deserialization),
        or from remote source.

        Exactly one of 'from_remote' and 'from_json_dict' must be specified,
        otherwise an AttributeError is raised (after being logged as critical).

        If data is from json, no check will be done.
        If data is from remote, every attributes specified in REMOTE_ATTRIBUTES must
        exists in from_remote dict, eventually with None value to be ignored

        jinjaContextVars may contains additional vars to pass to Jinja render() method
        when called with 'from_remote'. None (the default) means no additional vars
        """
        super().__init__(jsondataattr="_jsondata")
        self._hash = None

        if from_remote is None and from_json_dict is None:
            err = (
                "Cannot instantiate object from nothing:"
                " you must specify one data source"
            )
            __hermes__.logger.critical(err)
            raise AttributeError(err)

        if from_remote is not None and from_json_dict is not None:
            err = "Cannot instantiate object from multiple data sources at once"
            __hermes__.logger.critical(err)
            raise AttributeError(err)

        if from_remote is not None:
            # Always hand a real dict over, so that subclasses overriding
            # __init_from_remote__ never have to deal with None
            if jinjaContextVars is None:
                jinjaContextVars = {}
            self.__init_from_remote__(from_remote, jinjaContextVars)
        else:
            self.__init_from_json_dict__(from_json_dict)

    def __init_from_remote__(
        self,
        from_remote: dict[str, Any],
        jinjaContextVars: dict[str, Any] | None = None,
    ):
        """Create a new instance from remote source, check that all attributes in
        REMOTE_ATTRIBUTES/HERMES_TO_REMOTE_MAPPING are set, and ignore others.
        Will render Jinja template if any, passing a merged dict of from_remote
        and jinjaContextVars (values of from_remote win on name clash).

        Values that are None, an empty list or an empty dict are not stored.

        Raise AttributeError if REMOTE_ATTRIBUTES is not defined, if some of its
        entries are missing from from_remote, or if HERMES_TO_REMOTE_MAPPING
        contains a value that is neither a Template, a str nor a list"""
        if self.REMOTE_ATTRIBUTES is None:
            raise AttributeError(
                f"Current class {self.__class__.__name__} can't be instantiated with"
                f" 'from_remote' args as {self.__class__.__name__}.REMOTE_ATTRIBUTES"
                " is not defined"
            )
        missingattrs = self.REMOTE_ATTRIBUTES.difference(from_remote.keys())
        if missingattrs:
            err = (
                "Required attributes are missing from specified from_remote dict:"
                f" {missingattrs}"
            )
            __hermes__.logger.critical(err)
            raise AttributeError(err)

        jinjavars = {} if jinjaContextVars is None else jinjaContextVars

        # 'data' and self._data are the same dict: filling the local name avoids
        # going through __getattribute__ at each access
        data: dict[str, Any] = {}
        self._data = data
        for attr, remoteattr in self.HERMES_TO_REMOTE_MAPPING.items():
            if isinstance(remoteattr, Template):  # May be a compiled Jinja Template
                result = remoteattr.render(jinjavars | from_remote)
                if isinstance(result, list):
                    result = [v for v in result if v is not None]
                if result is not None and result != [] and result != {}:
                    data[attr] = result
            elif isinstance(remoteattr, str):
                val = from_remote[remoteattr]
                if val is not None and val != [] and val != {}:
                    data[attr] = val
            elif isinstance(remoteattr, list):
                # Several remote fields feed one attribute: keep non-None values
                values = [
                    value
                    for value in (from_remote[item] for item in remoteattr)
                    if value is not None
                ]
                if values:
                    data[attr] = values
            else:
                err = (
                    "Invalid type met in"
                    f" HERMES_TO_REMOTE_MAPPING['{attr}']: {type(remoteattr)}"
                )
                __hermes__.logger.critical(err)
                raise AttributeError(err)

    def __init_from_json_dict__(self, from_json_dict: dict[str, Any]):
        """Create a new instance from json source, without checking anything.
        The dict is shallow-copied: nested values are shared with the caller"""
        self._data = from_json_dict.copy()

    def __getattribute__(self, attr: str) -> Any:
        """Return attribute from "data" dict or from instance.

        The "data" dict is looked up first. The standard lookup is used as
        fallback when attr is not in it, or when "_data" doesn't exist yet"""
        try:
            return super().__getattribute__("_data")[attr]
        except (KeyError, AttributeError, TypeError):
            return super().__getattribute__(attr)

    def __setattr__(self, attr: str, value: Any):
        """Set attribute in "data" dict (and reset instance hash cache) if attrname
        exists in HERMES_ATTRIBUTES or INTERNALATTRIBUTES. Otherwise set it in
        "standard" python way"""
        if attr in (self.HERMES_ATTRIBUTES | self.INTERNALATTRIBUTES):
            self._hash = None
            self._data[attr] = value
        else:
            super().__setattr__(attr, value)

    def __delattr__(self, attr: str):
        """Remove attribute from "data" dict (and reset instance hash cache) if attrname
        exists in it. Otherwise remove it in "standard" python way"""
        if attr in self._data:
            self._hash = None
            del self._data[attr]
        else:
            super().__delattr__(attr)

    def __eq__(self, other) -> bool:
        """Equality operator, computed on hash equality.

        As the hash only depends on attributes names and values, two instances
        of different classes holding the same data are equal. An unhashable
        object that is not a DataObject is never equal to the current instance"""
        try:
            otherhash = hash(other)
        except TypeError:
            if isinstance(other, DataObject):
                # Unhashable value stored in a DataObject: don't hide the error
                raise
            return False
        return hash(self) == otherhash

    def __ne__(self, other) -> bool:
        """Difference operator, computed on hash difference. Strict negation of
        the equality operator"""
        return not self.__eq__(other)

    def __lt__(self, other) -> bool:
        """Less than operator, used for sorting. Computed on primary key comparison"""
        return self.getPKey() < other.getPKey()

    @staticmethod
    def _complexhash(data: Any) -> int:
        """Recursive hash of dict, list, tuple, set and standard hashable values.

        - dict: hash of its sorted keys and of the values taken in that key order
        - list, tuple: hash of the items hashes, in sequence order
        - set: hash of the sorted items hashes, as the iteration order of a set
          depends on its insertion history and must not change the result
        - anything else: builtin hash(), that raises TypeError on unhashable values
        """
        if isinstance(data, dict):
            keys = tuple(sorted(data))
            values = tuple(DataObject._complexhash(data[k]) for k in keys)
            return hash((hash(keys), hash(values)))

        if isinstance(data, set):
            return hash(tuple(sorted(DataObject._complexhash(i) for i in data)))

        if isinstance(data, (list, tuple)):
            return hash(tuple(DataObject._complexhash(i) for i in data))

        return hash(data)

    def __hash__(self) -> int:
        """Hash operator, compute hash based on attrnames and values, ignoring internal,
        local and cacheonly attributes. As the computation is slow, the value is cached
        until an attribute of "data" dict is set or removed
        """
        if self._hash is None:
            keys = tuple(
                sorted(
                    set(self._data.keys())
                    - self.INTERNALATTRIBUTES
                    - self.LOCAL_ATTRIBUTES
                    - self.CACHEONLY_ATTRIBUTES
                )
            )
            self._hash = self._complexhash(
                (
                    self._complexhash(keys),
                    self._complexhash([self._data[k] for k in keys]),
                )
            )
        return self._hash

    @property
    def _jsondata(self) -> dict[str, Any]:
        """Return serializable data (all data minus LOCAL_ATTRIBUTES and
        SECRETS_ATTRIBUTES), with keys in sorted order"""
        excluded = self.LOCAL_ATTRIBUTES | self.SECRETS_ATTRIBUTES
        return {k: self._data[k] for k in sorted(self._data) if k not in excluded}

    def diffFrom(self, other: "DataObject") -> DiffObject:
        """Return DiffObject with differences (attributes names) of current instance
        from another.

        Attributes only set on current instance are "added", those only set on
        other are "removed", and those set on both with a different value are
        "modified". Internal, local and cacheonly attributes of current
        instance's class are ignored on both sides"""
        diff = DiffObject(self, other)

        ignored = (
            self.INTERNALATTRIBUTES | self.LOCAL_ATTRIBUTES | self.CACHEONLY_ATTRIBUTES
        )
        s = set(self._data.keys() - ignored)
        o = set(other._data.keys() - ignored)
        commonattrs = s & o

        diff.appendRemoved(o - s)
        diff.appendAdded(s - o)

        # Iterate over "data" dict to report modified attributes in its order
        for k, v in self._data.items():
            if k in commonattrs and DataObject.isDifferent(v, other._data[k]):
                diff.appendModified(k)

        return diff

    @staticmethod
    def isDifferent(a: Any, b: Any) -> bool:
        """Test true difference between two object: recursive compare of type,
        len and values.

        Values of different types are always different, even when they compare
        equal (1 and 1.0, 1 and True). Lists are compared item by item, dicts
        key by key, and anything else with the != operator"""
        if type(a) is not type(b):
            return True

        if type(a) is list:
            if len(a) != len(b):
                return True
            return any(DataObject.isDifferent(x, y) for x, y in zip(a, b))

        if type(a) is dict:
            if a.keys() != b.keys():
                return True
            return any(DataObject.isDifferent(a[k], b[k]) for k in a)

        return a != b

    def toNative(self) -> dict[str, Any]:
        """Return complete data dict. This is the internal dict itself, not a copy"""
        return self._data

    def toEvent(self) -> dict[str, Any]:
        """Return data to send in Event (all data minus LOCAL_ATTRIBUTES and
        CACHEONLY_ATTRIBUTES)"""
        return {
            k: self._data[k]
            for k in self._data.keys()
            - self.LOCAL_ATTRIBUTES
            - self.CACHEONLY_ATTRIBUTES
        }

    def mergeWith(self, other: "DataObject", raiseExceptionOnConflict=False):
        """Merge data of current instance with another.

        Attributes that are only set on other are copied to current instance.
        When an attribute is set on both with different values, the value of
        current instance is kept, and the conflict is either logged at debug
        level, or raised as HermesMergingConflictError if
        raiseExceptionOnConflict is True"""
        for k, v in other._data.items():
            if not hasattr(self, k):
                # Attribute wasn't set, so set it from other's value
                setattr(self, k, v)
                continue

            current = getattr(self, k)
            if not self.isDifferent(v, current):
                # Attributes have same value
                continue

            # Attribute was set, and had another value than other's
            err = (
                f"Merging conflict. Attribute '{k}' exist on both objects with"
                f" differents values ({repr(self)}:"
                f" '{current}' / {repr(other)}: '{v}')"
            )
            if raiseExceptionOnConflict:
                raise HermesMergingConflictError(err)
            __hermes__.logger.debug(f"{err}. The first one is kept")

    def getPKey(self) -> Any:
        """Return primary key value: a tuple of values if PRIMARYKEY_ATTRIBUTE is
        a tuple, the single attribute value otherwise"""
        if isinstance(self.PRIMARYKEY_ATTRIBUTE, tuple):
            return tuple(getattr(self, key) for key in self.PRIMARYKEY_ATTRIBUTE)
        return getattr(self, self.PRIMARYKEY_ATTRIBUTE)

    def getType(self) -> str:
        """Return current class name"""
        return self.__class__.__name__

    def __repr__(self) -> str:
        """String representation of current instance: the TOSTRING template
        rendered with "data" dict if set, "<ClassName[primarykey]>" otherwise"""
        if isinstance(self.TOSTRING, Template):
            return self.TOSTRING.render(self._data)
        return f"<{self.getType()}[{self.getPKey()}]>"

    def __str__(self) -> str:
        """Multiline string representation of current instance, with data it contains.
        Values of SECRETS_ATTRIBUTES are replaced by a placeholder showing only
        their type"""
        lines = [repr(self)]
        for attr in sorted(self._data):
            if attr in self.SECRETS_ATTRIBUTES:
                lines.append(f" - {attr}: <SECRET_VALUE({type(getattr(self, attr))})>")
            else:
                lines.append(f" - {attr}: {repr(getattr(self, attr))}")
        return "\n".join(lines)