Coverage for lib/datamodel/dataschema.py: 96%

140 statements  

« prev     ^ index     » next       coverage.py v7.10.1, created at 2025-07-28 07:25 +0000

1#!/usr/bin/env python3 

2# -*- coding: utf-8 -*- 

3 

4# Hermes : Change Data Capture (CDC) tool from any source(s) to any target 

5# Copyright (C) 2023, 2024 INSA Strasbourg 

6# 

7# This file is part of Hermes. 

8# 

9# Hermes is free software: you can redistribute it and/or modify 

10# it under the terms of the GNU General Public License as published by 

11# the Free Software Foundation, either version 3 of the License, or 

12# (at your option) any later version. 

13# 

14# Hermes is distributed in the hope that it will be useful, 

15# but WITHOUT ANY WARRANTY; without even the implied warranty of 

16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 

17# GNU General Public License for more details. 

18# 

19# You should have received a copy of the GNU General Public License 

20# along with Hermes. If not, see <https://www.gnu.org/licenses/>. 

21 

22 

23from copy import deepcopy 

24from typing import Any 

25 

26from lib.datamodel.serialization import LocalCache 

27from lib.datamodel.dataobject import DataObject 

28from lib.datamodel.dataobjectlist import DataObjectList 

29from lib.datamodel.diffobject import DiffObject 

30from lib.datamodel.foreignkey import ForeignKey 

31 

32 

class HermesInvalidDataschemaError(Exception):
    """Error raised by Dataschema when an object type of the schema is missing a
    mandatory attribute, or when one of its attributes has an unexpected type"""

35 

36 

class HermesInvalidForeignkeysError(Exception):
    """Error raised by Dataschema when at least one entry of the FOREIGN_KEYS
    declared in the schema is invalid"""

39 

40 

41class Dataschema(LocalCache): 

42 """Handle the Dataschema computed from server config, or received from server on 

43 clients side 

44 This class will offer the main datamodel types names and their corresponding 

45 DataObject and DataObjectList subclasses in 'objectTypes' and 'objectlistTypes' 

46 attributes. 

47 """ 

48 

49 @classmethod 

50 def migrate_from_v1_0_0_alpha_5_to_v1_0_0_alpha_6( 

51 cls: "Dataschema", jsondict: Any | dict[Any, Any] 

52 ) -> Any | dict[Any, Any]: 

53 # Initialize new FOREIGN_KEYS attribute 

54 for objtypedata in jsondict.values(): 

55 if "FOREIGN_KEYS" not in objtypedata: 

56 objtypedata["FOREIGN_KEYS"] = {} 

57 return jsondict 

58 

59 def __init__( 

60 self, 

61 from_raw_dict: None | dict[str, Any] = None, 

62 from_json_dict: None | dict[str, Any] = None, 

63 ): 

64 """Setup a new DataSchema""" 

65 

66 self.objectTypes: dict[str, type[DataObject]] = {} 

67 """Contains the datamodel object types specified in server datamodel or in 

68 client schema with object name as key, and dynamically created DataObject 

69 subclass as value 

70 """ 

71 

72 self.objectlistTypes: dict[str, type[DataObjectList]] = {} 

73 """Contains the datamodel objectlist types specified in server datamodel or in 

74 client schema with object name as key, and dynamically created DataObjectList 

75 subclass as value 

76 """ 

77 

78 # Args validity check 

79 if from_raw_dict is None and from_json_dict is None: 

80 err = ( 

81 "Cannot instantiate schema from nothing: you must specify one data" 

82 " source" 

83 ) 

84 __hermes__.logger.critical(err) 

85 raise AttributeError(err) 

86 

87 if from_raw_dict is not None and from_json_dict is not None: 

88 err = "Cannot instantiate schema from multiple data sources at once" 

89 __hermes__.logger.critical(err) 

90 raise AttributeError(err) 

91 

92 from_dict: dict[str, Any] = ( 

93 from_raw_dict if from_raw_dict is not None else from_json_dict 

94 ) 

95 

96 if from_json_dict is not None: 

97 # Update data types if imported from json 

98 for typesettings in from_dict.values(): 

99 for k, v in typesettings.items(): 

100 if type(v) is list: 

101 if k == "PRIMARYKEY_ATTRIBUTE": 

102 typesettings[k] = tuple(v) 

103 else: 

104 typesettings[k] = set(v) 

105 super().__init__("schema", "_dataschema") 

106 

107 # Schema validity check 

108 self._schema: dict[str, Any] = {} 

109 

110 for objtype, objdata in from_dict.items(): 

111 for attr, attrtype in { 

112 "HERMES_ATTRIBUTES": [list, tuple, set], 

113 "SECRETS_ATTRIBUTES": [list, tuple, set], 

114 "CACHEONLY_ATTRIBUTES": [list, tuple, set], 

115 "LOCAL_ATTRIBUTES": [list, tuple, set], 

116 "PRIMARYKEY_ATTRIBUTE": [str, list, tuple], 

117 "FOREIGN_KEYS": [dict], 

118 }.items(): 

119 if attr not in objdata: 

120 if attr in ("CACHEONLY_ATTRIBUTES", "LOCAL_ATTRIBUTES"): 

121 objdata[attr] = set() 

122 else: 

123 raise HermesInvalidDataschemaError( 

124 f"'{objtype}' is missing the attribute '{attr}' in received" 

125 " json Dataschema" 

126 ) 

127 if type(objdata[attr]) not in attrtype: 

128 raise HermesInvalidDataschemaError( 

129 f"'{objtype}.{attr}' has wrong type in received json Dataschema" 

130 f" ('{type(objdata[attr])}' instead of '{attrtype}')" 

131 ) 

132 self._schema[objtype] = { 

133 "HERMES_ATTRIBUTES": set(objdata["HERMES_ATTRIBUTES"]), 

134 "SECRETS_ATTRIBUTES": objdata["SECRETS_ATTRIBUTES"], 

135 "CACHEONLY_ATTRIBUTES": objdata["CACHEONLY_ATTRIBUTES"], 

136 "LOCAL_ATTRIBUTES": objdata["LOCAL_ATTRIBUTES"], 

137 "PRIMARYKEY_ATTRIBUTE": objdata["PRIMARYKEY_ATTRIBUTE"], 

138 "FOREIGN_KEYS": objdata["FOREIGN_KEYS"], 

139 } 

140 

141 if "TOSTRING" in objdata: 

142 self._schema[objtype]["TOSTRING"] = objdata["TOSTRING"] 

143 else: 

144 self._schema[objtype]["TOSTRING"] = None 

145 

146 self._setupDataobjects() 

147 self._setupForeignKeys() 

148 

149 def _setupForeignKeys(self): 

150 """Ensure that objtypes and attributes specified in FOREIGN_KEYS exist in 

151 schema, and create ForeignKey instances""" 

152 

153 # List of errors met 

154 errs: list[str] = [] 

155 fkeys: dict[str, list[ForeignKey]] = {} 

156 

157 for objname, objdata in self._schema.items(): 

158 fkeys[objname] = [] 

159 for attr, fk in objdata["FOREIGN_KEYS"].items(): 

160 # Validation 

161 if len(fk) != 2: 

162 errs.append( 

163 f"<{objname}.{attr}>: invalid content. 2 items expected," 

164 f" but {len(fk)} found. It is probably a bug." 

165 ) 

166 continue 

167 fkobjname, fkattr = fk 

168 if attr not in objdata["HERMES_ATTRIBUTES"]: 

169 errs.append( 

170 f"<{objname}.{attr}>: the attribute '{attr}' doesn't exist in" 

171 f" '{objname}' in datamodel" 

172 ) 

173 continue 

174 if type(objdata["PRIMARYKEY_ATTRIBUTE"]) is str: 

175 if attr != objdata["PRIMARYKEY_ATTRIBUTE"]: 

176 errs.append( 

177 f"<{objname}.{attr}>: the attribute '{attr}' isn't the" 

178 f" primary key of '{objname}' in datamodel" 

179 ) 

180 continue 

181 else: 

182 if attr not in objdata["PRIMARYKEY_ATTRIBUTE"]: 

183 errs.append( 

184 f"<{objname}.{attr}>: the attribute '{attr}' isn't a" 

185 f" primary key of '{objname}' in datamodel" 

186 ) 

187 continue 

188 if fkobjname not in self._schema: 

189 errs.append( 

190 f"<{objname}.{attr}>: the objtype '{fkobjname}' doesn't exist" 

191 " in datamodel" 

192 ) 

193 continue 

194 if fkattr not in self._schema[fkobjname]["HERMES_ATTRIBUTES"]: 

195 errs.append( 

196 f"<{objname}.{attr}>: the attribute '{fkattr}' doesn't exist in" 

197 f" '{fkobjname}' in datamodel" 

198 ) 

199 continue 

200 if type(self._schema[fkobjname]["PRIMARYKEY_ATTRIBUTE"]) is not str: 

201 # Implementation may be possible, but with poor performances 

202 errs.append( 

203 f"<{objname}.{attr}>: the objtype '{fkobjname}' has a tuple as" 

204 " primary key, foreign keys can't currently be set on a tuple" 

205 ) 

206 continue 

207 if fkattr != self._schema[fkobjname]["PRIMARYKEY_ATTRIBUTE"]: 

208 errs.append( 

209 f"<{objname}.{attr}>: the attribute '{fkattr}' is not the" 

210 f" primary key of '{fkobjname}' in datamodel" 

211 ) 

212 continue 

213 

214 # No errors, instanciate ForeignKey object 

215 fk = ForeignKey( 

216 from_obj=objname, 

217 from_attr=attr, 

218 to_obj=fkobjname, 

219 to_attr=fkattr, 

220 ) 

221 fkeys[objname].append(fk) 

222 

223 # Add fkeys to objects lists 

224 self.objectlistTypes[objname].FOREIGNKEYS = fkeys[objname] 

225 

226 if errs: 

227 errmsg = "Invalid foreignkeys:\n - " + "\n - ".join(errs) 

228 __hermes__.logger.critical(errmsg) 

229 raise HermesInvalidForeignkeysError(errmsg) 

230 

231 # No errors met, check for circular foreign keys references 

232 for objname in self._schema: 

233 ForeignKey.checkForCircularForeignKeysRefs( 

234 fkeys, self.objectlistTypes[objname].FOREIGNKEYS 

235 ) 

236 

237 def _setupDataobjects(self): 

238 """Set up dynamic subclasses according to schema""" 

239 self._fillObjectTypes(self._schema.keys()) 

240 

241 for objname, objcls in self.objectTypes.items(): 

242 objcls.HERMES_ATTRIBUTES = set(self._schema[objname]["HERMES_ATTRIBUTES"]) 

243 objcls.SECRETS_ATTRIBUTES = self._schema[objname]["SECRETS_ATTRIBUTES"] 

244 objcls.CACHEONLY_ATTRIBUTES = self._schema[objname]["CACHEONLY_ATTRIBUTES"] 

245 objcls.LOCAL_ATTRIBUTES = self._schema[objname]["LOCAL_ATTRIBUTES"] 

246 objcls.PRIMARYKEY_ATTRIBUTE = self._schema[objname]["PRIMARYKEY_ATTRIBUTE"] 

247 objcls.TOSTRING = self._schema[objname]["TOSTRING"] 

248 # Remove TOSTRING as we don't need it anymore, and because a compiled Jinja 

249 # template can't be copied with deepcopy 

250 del self._schema[objname]["TOSTRING"] 

251 __hermes__.logger.debug( 

252 f"<{objname} has been set up from schema>:" 

253 f" PRIMARYKEY_ATTRIBUTE='{objcls.PRIMARYKEY_ATTRIBUTE}'" 

254 f" - HERMES_ATTRIBUTES={objcls.HERMES_ATTRIBUTES}" 

255 f" - SECRETS_ATTRIBUTES={objcls.SECRETS_ATTRIBUTES}" 

256 f" - CACHEONLY_ATTRIBUTES={objcls.CACHEONLY_ATTRIBUTES}" 

257 f" - LOCAL_ATTRIBUTES={objcls.LOCAL_ATTRIBUTES}" 

258 ) 

259 

260 def _fillObjectTypes(self, objnames: list[str]): 

261 """Create empty dynamic subclasses from list of datamodel object types names""" 

262 # Delete old and unused classes if schema has changed 

263 for objname in self.objectTypes.keys() - set(objnames): 

264 del self.objectTypes[objname] 

265 del self.objectlistTypes[objname] 

266 

267 # (Re-)create classes 

268 for objname in objnames: 

269 self.objectTypes[objname] = Dataschema.createSubclass(objname, DataObject) 

270 

271 objlistcls = Dataschema.createSubclass( 

272 # The trailing underscore is here to avoid name conflicts between 

273 # DataObjectLists and server DatamodelFragment DataObjects 

274 objname + "List_", 

275 DataObjectList, 

276 ) 

277 objlistcls.OBJTYPE = self.objectTypes[objname] 

278 self.objectlistTypes[objname] = objlistcls 

279 

280 @staticmethod 

281 def createSubclass(name: str, baseClass: type[Any]) -> type[Any]: 

282 """Dynamically create a subclass of baseClass with specified name, and return 

283 it""" 

284 newclass: type[Any] = type(name, (baseClass,), {}) 

285 newclass._clsname_ = name 

286 return newclass 

287 

288 def diffFrom(self, other: "Dataschema") -> DiffObject: 

289 """Return DiffObject with differences (attributes names) of current instance 

290 from another""" 

291 diff = DiffObject() 

292 

293 s = self.schema.keys() 

294 o = other.schema.keys() 

295 commonattrs = s & o 

296 

297 diff.appendRemoved(o - s) 

298 diff.appendAdded(s - o) 

299 

300 for k, v in self.schema.items(): 

301 if k in commonattrs and DataObject.isDifferent(v, other.schema[k]): 

302 diff.appendModified(k) 

303 

304 return diff 

305 

306 def secretsAttributesOf(self, objtype: str) -> set[str]: 

307 """Returns a set containing the SECRETS_ATTRIBUTES of specified objtype 

308 As the set is returned by reference, it MUST not be modified 

309 """ 

310 return self._schema[objtype]["SECRETS_ATTRIBUTES"] 

311 

312 @property 

313 def schema(self) -> dict[str, Any]: 

314 """Returns the public schema (without CACHEONLY_ATTRIBUTES and 

315 LOCAL_ATTRIBUTES)""" 

316 schema = deepcopy(self._schema) 

317 for objschema in schema.values(): 

318 objschema["HERMES_ATTRIBUTES"] -= ( 

319 objschema["CACHEONLY_ATTRIBUTES"] | objschema["LOCAL_ATTRIBUTES"] 

320 ) 

321 del objschema["CACHEONLY_ATTRIBUTES"] 

322 del objschema["LOCAL_ATTRIBUTES"] 

323 

324 return schema