Coverage for lib/datamodel/dataschema.py: 96%
140 statements
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-28 07:25 +0000
« prev ^ index » next coverage.py v7.10.1, created at 2025-07-28 07:25 +0000
1#!/usr/bin/env python3
2# -*- coding: utf-8 -*-
4# Hermes : Change Data Capture (CDC) tool from any source(s) to any target
5# Copyright (C) 2023, 2024 INSA Strasbourg
6#
7# This file is part of Hermes.
8#
9# Hermes is free software: you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation, either version 3 of the License, or
12# (at your option) any later version.
13#
14# Hermes is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with Hermes. If not, see <https://www.gnu.org/licenses/>.
23from copy import deepcopy
24from typing import Any
26from lib.datamodel.serialization import LocalCache
27from lib.datamodel.dataobject import DataObject
28from lib.datamodel.dataobjectlist import DataObjectList
29from lib.datamodel.diffobject import DiffObject
30from lib.datamodel.foreignkey import ForeignKey
class HermesInvalidDataschemaError(Exception):
    """Error signalling that a dataschema failed validation: a mandatory attribute
    is missing from an object type, or an attribute has an unexpected type"""
class HermesInvalidForeignkeysError(Exception):
    """Error signalling that at least one foreign key declared in the dataschema
    is invalid"""
class Dataschema(LocalCache):
    """Handle the Dataschema computed from server config, or received from the
    server on the client side.

    This class offers the main datamodel type names and their corresponding
    DataObject and DataObjectList subclasses in the 'objectTypes' and
    'objectlistTypes' attributes.
    """

    # NOTE(review): '__hermes__' is neither defined nor imported in this module;
    # presumably the application injects it as a builtin - confirm.

    @classmethod
    def migrate_from_v1_0_0_alpha_5_to_v1_0_0_alpha_6(
        cls: "type[Dataschema]", jsondict: Any | dict[Any, Any]
    ) -> Any | dict[Any, Any]:
        """Add an empty 'FOREIGN_KEYS' entry to every object type of jsondict that
        doesn't have one yet. jsondict is modified in place and returned"""
        for objtypedata in jsondict.values():
            objtypedata.setdefault("FOREIGN_KEYS", {})
        return jsondict

    def __init__(
        self,
        from_raw_dict: None | dict[str, Any] = None,
        from_json_dict: None | dict[str, Any] = None,
    ):
        """Setup a new DataSchema from exactly one of the two possible sources.

        from_raw_dict: schema dict, with object type name as key and its settings
            dict as value, whose values already have their final Python types
        from_json_dict: same content, but as loaded from JSON: every list found in
            the settings is converted in place to a tuple (PRIMARYKEY_ATTRIBUTE) or
            to a set (any other setting)

        The settings dicts of the specified source are modified in place: the
        missing optional CACHEONLY_ATTRIBUTES and LOCAL_ATTRIBUTES are filled with
        an empty set.

        Raises AttributeError when none or both sources are specified, and
        HermesInvalidDataschemaError when a mandatory setting is missing or when a
        setting has an unexpected type. The foreign keys setup may also raise
        HermesInvalidForeignkeysError.
        """

        self.objectTypes: dict[str, type[DataObject]] = {}
        """Contains the datamodel object types specified in server datamodel or in
        client schema with object name as key, and dynamically created DataObject
        subclass as value
        """

        self.objectlistTypes: dict[str, type[DataObjectList]] = {}
        """Contains the datamodel objectlist types specified in server datamodel or in
        client schema with object name as key, and dynamically created DataObjectList
        subclass as value
        """

        # Args validity check
        if from_raw_dict is None and from_json_dict is None:
            err = (
                "Cannot instantiate schema from nothing: you must specify one data"
                " source"
            )
            __hermes__.logger.critical(err)
            raise AttributeError(err)

        if from_raw_dict is not None and from_json_dict is not None:
            err = "Cannot instantiate schema from multiple data sources at once"
            __hermes__.logger.critical(err)
            raise AttributeError(err)

        from_dict: dict[str, Any] = (
            from_raw_dict if from_raw_dict is not None else from_json_dict
        )

        if from_json_dict is not None:
            # Update data types if imported from json, as JSON only knows lists.
            # Only existing keys are reassigned, so the dict size doesn't change
            # while it is iterated
            for typesettings in from_dict.values():
                for k, v in typesettings.items():
                    if isinstance(v, list):
                        if k == "PRIMARYKEY_ATTRIBUTE":
                            typesettings[k] = tuple(v)
                        else:
                            typesettings[k] = set(v)

        super().__init__("schema", "_dataschema")

        # Schema validity check
        self._schema: dict[str, Any] = {}

        # Accepted types of each setting. Kept as lists because the list is
        # rendered as is in the "wrong type" error message
        expectedtypes: dict[str, list[type]] = {
            "HERMES_ATTRIBUTES": [list, tuple, set],
            "SECRETS_ATTRIBUTES": [list, tuple, set],
            "CACHEONLY_ATTRIBUTES": [list, tuple, set],
            "LOCAL_ATTRIBUTES": [list, tuple, set],
            "PRIMARYKEY_ATTRIBUTE": [str, list, tuple],
            "FOREIGN_KEYS": [dict],
        }
        optionalattrs = ("CACHEONLY_ATTRIBUTES", "LOCAL_ATTRIBUTES")

        for objtype, objdata in from_dict.items():
            for attr, attrtype in expectedtypes.items():
                if attr not in objdata:
                    if attr not in optionalattrs:
                        raise HermesInvalidDataschemaError(
                            f"'{objtype}' is missing the attribute '{attr}' in received"
                            " json Dataschema"
                        )
                    objdata[attr] = set()

                if not isinstance(objdata[attr], tuple(attrtype)):
                    raise HermesInvalidDataschemaError(
                        f"'{objtype}.{attr}' has wrong type in received json Dataschema"
                        f" ('{type(objdata[attr])}' instead of '{attrtype}')"
                    )

            # The attributes collections are always stored as sets, whatever the
            # accepted input type was: the 'schema' property relies on set
            # operators, and secretsAttributesOf() is declared as returning a set
            self._schema[objtype] = {
                "HERMES_ATTRIBUTES": set(objdata["HERMES_ATTRIBUTES"]),
                "SECRETS_ATTRIBUTES": set(objdata["SECRETS_ATTRIBUTES"]),
                "CACHEONLY_ATTRIBUTES": set(objdata["CACHEONLY_ATTRIBUTES"]),
                "LOCAL_ATTRIBUTES": set(objdata["LOCAL_ATTRIBUTES"]),
                "PRIMARYKEY_ATTRIBUTE": objdata["PRIMARYKEY_ATTRIBUTE"],
                "FOREIGN_KEYS": objdata["FOREIGN_KEYS"],
                # Optional, None when unspecified
                "TOSTRING": objdata.get("TOSTRING"),
            }

        self._setupDataobjects()
        self._setupForeignKeys()

    def _foreignKeyError(
        self, objname: str, attr: str, fkobjname: str, fkattr: str
    ) -> str | None:
        """Validate the foreign key declared on objname.attr and referencing
        fkobjname.fkattr. Return the message of the first problem met, or None when
        the foreign key is valid"""
        objdata = self._schema[objname]

        if attr not in objdata["HERMES_ATTRIBUTES"]:
            return (
                f"<{objname}.{attr}>: the attribute '{attr}' doesn't exist in"
                f" '{objname}' in datamodel"
            )

        # The source attribute must be the primary key, or a part of it when the
        # primary key is made of several attributes
        if isinstance(objdata["PRIMARYKEY_ATTRIBUTE"], str):
            if attr != objdata["PRIMARYKEY_ATTRIBUTE"]:
                return (
                    f"<{objname}.{attr}>: the attribute '{attr}' isn't the"
                    f" primary key of '{objname}' in datamodel"
                )
        elif attr not in objdata["PRIMARYKEY_ATTRIBUTE"]:
            return (
                f"<{objname}.{attr}>: the attribute '{attr}' isn't a"
                f" primary key of '{objname}' in datamodel"
            )

        if fkobjname not in self._schema:
            return (
                f"<{objname}.{attr}>: the objtype '{fkobjname}' doesn't exist"
                " in datamodel"
            )

        fkobjdata = self._schema[fkobjname]
        if fkattr not in fkobjdata["HERMES_ATTRIBUTES"]:
            return (
                f"<{objname}.{attr}>: the attribute '{fkattr}' doesn't exist in"
                f" '{fkobjname}' in datamodel"
            )

        if not isinstance(fkobjdata["PRIMARYKEY_ATTRIBUTE"], str):
            # Implementation may be possible, but with poor performances
            return (
                f"<{objname}.{attr}>: the objtype '{fkobjname}' has a tuple as"
                " primary key, foreign keys can't currently be set on a tuple"
            )

        if fkattr != fkobjdata["PRIMARYKEY_ATTRIBUTE"]:
            return (
                f"<{objname}.{attr}>: the attribute '{fkattr}' is not the"
                f" primary key of '{fkobjname}' in datamodel"
            )

        return None

    def _setupForeignKeys(self):
        """Ensure that objtypes and attributes specified in FOREIGN_KEYS exist in
        schema, and create ForeignKey instances.

        The ForeignKey instances of each object type are stored in the FOREIGNKEYS
        attribute of its DataObjectList subclass.

        Raises HermesInvalidForeignkeysError, with a message listing every problem
        met, when at least one foreign key is invalid.
        """

        # List of errors met
        errs: list[str] = []
        fkeys: dict[str, list[ForeignKey]] = {}

        for objname, objdata in self._schema.items():
            fkeys[objname] = []
            for attr, fkdef in objdata["FOREIGN_KEYS"].items():
                # Validation
                if len(fkdef) != 2:
                    errs.append(
                        f"<{objname}.{attr}>: invalid content. 2 items expected,"
                        f" but {len(fkdef)} found. It is probably a bug."
                    )
                    continue

                fkobjname, fkattr = fkdef
                err = self._foreignKeyError(objname, attr, fkobjname, fkattr)
                if err is not None:
                    errs.append(err)
                    continue

                # No errors, instantiate ForeignKey object
                fkeys[objname].append(
                    ForeignKey(
                        from_obj=objname,
                        from_attr=attr,
                        to_obj=fkobjname,
                        to_attr=fkattr,
                    )
                )

            # Add fkeys to objects lists
            self.objectlistTypes[objname].FOREIGNKEYS = fkeys[objname]

        if errs:
            errmsg = "Invalid foreignkeys:\n - " + "\n - ".join(errs)
            __hermes__.logger.critical(errmsg)
            raise HermesInvalidForeignkeysError(errmsg)

        # No errors met, check for circular foreign keys references
        for objname in self._schema:
            ForeignKey.checkForCircularForeignKeysRefs(
                fkeys, self.objectlistTypes[objname].FOREIGNKEYS
            )

    def _setupDataobjects(self):
        """Set up dynamic subclasses according to schema"""
        self._fillObjectTypes(list(self._schema))

        for objname, objcls in self.objectTypes.items():
            objschema = self._schema[objname]
            # HERMES_ATTRIBUTES is copied, the other collections are shared by
            # reference with the schema
            objcls.HERMES_ATTRIBUTES = set(objschema["HERMES_ATTRIBUTES"])
            objcls.SECRETS_ATTRIBUTES = objschema["SECRETS_ATTRIBUTES"]
            objcls.CACHEONLY_ATTRIBUTES = objschema["CACHEONLY_ATTRIBUTES"]
            objcls.LOCAL_ATTRIBUTES = objschema["LOCAL_ATTRIBUTES"]
            objcls.PRIMARYKEY_ATTRIBUTE = objschema["PRIMARYKEY_ATTRIBUTE"]
            # Remove TOSTRING as we don't need it anymore, and because a compiled Jinja
            # template can't be copied with deepcopy
            objcls.TOSTRING = objschema.pop("TOSTRING")
            __hermes__.logger.debug(
                f"<{objname} has been set up from schema>:"
                f" PRIMARYKEY_ATTRIBUTE='{objcls.PRIMARYKEY_ATTRIBUTE}'"
                f" - HERMES_ATTRIBUTES={objcls.HERMES_ATTRIBUTES}"
                f" - SECRETS_ATTRIBUTES={objcls.SECRETS_ATTRIBUTES}"
                f" - CACHEONLY_ATTRIBUTES={objcls.CACHEONLY_ATTRIBUTES}"
                f" - LOCAL_ATTRIBUTES={objcls.LOCAL_ATTRIBUTES}"
            )

    def _fillObjectTypes(self, objnames: list[str]):
        """Create empty dynamic subclasses from list of datamodel object types names"""
        # Delete old and unused classes if schema has changed. The difference is a
        # new set, so deleting entries while iterating over it is safe
        for objname in self.objectTypes.keys() - set(objnames):
            del self.objectTypes[objname]
            del self.objectlistTypes[objname]

        # (Re-)create classes
        for objname in objnames:
            self.objectTypes[objname] = Dataschema.createSubclass(objname, DataObject)

            objlistcls = Dataschema.createSubclass(
                # The trailing underscore is here to avoid name conflicts between
                # DataObjectLists and server DatamodelFragment DataObjects
                objname + "List_",
                DataObjectList,
            )
            objlistcls.OBJTYPE = self.objectTypes[objname]
            self.objectlistTypes[objname] = objlistcls

    @staticmethod
    def createSubclass(name: str, baseClass: type[Any]) -> type[Any]:
        """Dynamically create a subclass of baseClass with specified name, and return
        it. The name is also stored in the '_clsname_' attribute of the new class"""
        newclass: type[Any] = type(name, (baseClass,), {})
        newclass._clsname_ = name
        return newclass

    def diffFrom(self, other: "Dataschema") -> DiffObject:
        """Return DiffObject with differences (object types names) of the public
        schema of current instance from the one of another instance"""
        diff = DiffObject()

        # Each access to the 'schema' property builds a deep copy of the whole
        # schema: compute both only once
        ownschema = self.schema
        otherschema = other.schema
        owntypes = ownschema.keys()
        othertypes = otherschema.keys()

        diff.appendRemoved(othertypes - owntypes)
        diff.appendAdded(owntypes - othertypes)

        for objtype, objschema in ownschema.items():
            if objtype in otherschema and DataObject.isDifferent(
                objschema, otherschema[objtype]
            ):
                diff.appendModified(objtype)

        return diff

    def secretsAttributesOf(self, objtype: str) -> set[str]:
        """Returns a set containing the SECRETS_ATTRIBUTES of specified objtype
        As the set is returned by reference, it MUST not be modified

        Raises KeyError if objtype is not in schema
        """
        return self._schema[objtype]["SECRETS_ATTRIBUTES"]

    @property
    def schema(self) -> dict[str, Any]:
        """Returns the public schema (without CACHEONLY_ATTRIBUTES and
        LOCAL_ATTRIBUTES).

        The result is a deep copy of the internal schema, where the cache-only and
        local attributes have been removed from HERMES_ATTRIBUTES: it can be
        modified without affecting the current instance
        """
        schema = deepcopy(self._schema)
        for objschema in schema.values():
            objschema["HERMES_ATTRIBUTES"] -= (
                objschema["CACHEONLY_ATTRIBUTES"] | objschema["LOCAL_ATTRIBUTES"]
            )
            del objschema["CACHEONLY_ATTRIBUTES"]
            del objschema["LOCAL_ATTRIBUTES"]

        return schema