Coverage for jsonsubschema/_canonicalization.py : 51%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
'''
Created on June 24, 2019
@author: Andrew Habib
'''
6import copy
7import jsonschema
8import numbers
9import numpy
10import sys
12import jsonsubschema._constants as definitions
13import jsonsubschema._utils as utils
14from jsonsubschema._checkers import (
15 typeToConstructor,
16 boolToConstructor,
17 JSONtop,
18 JSONbot
19)
20from jsonsubschema.exceptions import UnsupportedEnumCanonicalization
# The empty schema; simplify_schema_and_embed_checkers maps it to JSONtop().
TOP = {}
# The negation of the empty schema; canonicalize_enum returns it when no
# enum value is valid, and simplify_schema_and_embed_checkers maps this
# shape to JSONbot().
BOT = {"not": {}}
def canonicalize_schema(obj):
    ''' Validate a JSON schema, canonicalize it, and validate the result.

    obj is first checked with utils.validate_schema (per the original
    note, this is expected to raise jsonschema.SchemaError on unknown
    types). Dict schemas are canonicalized through canonicalize_dict.
    Boolean schemas, where the validator in use accepts them, are
    replaced by their dict equivalents: True -> {} and
    False -> {"not": {}}.

    Returns the canonicalized schema.
    Raises TypeError if obj passed validation but is neither a dict nor
    a bool (previously this surfaced as an UnboundLocalError). '''
    # First, make sure the given json is a valid json schema.
    utils.validate_schema(obj)

    # Second, canonicalize the schema.
    if utils.is_dict(obj):
        canonical_schema = canonicalize_dict(obj)
    elif isinstance(obj, bool):
        # Fresh literals so callers never share the module-level constants.
        canonical_schema = {} if obj else {"not": {}}
    else:
        raise TypeError(
            "Cannot canonicalize a schema of type {}".format(
                type(obj).__name__))

    # Finally, ensure that the canonicalized schema is still a valid
    # json schema.
    utils.validate_schema(canonical_schema)

    return canonical_schema
def canonicalize_dict(d, outer_key=None):
    ''' Canonicalize one schema dict, or a container of schema dicts.

    d is the dict to canonicalize. outer_key is the keyword under which
    d was found in its parent schema (None at the top level); it decides
    whether d is a schema or a plain container of schemas.

    Returns d itself when it is {} or {"not": {}}, when it is a
    properties/patternProperties/dependencies container (its values are
    canonicalized in place), or when it holds no keyword from
    definitions.Jkeywords. Otherwise a deep copy of d is dispatched to
    the connector / enum / single-type / list-of-types canonicalizer. '''
    # Not actually needed, but lets tests of canonicalization
    # work properly.
    if d == {} or d == {"not": {}}:
        return d

    # NOTE: any other validation keyword next to a $ref should be
    # ignored (dropped). jsonref currently handles that case; it may
    # need handling here once recursive $ref is resolved independently
    # of jsonref.

    # Skip normal dict canonicalization for object.properties,
    # patternProperties and dependencies, because these are ordinary
    # dict containers whose values are the schemas.
    if outer_key in ("properties", "patternProperties"):
        for k, v in d.items():
            d[k] = canonicalize_dict(v)
        return d
    if outer_key == "dependencies":
        for k, v in d.items():
            # Non-dict dependency values are left untouched.
            if utils.is_dict(v):
                d[k] = canonicalize_dict(v)
        return d

    # Here, start dict canonicalization.
    if not definitions.Jkeywords.intersection(d.keys()):
        return d

    t = d.get("type")
    has_connectors = definitions.Jconnectors.intersection(d.keys())

    # Start canonicalization. Don't modify the original dict.
    d = copy.deepcopy(d)

    if has_connectors:
        return canonicalize_connectors(d)
    elif "enum" in d:
        return canonicalize_enum(d)
    elif utils.is_str(t):
        return canonicalize_single_type(d)
    elif utils.is_list(t):
        return canonicalize_list_of_types(d)
    else:
        # No explicit type: treat the schema as allowing every type.
        d["type"] = definitions.Jtypes
        return canonicalize_list_of_types(d)
def canonicalize_single_type(d):
    ''' Canonicalize a schema dict whose "type" is a single type name.

    d is modified in place: keywords that are not common keywords, not
    keywords of d's type, and not non-validation keywords are removed;
    dict values are canonicalized with canonicalize_dict (passing the
    keyword as outer_key); "enum" is narrowed with
    utils.get_typed_enum_vals; "required" is de-duplicated and sorted;
    any other list value is canonicalized element by element.

    Returns rewrite_enum(d) if d still has an "enum", otherwise d.
    Returns None if the type is not in definitions.Jtypes; the original
    notes say the up-front jsonschema validation prevents that case. '''
    t = d.get("type")
    if t not in definitions.Jtypes:
        return None

    # Loop-invariant: the keywords that are meaningful for this type.
    type_keywords = definitions.JtypesToKeywords.get(t)

    # Iterate over a snapshot because entries are removed and replaced.
    for k, v in list(d.items()):
        if (k not in definitions.Jcommonkw
                and k not in type_keywords
                and k not in definitions.JNonValidation):
            # Remove irrelevant keywords.
            d.pop(k)
        elif utils.is_dict(v):
            d[k] = canonicalize_dict(v, k)
        elif utils.is_list(v):
            if k == "enum":
                d[k] = utils.get_typed_enum_vals(v, t)
            elif k == "required":
                d[k] = sorted(set(v))
            else:
                # Any other list is treated as a list of schemas
                # (e.g. operands of boolean connectors).
                d[k] = [canonicalize_dict(i) for i in v]

    if "enum" in d:
        return rewrite_enum(d)
    return d
def canonicalize_list_of_types(d):
    ''' Split a schema with several types into an anyOf of single types.

    d must have an iterable "type". For every type name (in sorted
    order) that is in definitions.Jtypes, a deep copy of d restricted to
    that type is canonicalized with canonicalize_single_type.

    Returns {"anyOf": [...]} holding those results; the list is empty
    when d["type"] is empty. The anyOf wrapper is kept even for a single
    type. '''
    anyofs = []
    for t_i in sorted(d.get("type")):
        # Unknown type names are skipped; the original notes say the
        # up-front jsonschema validation prevents them from occurring.
        if t_i in definitions.Jtypes:
            s_i = copy.deepcopy(d)
            s_i["type"] = t_i
            anyofs.append(canonicalize_single_type(s_i))
    return {"anyOf": anyofs}
def canonicalize_enum(d):
    ''' Canonicalize a schema dict that has an "enum" keyword.

    The enum is first narrowed to utils.get_valid_enum_vals(d["enum"], d).
    If nothing remains, BOT is returned. Otherwise d["enum"] is replaced
    by the remaining values and d["type"] is set to the JSON types of
    those values (intersected with the declared type(s), if any). The
    result is then handed to canonicalize_list_of_types.

    d is modified in place. '''
    valid_vals = utils.get_valid_enum_vals(d["enum"], d)
    if not valid_vals:
        return BOT

    d["enum"] = valid_vals
    # JSON type names of the values that actually occur in the enum.
    actual_t = sorted(
        {definitions.PyTypesToJtypes.get(type(i)) for i in valid_vals})
    if "type" in d:
        orig_t = d["type"]
        orig_t = {orig_t} if utils.is_str(orig_t) else set(orig_t)
        # Keep only declared types that some enum value really has.
        d["type"] = orig_t.intersection(actual_t)
    else:
        d["type"] = actual_t
    return canonicalize_list_of_types(d)
def canonicalize_connectors(d):
    ''' Canonicalize a schema dict that uses boolean connectors.

    Connectors are the keys of d found in definitions.Jconnectors.

    * A single connector and no other keyword from definitions.Jkeywords:
      - "not": canonicalize the negated schema, then canonicalize_not.
      - "oneOf": a single operand is canonicalized directly; otherwise
        it is rewritten as an anyOf of allOf's, each asserting one
        operand and negating all the others, and canonicalized again.
      - anything else: canonicalize each operand, then return
        simplify_schema_and_embed_checkers(d).
    * Otherwise each connector, plus the remaining keywords taken
      together, is canonicalized separately and the parts are returned
      as {"allOf": [...]}.

    d is modified in place. '''
    connectors = definitions.Jconnectors.intersection(d.keys())
    lhs_kw = definitions.Jkeywords.intersection(d.keys())
    lhs_kw_without_connectors = lhs_kw.difference(connectors)

    # Single connector.
    if len(connectors) == 1 and not lhs_kw_without_connectors:
        c = connectors.pop()

        if c == "not":
            d["not"] = canonicalize_dict(d["not"])
            return canonicalize_not(d)

        if c == "oneOf":
            operands = d[c]
            if len(operands) == 1:
                return canonicalize_dict(operands.pop())
            # Exactly-one-of: operand i holds and every other one fails.
            anyofs = [
                {"allOf": [one] + [{"not": other}
                                   for j, other in enumerate(operands)
                                   if j != i]}
                for i, one in enumerate(operands)
            ]
            return canonicalize_connectors({"anyOf": anyofs})

        # Here, the connector is either allOf or anyOf,
        # so simplify it before proceeding further.
        d[c] = [canonicalize_dict(i) for i in d[c]]
        return simplify_schema_and_embed_checkers(d)

    # Connector + other keywords. Combine them first.
    allofs = []
    # Sorted so the allOf order does not depend on set iteration order,
    # which for strings varies between interpreter runs.
    for c in sorted(connectors):
        allofs.append(canonicalize_dict({c: d[c]}))
        del d[c]
    if lhs_kw_without_connectors:
        allofs.append(canonicalize_dict(
            {k: d[k] for k in sorted(lhs_kw_without_connectors)}))
    return {"allOf": allofs}
def canonicalize_not(d):
    ''' Canonicalize a schema dict d that has a "not" keyword.

    * If the negated schema is {} or its "type" is in definitions.Jtypes,
      d is returned unchanged.
    * If the negated schema has exactly one connector:
      - "not": the doubly negated schema is returned as is
        (two nots cancel each other).
      - "anyOf": returns {"allOf": [...]} of the negated operands.
      - "allOf": the negated schema is first passed through
        canonicalize_connectors, then negated again.
      - any other connector: falls through and returns None.
    * Otherwise the process is terminated through sys.exit. '''
    negated_schema = d["not"]
    t = negated_schema.get("type")

    if negated_schema == {} or t in definitions.Jtypes:
        return d

    connectors = definitions.Jconnectors.intersection(negated_schema.keys())
    if len(connectors) != 1:
        # NOTE(review): exiting the interpreter from library code is
        # harsh; consider raising a package exception instead.
        sys.exit(">>>>>> Ewwwww! Shouldn't be here during canonicalization. <<<<<<")

    c = connectors.pop()
    if c == "not":
        # Case "not": {"not": {...}} -- return the positive schema.
        return negated_schema["not"]
    if c == "anyOf":
        return {"allOf": [canonicalize_not({"not": i})
                          for i in negated_schema["anyOf"]]}
    if c == "allOf":
        # Should not normally be reached; the schema is expected to be
        # canonicalized and simplified by now.
        return canonicalize_not({'not': canonicalize_connectors(negated_schema)})

    # Remaining single connectors (e.g. "oneOf") should have been
    # canonicalized away already; nothing is returned for them.
    return None
def rewrite_enum(d):
    ''' Rewrite a single-type schema with an "enum" into constraint form.

    d is read through its "type" and "enum" keys.

    * "string": a pattern that alternates one "^value$" per enum value.
    * "integer": an anyOf with one minimum == maximum schema per value.
    * "number": as for integer, where integer-equivalent values (per
      utils.is_int_equiv) get type "integer", NaN is kept as an enum of
      NaN, and any other value gets type "number".
    * "boolean": d is returned unchanged.
    * "null": {"type": "null"} is returned, without the enum.

    For string/integer/number the original enum list is also attached
    to the returned dict under "enum".

    Raises UnsupportedEnumCanonicalization for "array" and "object".
    Returns None for any other type. '''
    t = d.get("type")
    enum = d.get("enum")
    ret = None

    if t == "string":
        # NOTE(review): values are not regex-escaped, so an enum value
        # containing regex metacharacters changes what the pattern
        # matches -- confirm whether that is intended.
        pattern = "|".join("^" + str(x) + "$" for x in enum)
        ret = {"type": "string", "pattern": pattern}
    elif t == "integer":
        ret = {"anyOf": [{"type": "integer", "minimum": i, "maximum": i}
                         for i in enum]}
    elif t == "number":
        ret = {"anyOf": []}
        for i in enum:
            if utils.is_int_equiv(i):
                ret["anyOf"].append(
                    {"type": "integer", "minimum": i, "maximum": i})
            elif numpy.isnan(i):
                # numpy.nan: the numpy.NaN alias was removed in NumPy 2.0.
                ret["anyOf"].append({"type": "number", "enum": [numpy.nan]})
            else:
                ret["anyOf"].append(
                    {"type": "number", "minimum": i, "maximum": i})
    elif t == "boolean":
        # Booleans are allowed to keep enums,
        # since there are only two values.
        return d
    elif t == "null":
        # A null schema is rewritten without enum;
        # it is a single value anyway.
        return {"type": "null"}

    if ret:
        ret["enum"] = enum
        return ret

    # Unsupported cases of rewriting enums.
    if t == 'array' or t == 'object':
        raise UnsupportedEnumCanonicalization(tau=t, schema=d)
    return None
def simplify_schema_and_embed_checkers(s):
    ''' Replace a canonicalized schema dict by checker objects.

    This function assumes the schema s is already canonicalized,
    so it must be a dict. s is modified in place: the sub-schemas under
    items, additionalItems, properties, patternProperties and
    additionalProperties are replaced by their simplified form first.

    Returns
      * JSONtop() if s is {} or has no keyword from definitions.Jkeywords;
      * JSONbot() if s["not"] == {};
      * typeToConstructor.get(type)(s) if s has a "type";
      * the .neg(...) of the constructor for s["not"]["type"] if s has
        a "not";
      * the anyOf / allOf constructor from boolToConstructor applied to
        the simplified operands;
      * None if none of the above applies. '''
    if s == {} or not definitions.Jkeywords.intersection(s.keys()):
        return JSONtop()
    if "not" in s and s["not"] == {}:
        return JSONbot()

    # json.array specific
    if "items" in s:
        if utils.is_dict(s["items"]):
            s["items"] = simplify_schema_and_embed_checkers(s["items"])
        elif utils.is_list(s["items"]):
            s["items"] = [simplify_schema_and_embed_checkers(i)
                          for i in s["items"]]

    if "additionalItems" in s and utils.is_dict(s["additionalItems"]):
        s["additionalItems"] = simplify_schema_and_embed_checkers(
            s["additionalItems"])

    # json.object specific
    if "properties" in s:
        s["properties"] = {k: simplify_schema_and_embed_checkers(v)
                           for k, v in s["properties"].items()}

    if "patternProperties" in s:
        s["patternProperties"] = {k: simplify_schema_and_embed_checkers(v)
                                  for k, v in s["patternProperties"].items()}

    if "additionalProperties" in s and utils.is_dict(s["additionalProperties"]):
        s["additionalProperties"] = simplify_schema_and_embed_checkers(
            s["additionalProperties"])

    # Dispatch on the kind of schema; "type" takes precedence.
    if "type" in s:
        return typeToConstructor.get(s["type"])(s)

    if "not" in s:
        return typeToConstructor.get(s["not"]["type"]).neg(s["not"])

    if "anyOf" in s:
        anyofs = [simplify_schema_and_embed_checkers(i) for i in s["anyOf"]]
        return boolToConstructor.get("anyOf")({"anyOf": anyofs})

    if "allOf" in s:
        allofs = [simplify_schema_and_embed_checkers(i) for i in s["allOf"]]
        return boolToConstructor.get("allOf")({"allOf": allofs})

    return None