Hide keyboard shortcuts

Hot-keys on this page

r m x p   toggle line displays

j k   next/prev highlighted chunk

0   (zero) top of page

1   (one) first highlighted chunk

1''' 

2Created on June 24, 2019 

3@author: Andrew Habib 

4''' 

5 

6import copy 

7import jsonschema 

8import numbers 

9import numpy 

10import sys 

11 

12import jsonsubschema._constants as definitions 

13import jsonsubschema._utils as utils 

14from jsonsubschema._checkers import ( 

15 typeToConstructor, 

16 boolToConstructor, 

17 JSONtop, 

18 JSONbot 

19) 

20from jsonsubschema.exceptions import UnsupportedEnumCanonicalization 

21 

# Empty schema. simplify_schema_and_embed_checkers() maps an empty dict
# to JSONtop(); presumably this is the schema that accepts every instance.
TOP = {}

# Negation of the empty schema. canonicalize_enum() returns this object when
# no enum value survives filtering, and simplify_schema_and_embed_checkers()
# maps a schema whose "not" is {} to JSONbot().
BOT = {"not": {}}

24 

25 

def canonicalize_schema(obj):
    ''' Validate a JSON schema and return its canonical form.

    obj is checked with utils.validate_schema() before canonicalization,
    and the canonicalized result is checked again before it is returned.
    utils.validate_schema() is expected to raise jsonschema.SchemaError
    on unknown types.

    Raises TypeError when obj passes validation but is not a dict,
    since only dict schemas can be canonicalized here. '''

    # First, make sure the given json is a valid json schema.
    utils.validate_schema(obj)

    # Second, canonicalize the schema. Only dict schemas are supported;
    # fail with an explicit error instead of falling through to an
    # unbound local variable further down.
    if not utils.is_dict(obj):
        raise TypeError(
            "Cannot canonicalize schema of type {}; expected a dict.".format(
                type(obj).__name__))
    canonical_schema = canonicalize_dict(obj)

    # Finally, ensure that the canonicalized schema is still a valid
    # json schema.
    utils.validate_schema(canonical_schema)

    return canonical_schema

40 

41 

def canonicalize_dict(d, outer_key=None):
    ''' Canonicalize one schema dict and return the result.

    outer_key is the keyword under which d was found in its parent
    schema, if any. Containers found under "properties",
    "patternProperties" and "dependencies" are plain name -> schema
    maps, so only their values are canonicalized (in place).

    Any other dict that carries schema keywords is deep-copied and
    dispatched on its connectors, enum, or type. '''

    # The trivial schemas are returned untouched; not strictly required,
    # but it keeps canonicalization well-behaved in tests.
    is_empty = d == {}
    if is_empty or d == {"not": {}}:
        return d

    # NOTE: keywords sitting next to a "$ref" are currently left to
    # jsonref. This may need revisiting once recursive $ref is handled
    # independently of jsonref.

    # Plain containers: canonicalize each member schema in place.
    if outer_key in ("properties", "patternProperties"):
        for name in list(d):
            d[name] = canonicalize_dict(d[name])
        return d
    if outer_key == "dependencies":
        for name in list(d):
            member = d[name]
            if utils.is_dict(member):
                d[name] = canonicalize_dict(member)
        return d

    # Nothing to canonicalize when no schema keyword is present.
    if not definitions.Jkeywords.intersection(d.keys()):
        return d

    declared_type = d.get("type")
    has_connectors = definitions.Jconnectors.intersection(d.keys())

    # From here on work on a private copy so the caller's dict is
    # left unmodified.
    d = copy.deepcopy(d)

    if has_connectors:
        return canonicalize_connectors(d)
    if "enum" in d:
        return canonicalize_enum(d)
    if utils.is_str(declared_type):
        return canonicalize_single_type(d)
    if not utils.is_list(declared_type):
        # No usable type given: expand to every known type.
        d["type"] = definitions.Jtypes
    return canonicalize_list_of_types(d)

93 

94 

def canonicalize_single_type(d):
    ''' Canonicalize a schema whose "type" is a single type name.

    Keywords that are neither common, nor specific to the type, nor
    non-validation keywords are dropped. Nested dicts and lists of
    schemas are canonicalized recursively. If an "enum" is still present
    afterwards, the schema is passed through rewrite_enum().

    d is modified in place. Returns None when the type is not one of
    definitions.Jtypes; the original comments note that up-front
    jsonschema validation prevents that case. '''
    type_name = d.get("type")
    if type_name not in definitions.Jtypes:
        return None

    for key, value in list(d.items()):
        is_relevant = (
            key in definitions.Jcommonkw
            or key in definitions.JtypesToKeywords.get(type_name)
            or key in definitions.JNonValidation
        )
        if not is_relevant:
            # Keyword has no bearing on this type.
            d.pop(key)
            continue

        if utils.is_dict(value):
            d[key] = canonicalize_dict(value, key)
            continue

        if not utils.is_list(value):
            continue

        if key == "enum":
            # Keep only the enum values matching this type.
            d[key] = utils.get_typed_enum_vals(value, type_name)
        elif key == "required":
            d[key] = sorted(set(value))
        else:
            # Any other list must hold operand schemas.
            d[key] = [canonicalize_dict(member) for member in value]

    return rewrite_enum(d) if "enum" in d else d

128 

129 

def canonicalize_list_of_types(d):
    ''' Split a schema with several types into an anyOf of single types.

    For every type name in d["type"] (visited in sorted order) that
    appears in definitions.Jtypes, a deep copy of d is narrowed to that
    one type and canonicalized with canonicalize_single_type().

    Always returns {"anyOf": [...]}, even when there is a single
    (or no) branch. '''
    branches = []
    for type_name in sorted(d.get("type")):
        # Unknown type names are skipped; the original comments note
        # that up-front jsonschema validation should rule them out.
        if type_name not in definitions.Jtypes:
            continue
        variant = copy.deepcopy(d)
        variant["type"] = type_name
        branches.append(canonicalize_single_type(variant))

    return {"anyOf": branches}

152 

153 

def canonicalize_enum(d):
    ''' Canonicalize a schema that contains an "enum" keyword.

    The enum is first reduced to the values returned by
    utils.get_valid_enum_vals(); if none remain, BOT is returned.
    Otherwise d["type"] is set to the JSON types of the remaining
    values, intersected with any type d already declared, and the
    schema is handed to canonicalize_list_of_types().

    d is modified in place. '''
    remaining = utils.get_valid_enum_vals(d["enum"], d)
    if not remaining:
        return BOT
    d["enum"] = remaining

    # JSON type names of the values actually present in the enum.
    value_types = sorted(
        {definitions.PyTypesToJtypes.get(type(value)) for value in d.get("enum")})

    if "type" not in d:
        d["type"] = value_types
    else:
        declared = d["type"]
        declared = {declared} if utils.is_str(declared) else set(declared)
        d["type"] = declared.intersection(value_types)

    return canonicalize_list_of_types(d)

169 

170 

def canonicalize_connectors(d):
    ''' Canonicalize a schema that uses boolean connectors.

    A schema consisting of exactly one connector and no other keyword:
      - "not":   the operand is canonicalized, then canonicalize_not()
                 is applied;
      - "oneOf": a single operand is canonicalized directly; otherwise
                 it is rewritten as an anyOf in which each branch is
                 "this operand and none of the others";
      - any other connector: the operands are canonicalized and the
                 result goes through simplify_schema_and_embed_checkers().

    Otherwise (several connectors, or connectors mixed with other
    keywords) each connector and the remaining keywords become separate
    operands of one allOf. Connectors are visited in sorted order so
    the result does not depend on set iteration order.

    d is modified in place. '''
    connectors = definitions.Jconnectors.intersection(d.keys())
    lhs_kw = definitions.Jkeywords.intersection(d.keys())
    lhs_kw_without_connectors = lhs_kw.difference(connectors)

    # Single connector.
    if len(connectors) == 1 and not lhs_kw_without_connectors:
        c = connectors.pop()

        if c == "not":
            d["not"] = canonicalize_dict(d["not"])
            return canonicalize_not(d)

        if c == "oneOf":
            operands = d[c]
            if len(operands) == 1:
                return canonicalize_dict(operands.pop())
            anyofs = []
            for i, chosen in enumerate(operands):
                # Exactly-one semantics: chosen holds, every other
                # operand is negated.
                others = operands[:i] + operands[i + 1:]
                allofs = [chosen] + [{"not": j} for j in others]
                anyofs.append({"allOf": allofs})
            return canonicalize_connectors({"anyOf": anyofs})

        # Here the connector is neither "not" nor "oneOf" (i.e. anyOf
        # or allOf), so simplify the operands before proceeding.
        d[c] = [canonicalize_dict(i) for i in d[c]]
        return simplify_schema_and_embed_checkers(d)

    # Connector + other keywords. Combine them first.
    allofs = []
    # Sorted iteration: a bare set would make the operand order depend
    # on string hash randomization.
    for c in sorted(connectors):
        allofs.append(canonicalize_dict({c: d[c]}))
        del d[c]
    if lhs_kw_without_connectors:
        allofs.append(canonicalize_dict(
            {k: d[k] for k in sorted(lhs_kw_without_connectors)}))
    return {"allOf": allofs}

215 

216 

def canonicalize_not(d):
    ''' Canonicalize a schema d of the form {"not": <schema>}.

    - An empty negated schema, or one whose "type" is in
      definitions.Jtypes, is already canonical: d is returned as is.
    - {"not": {"not": X}} yields X (the two negations cancel).
    - A negated anyOf becomes an allOf of the negated operands.
    - A negated allOf is first passed through canonicalize_connectors()
      and the negation of the result is canonicalized again.
    - Any other single connector falls through and yields None.

    If the negated schema does not contain exactly one connector, the
    process is terminated through sys.exit(). '''
    inner = d["not"]
    inner_type = inner.get("type")

    if inner == {} or inner_type in definitions.Jtypes:
        return d

    found = definitions.Jconnectors.intersection(inner.keys())
    if len(found) != 1:
        sys.exit(">>>>>> Ewwwww! Shouldn't be here during canonicalization. <<<<<<")

    connector = found.pop()

    if connector == "not":
        # Double negation: hand back the positive schema.
        return inner["not"]

    if connector == "anyOf":
        return {"allOf": [canonicalize_not({"not": operand})
                          for operand in inner["anyOf"]]}

    if connector == "allOf":
        # The original comments say this case should not normally be
        # reached, because the schema should be canonicalized and
        # simplified by now.
        return canonicalize_not({'not': canonicalize_connectors(inner)})

    return None

258 

259 

# Characters with syntactic meaning in a regular expression. They must be
# escaped before an enum value can be embedded in a pattern as literal text.
_REGEX_SYNTAX_CHARS = frozenset("\\^$.*+?()[]{}|")


def _escape_regex_literal(text):
    ''' Return text with every regex syntax character backslash-escaped. '''
    return "".join(
        "\\" + ch if ch in _REGEX_SYNTAX_CHARS else ch for ch in text)


def rewrite_enum(d):
    ''' Rewrite the "enum" of a single-typed schema d into other keywords.

    - "string":  a "pattern" alternation with one anchored literal per
                 value; regex syntax characters in the values are escaped
                 so each value matches only itself.
    - "integer": an anyOf with one minimum == maximum branch per value.
    - "number":  like "integer"; integer-equivalent values get an
                 "integer" branch and NaN keeps a one-element enum.
    - "boolean": d is returned unchanged (booleans may keep enums).
    - "null":    {"type": "null"}; the enum is dropped.

    For string, integer and number the original enum list is also kept
    under "enum" in the result.

    Raises UnsupportedEnumCanonicalization for "array" and "object".
    Returns None for any other type. '''
    t = d.get("type")
    enum = d.get("enum")

    if t == "boolean":
        # Only two values exist, so the enum is kept as it is.
        return d

    if t == "null":
        # null is a single value anyway.
        return {"type": "null"}

    if t == "string":
        pattern = "|".join(
            "^" + _escape_regex_literal(str(x)) + "$" for x in enum)
        ret = {"type": "string", "pattern": pattern}

    elif t == "integer":
        ret = {"anyOf": [
            {"type": "integer", "minimum": i, "maximum": i} for i in enum]}

    elif t == "number":
        ret = {"anyOf": []}
        for i in enum:
            if utils.is_int_equiv(i):
                ret["anyOf"].append(
                    {"type": "integer", "minimum": i, "maximum": i})
            elif numpy.isnan(i):
                # numpy.nan instead of the numpy.NaN alias, which was
                # removed in NumPy 2.0.
                ret["anyOf"].append({"type": "number", "enum": [numpy.nan]})
            else:
                ret["anyOf"].append(
                    {"type": "number", "minimum": i, "maximum": i})

    elif t == 'array' or t == 'object':
        # Unsupported cases of rewriting enums.
        raise UnsupportedEnumCanonicalization(tau=t, schema=d)

    else:
        return None

    ret["enum"] = enum
    return ret

306 

307 

def simplify_schema_and_embed_checkers(s):
    ''' Replace an already-canonicalized schema dict s by checker objects.

    An empty schema, or one without any schema keyword, becomes
    JSONtop(); a schema whose "not" is {} becomes JSONbot(). Sub-schemas
    under items, additionalItems, properties, patternProperties and
    additionalProperties are converted recursively (s is updated in
    place). The schema itself is then wrapped using typeToConstructor
    or boolToConstructor, depending on whether it has "type", "not",
    "anyOf" or "allOf".

    Returns None when none of those four keywords is present. '''
    embed = simplify_schema_and_embed_checkers

    if s == {} or not definitions.Jkeywords.intersection(s.keys()):
        return JSONtop()
    if "not" in s and s["not"] == {}:
        return JSONbot()

    # json.array specific
    if "items" in s:
        items = s["items"]
        if utils.is_dict(items):
            s["items"] = embed(items)
        elif utils.is_list(items):
            s["items"] = [embed(member) for member in items]

    if "additionalItems" in s and utils.is_dict(s["additionalItems"]):
        s["additionalItems"] = embed(s["additionalItems"])

    # json.object specific
    for container in ("properties", "patternProperties"):
        if container in s:
            s[container] = {name: embed(sub)
                            for name, sub in s[container].items()}

    if "additionalProperties" in s and utils.is_dict(s["additionalProperties"]):
        s["additionalProperties"] = embed(s["additionalProperties"])

    # Wrap the schema itself.
    if "type" in s:
        return typeToConstructor.get(s["type"])(s)

    if "not" in s:
        return typeToConstructor.get(s["not"]["type"]).neg(s["not"])

    for connector in ("anyOf", "allOf"):
        if connector in s:
            operands = [embed(operand) for operand in s[connector]]
            return boolToConstructor.get(connector)({connector: operands})

    return None