Coverage for jsonsubschema/_utils.py : 79%

Hot-keys on this page
r m x p toggle line displays
j k next/prev highlighted chunk
0 (zero) top of page
1 (one) first highlighted chunk
1'''
2Created on May 24, 2019
3@author: Andrew Habib
4'''
7import copy
8import fractions
9import math
10import numbers
11import re
12import sys
13import json
15import jsonschema
16import portion as I
17from greenery.lego import parse
19import jsonsubschema.config as config
20import jsonsubschema._constants as definitions
def is_str(i):
    ''' Tell whether i is a str instance (subclasses included). '''
    string_like = isinstance(i, str)
    return string_like
def is_int(i):
    ''' Tell whether i is an int.
        bool values are rejected even though bool subclasses int. '''
    return isinstance(i, int) and not isinstance(i, bool)
def is_int_equiv(i):
    ''' Tell whether i is an int, or a float holding an integral value
        (e.g. 3.0). bool values are rejected. '''
    if isinstance(i, bool):
        return False
    if isinstance(i, int):
        return True
    return isinstance(i, float) and float(i).is_integer()
39# def is_float(i):
40# return isinstance(i, float)
def is_num(i):
    ''' Tell whether i is a numbers.Number instance.
        bool values are rejected even though bool is a Number. '''
    return not isinstance(i, bool) and isinstance(i, numbers.Number)
def is_bool(i):
    ''' Tell whether i is a bool (True or False). '''
    boolean_like = isinstance(i, bool)
    return boolean_like
53# def is_null(i):
54# isinstance(i, type(None))
def is_list(i):
    ''' Tell whether i is a list instance (subclasses included). '''
    list_like = isinstance(i, list)
    return list_like
def is_dict(i):
    ''' Tell whether i is a dict instance (subclasses included). '''
    dict_like = isinstance(i, dict)
    return dict_like
65# def is_empty_dict_or_none(i):
66# return i == {} or i == None
69# def is_dict_or_true(i):
70# return isinstance(i, dict) or i == True
def validate_schema(s):
    ''' Run check_schema of the validator configured in config.VALIDATOR
        on schema s and hand back whatever that call returns. '''
    validator = config.VALIDATOR
    return validator.check_schema(s)
def get_valid_enum_vals(enum, s):
    ''' Return the values of enum that validate against schema s.

        The result is a new list, in the original order, holding deep
        copies of the surviving values, so neither enum nor its elements
        are shared with the caller's data.

        Values are selected by position rather than removed by value:
        list.remove() drops the first *equal* element, and in Python
        1 == True and 0 == False, so removing an invalid True from
        [1, True] would wrongly drop the 1. '''
    valid = []
    for val in enum:
        try:
            jsonschema.validate(instance=val, schema=s)
        except jsonschema.ValidationError:
            # Not accepted by the schema: leave it out of the result.
            continue
        valid.append(copy.deepcopy(val))
    return valid
def get_typed_enum_vals(enum, t):
    ''' Return, as a list, the values of enum that are instances of the
        Python type(s) registered for JSON type name t in
        definitions.JtypesToPyTypes.
        For t == "integer", bool values are dropped first, since bool
        is a subclass of int in Python. '''
    drop_bools = (t == "integer")
    return [
        val for val in enum
        if not (drop_bools and isinstance(val, bool))
        and isinstance(val, definitions.JtypesToPyTypes[t])
    ]
def print_db(*args):
    ''' Debug print, active only when config.PRINT_DB is truthy.
        Each argument is converted with str() and followed by a single
        space; with no arguments an empty line is printed. '''
    if not config.PRINT_DB:
        return
    if not args:
        print()
        return
    pieces = [str(arg) + " " for arg in args]
    print("".join(pieces))
113# def one(iterable):
114# for i in range(len(iterable)):
115# if iterable[i]:
116# return not (any(iterable[:i]) or any(iterable[i+1:]))
117# return False
119#
120# To avoid regex bottlenecks, instead of using '.*'
121# as the default value for string.pattern, we use
122# 'None' and apply explicit checks for 'None'.
123# E.g. regex_meet(s1, None) = s1
124#
def prepare_pattern_for_greenry(s):
    r''' Rewrite pattern s into the syntax expected by greenery.

        The greenery library we use for regex intersection assumes
        patterns are unanchored by default and treats the anchoring
        chars ^ and $ as literals. Hence:
          - every non-escaped ^ (unless it directly follows '[') and
            every non-escaped $ is stripped;
          - for every escaped ^ or $ the escaping backslash is removed,
            leaving the bare literal character.
        The rewrites are applied in exactly this order. '''
    rewrites = (
        # strip non-escaped ^ that is not inside []
        (r'(?<!\\|\[)((?:\\{2})*)\^', r'\g<1>'),
        # strip non-escaped $
        (r'(?<!\\)((?:\\{2})*)\$', r'\g<1>'),
        # strip \ before ^
        (r'(?<!\\)((?:\\{1})*)\\\^', r'\g<1>^'),
        # strip \ before $
        (r'(?<!\\)((?:\\{1})*)\\\$', r'\g<1>$'),
    )
    cleaned = s
    for pattern, replacement in rewrites:
        cleaned = re.sub(pattern, replacement, cleaned)
    return cleaned
def regex_unanchor(p):
    ''' Make the implicit "match anywhere" of JSON regexes explicit.

        We need this because JSON regexes are not anchored by default,
        while the regex library we use assumes the opposite: regexes
        are anchored by default AND ^ and $ are literals that do not
        carry their anchoring meaning.

        A leading ^ is dropped, otherwise '.*' is prepended (unless the
        pattern already starts with '.*'); a trailing $ is dropped,
        otherwise '.*' is appended (unless it already ends with '.*').
        A falsy p (None or the empty string) is returned unchanged.

        startswith/endswith are used instead of indexing so that the
        pattern "^", which is empty once the anchor is stripped, yields
        '.*' instead of raising IndexError. '''
    if not p:
        return p
    if p.startswith("^"):
        p = p[1:]
    elif not p.startswith(".*"):
        p = ".*" + p
    if p.endswith("$"):
        p = p[:-1]
    elif not p.endswith(".*"):
        p = p + ".*"
    return p
def regex_matches_string(regex=None, s=None):
    ''' Tell whether string s is matched by pattern regex, using the
        greenery parser. A falsy regex (None or '') means "no pattern"
        and accepts everything. '''
    if not regex:
        return True
    return parse(regex).matches(s)
def regex_meet(s1, s2):
    ''' Intersect two patterns.
        A falsy pattern (None or '') stands for "no constraint", so the
        meet with it is simply the other pattern; None is returned when
        both are falsy. When both are given, the reduced intersection
        is returned as a string, or None if that intersection is
        empty. '''
    if not (s1 and s2):
        return s1 or s2 or None
    intersection = parse(s1) & parse(s2)
    if intersection.empty():
        return None
    return str(intersection.reduce())
def regex_isSubset(s1, s2):
    ''' Tell whether the language of pattern s1 is contained in the
        language of pattern s2.

        regex subset is quite expensive to compute, especially for
        complex patterns.

        A falsy pattern (None or '') stands for "no pattern":
          - only s1 given -> True
          - only s2 given -> False
          - neither given -> None (falsy), kept for backward
            compatibility with existing callers.

        When both are given, the string sets are compared directly if
        both cardinality() calls succeed; on any exception the check
        falls back to equivalence / empty-difference on the parsed
        patterns. '''
    if s1 and s2:
        s1 = parse(s1).reduce()
        s2 = parse(s2).reduce()
        try:
            # Presumably cardinality() raises OverflowError for infinite
            # languages; when it succeeds the strings can be enumerated.
            s1.cardinality()
            s2.cardinality()
            return set(s1.strings()).issubset(s2.strings())
        except Exception:
            # Deliberately broad: besides OverflowError, greenery raises
            # a bare Exception in some cases, see
            # https://github.com/qntm/greenery/blob/master/greenery/lego.py
            # ... raise Exception("Please choose an 'otherchar'")
            # (A second `except Exception` clause used to follow this
            # one; it could never run and has been removed.)
            return s1.equivalent(s2) or (s1 & s2.everythingbut()).empty()
    if s1:
        return True
    if s2:
        return False
    return None
205# def regex_isProperSubset(s1, s2):
206# ''' regex proper subset is quite expensive to compute
207# so we try to break it into two separate checks,
208# and do the more expensive check, only if the
209# cheaper one passes first. '''
211# s1 = parse(s1).reduce()
212# s2 = parse(s2).reduce()
213# if not s1.equivalent(s2):
214# return (s1 & s2.everythingbut()).empty()
215# return False
def string_range_to_regex(min, max):
    ''' Build a pattern matching any string whose length lies in
        [min, max]:
          min == max   -> '.{min}'
          max == I.inf -> '.{min,}'
          otherwise    -> '.{min,max}'
        Asserts that min <= max. '''
    assert min <= max, ""
    lower = str(min)
    if min == max:
        bounds = lower
    elif max == I.inf:
        bounds = lower + ","
    else:
        bounds = lower + "," + str(max)
    return ".{" + bounds + "}"
def complement_of_string_pattern(s):
    ''' Return, as a string, the reduced complement of pattern s:
        the pattern is parsed with greenery, negated with
        everythingbut(), and reduced. '''
    parsed = parse(s)
    complement = parsed.everythingbut()
    return str(complement.reduce())
def lcm(x, y):
    ''' Least common multiple of x and y, where None means "no value".

        If one argument is None the other one is returned (None when
        both are None). Otherwise the result is x * y / gcd(x, y);
        because of the true division it is a float even for integer
        inputs, which is kept for backward compatibility.

        For two ints the divisor comes from math.gcd. For any other
        numeric input Euclid's algorithm is run directly with the %
        operator: this is what the old fractions.gcd did, and that
        function was removed in Python 3.9. '''
    if x is None:
        return y
    if y is None:
        return x
    if is_int(x) and is_int(y):
        divisor = math.gcd(int(x), int(y))
    else:
        # Euclid's algorithm; works for floats as well as ints.
        a, b = x, y
        while b:
            a, b = b, a % b
        divisor = a
    return x * y / divisor
def gcd(x, y):
    ''' Greatest common divisor of x and y, where None means
        "no value".

        None is returned as soon as either argument is None.
        For two ints math.gcd is used. For any other numeric input
        Euclid's algorithm is run directly with the % operator: this is
        what the old fractions.gcd did, and that function was removed
        in Python 3.9. '''
    if x is None or y is None:
        return None
    if is_int(x) and is_int(y):
        return math.gcd(int(x), int(y))
    # Euclid's algorithm; works for floats as well as ints.
    a, b = x, y
    while b:
        a, b = b, a % b
    return a
272# def decrementFloat(f):
273# if f == 0.0:
274# return sys.float_info.min
275# m, e = math.frexp(f)
276# return math.ldexp(m - sys.float_info.epsilon / 2, e)
279# def incrementFloat(f):
280# if f == 0.0:
281# return sys.float_info.min
282# m, e = math.frexp(f)
283# return math.ldexp(m + sys.float_info.epsilon / 2, e)
def generate_range_with_multipleOf_or(range_, pos_mul_of):
    ''' Yield the items of range_ that are a multiple of at least one
        factor in pos_mul_of. When pos_mul_of is empty or None there is
        no constraint and every item is yielded.

        The generator writes nothing to stdout; a leftover debug print
        of pos_mul_of has been removed. '''
    if not pos_mul_of:
        yield from range_
        return
    for i in range_:
        if any(i % k == 0 for k in pos_mul_of):
            yield i
def generate_range_with_not_multipleOf_and(range_, neg_mul_of):
    ''' Yield the items of range_ that are a multiple of none of the
        factors in neg_mul_of. When neg_mul_of is empty or None every
        item is yielded. '''
    if not neg_mul_of:
        yield from range_
        return
    for candidate in range_:
        if not any(candidate % factor == 0 for factor in neg_mul_of):
            yield candidate
def generate_range_with_multipleof(range_, pos, neg):
    ''' Chain both multipleOf filters over range_: first keep the
        items accepted by generate_range_with_multipleOf_or for pos,
        then pass those through
        generate_range_with_not_multipleOf_and for neg. '''
    kept_by_pos = generate_range_with_multipleOf_or(range_, pos)
    return generate_range_with_not_multipleOf_and(kept_by_pos, neg)
def get_new_min_max_with_mulof(mn, mx, mulof):
    ''' Tighten the bounds [mn, mx] to multiples of mulof.

        When mulof is a number smaller than mx, a numeric mn is raised
        to the nearest value >= mn that is a multiple of mulof, and a
        numeric mx is lowered to the nearest value <= mx that is a
        multiple of mulof. Non-numeric bounds are left untouched.
        Returns the pair (mn, mx).

        For integer bound and integer mulof the result is computed in
        closed form, using abs(mulof) because x % m == 0 exactly when
        x % abs(m) == 0. For any other numeric combination the bound is
        moved in steps of 1 until it is a multiple, as before. '''
    if not (is_num(mulof) and mulof < mx):
        return mn, mx

    exact = is_int(mulof)
    if is_num(mn):
        if exact and is_int(mn):
            # Smallest multiple of mulof that is >= mn.
            mn = mn + (-mn) % abs(mulof)
        else:
            while mn % mulof != 0:
                mn = mn + 1
    if is_num(mx):
        if exact and is_int(mx):
            # Largest multiple of mulof that is <= mx.
            mx = mx - mx % abs(mulof)
        else:
            while mx % mulof != 0:
                mx = mx - 1
    return mn, mx
def is_interval_finite(i):
    ''' Tell whether both bounds of interval i (i.lower and i.upper)
        are numbers according to is_num. '''
    return all(is_num(bound) for bound in (i.lower, i.upper))
def are_intervals_mergable(i1, i2):
    ''' Tell whether intervals i1 and i2 can be merged: either they
        overlap, or one starts exactly 1 above the numeric upper bound
        of the other. '''
    overlap = i1.overlaps(i2)
    if overlap:
        return overlap
    # i1 starts right after i2 ends
    if is_num(i1.lower) and is_num(i2.upper) and i1.lower - i2.upper == 1:
        return True
    # i2 starts right after i1 ends
    return is_num(i2.lower) and is_num(i1.upper) and i2.lower - i1.upper == 1
def load_json_file(path, msg=None):
    ''' Open the file at path for reading and return its parsed JSON
        content. If parsing raises, exit_with_msg is called with msg
        and the exception. Errors raised by open() itself are not
        caught here. '''
    with open(path, "r") as json_file:
        try:
            parsed = json.load(json_file)
        except Exception as err:
            exit_with_msg(msg, err)
        else:
            return parsed
def exit_with_msg(msg, e=None):
    ''' Print msg and exception e on one line to stdout, then
        terminate by raising SystemExit with exit status 1. '''
    fields = ("Message:", msg, ";", "Exception:", e)
    print(*fields)
    raise SystemExit(1)