Stan  2.10.0
probability, sampling & optimization
json_parser.hpp
Go to the documentation of this file.
1 #ifndef STAN_IO_JSON_JSON_PARSER_HPP
2 #define STAN_IO_JSON_JSON_PARSER_HPP
3 
4 #include <boost/lexical_cast.hpp>
5 
8 
9 #include <stdexcept>
10 #include <iostream>
11 #include <istream>
12 #include <sstream>
13 #include <string>
14 
15 namespace stan {
16 
17  namespace json {
18 
19  namespace {
20 
// Boundaries of the UTF-16 surrogate ranges used when decoding
// \uXXXX escape sequences.

// First code unit of the high (leading) surrogate range.
const unsigned int MIN_HIGH_SURROGATE = 0xD800u;
// Last code unit of the high (leading) surrogate range.
const unsigned int MAX_HIGH_SURROGATE = 0xDBFFu;
// First code unit of the low (trailing) surrogate range.
const unsigned int MIN_LOW_SURROGATE = 0xDC00u;
// Last code unit of the low (trailing) surrogate range.
const unsigned int MAX_LOW_SURROGATE = 0xDFFFu;
// Offset added when a surrogate pair is combined into one code point.
const unsigned int MIN_SUPPLEMENTARY_CODE_POINT = 0x10000u;
26 
27  inline bool is_high_surrogate(unsigned int cp) {
28  return (cp >= MIN_HIGH_SURROGATE && cp <= MAX_HIGH_SURROGATE);
29  }
30 
31  inline bool is_low_surrogate(unsigned int cp) {
32  return (cp >= MIN_LOW_SURROGATE && cp <= MAX_LOW_SURROGATE);
33  }
34 
/**
 * Return true if the specified character is one of the four
 * characters this parser skips between tokens: space, line feed,
 * horizontal tab, or carriage return.
 *
 * @param c character to test
 * @return true if c is one of the four whitespace characters
 */
inline bool is_whitespace(char c) {
  switch (c) {
    case ' ':
    case '\n':
    case '\t':
    case '\r':
      return true;
    default:
      return false;
  }
}
38 
45  template <typename Handler, bool Validate_UTF_8>
46  class parser {
47  public:
48  parser(Handler& h,
49  std::istream& in)
50  : h_(h),
51  in_(in),
52  next_char_(0),
53  line_(0),
54  column_(0)
55  { }
56 
57  ~parser() {
58  }
59 
60  void parse() {
61  h_.start_text();
62  parse_text();
63  h_.end_text();
64  }
65 
66  private:
67  json_error json_exception(const std::string& msg) const {
68  std::stringstream ss;
69  ss << "Error in JSON parsing at"
70  << " line=" << line_ << " column=" << column_
71  << std::endl
72  << msg
73  << std::endl;
74  return json_error(ss.str());
75  }
76 
77  // JSON-text = object / array
78  void parse_text() {
79  char c = get_non_ws_char();
80  if (c == '{') { // begin-object
81  h_.start_object();
82  parse_object_members_end_object();
83  h_.end_object();
84  } else if (c == '[') { // begin-array
85  // array
86  h_.start_array();
87  parse_array_values_end_array();
88  h_.end_array();
89  } else {
90  throw json_exception("expecting start of object ({) or array ([)");
91  }
92  }
93 
94  // value = false / null / true / object / array / number / string
95  void parse_value() {
96  // value
97  char c = get_non_ws_char();
98  if (c == 'f') {
99  // false
100  parse_false_literal();
101  } else if (c == 'n') {
102  // null
103  parse_null_literal();
104  } else if (c == 't') {
105  // true
106  parse_true_literal();
107  } else if (c == '"') {
108  // string
109  h_.string(parse_string_chars_quotation_mark());
110  } else if (c == '{' || c == '[') {
111  // object / array
112  unget_char();
113  parse_text();
114  } else if (c == '-' ||
115  (c >= '0' && c <= '9') ) {
116  unget_char();
117  parse_number();
118  } else {
119  throw json_exception("illegal value, expecting object, array, "
120  "number, string, or literal true/false/null");
121  }
122  }
123 
124  void parse_number() {
125  bool is_positive = true;
126 
127  std::stringstream ss;
128  char c = get_non_ws_char();
129  // minus
130  if (c == '-') {
131  is_positive = false;
132  ss << c;
133  c = get_char();
134  }
135 
136  // int
137  // zero / digit1-9
138  if (c < '0' || c > '9')
139  throw json_exception("expecting int part of number");
140  ss << c;
141 
142  // *DIGIT
143  bool leading_zero = (c == '0');
144  c = get_char();
145  if (leading_zero && (c == '0'))
146  throw json_exception("zero padded numbers not allowed");
147  while (c >= '0' && c <= '9') {
148  ss << c;
149  c = get_char();
150  }
151 
152  // frac
153  bool is_integer = true;
154  if (c == '.') {
155  is_integer = false;
156  ss << '.';
157  c = get_char();
158  if (c < '0' || c > '9')
159  throw json_exception("expected digit after decimal");
160  ss << c;
161  c = get_char();
162  while (c >= '0' && c <= '9') {
163  ss << c;
164  c = get_char();
165  }
166  }
167 
168  // exp
169  if (c == 'e' || c == 'E') {
170  is_integer = false;
171  ss << c;
172  c = get_char();
173  // minus / plus
174  if (c == '+' || c == '-') {
175  ss << c;
176  c = get_char();
177  }
178  // 1*DIGIT
179  if (c < '0' || c > '9')
180  throw json_exception("expected digit after e/E");
181  while (c >= '0' && c <= '9') {
182  ss << c;
183  c = get_char();
184  }
185  }
186  unget_char();
187 
188  if (is_integer) {
189  if (is_positive) {
190  unsigned long n; // NOLINT(runtime/int)
191  try {
192  // NOLINTNEXTLINE(runtime/int)
193  n = boost::lexical_cast<unsigned long>(ss.str());
194  } catch (const boost::bad_lexical_cast & ) {
195  throw json_exception("number exceeds integer range");
196  }
197  ss >> n;
198  h_.number_unsigned_long(n);
199  } else {
200  long n; // NOLINT(runtime/int)
201  try {
202  // NOLINTNEXTLINE(runtime/int)
203  n = boost::lexical_cast<unsigned long>(ss.str());
204  } catch (const boost::bad_lexical_cast & ) {
205  throw json_exception("number exceeds integer range");
206  }
207  ss >> n;
208  h_.number_long(n);
209  }
210  } else {
211  double x;
212  try {
213  std::string ss_str = ss.str();
214  x = boost::lexical_cast<double>(ss_str);
215  if (x == 0)
216  io::validate_zero_buf(ss_str);
217  } catch (const boost::bad_lexical_cast & ) {
218  throw json_exception("number exceeds double range");
219  }
220  ss >> x;
221  h_.number_double(x);
222  }
223  }
224 
225  std::string parse_string_chars_quotation_mark() {
226  std::stringstream s;
227  while (true) {
228  char c = get_char();
229  if (c == '"') {
230  return s.str();
231  } else if (c == '\\') {
232  c = get_char();
233  if (c == '\\' || c == '/' || c == '"') {
234  s << c;
235  } else if (c == 'b') {
236  s << '\b';
237  } else if (c == 'f') {
238  s << '\f';
239  } else if (c == 'n') {
240  s << '\n';
241  } else if (c == 'r') {
242  s << '\r';
243  } else if (c == 't') {
244  s << '\t';
245  } else if (c == 'u') {
246  get_escaped_unicode(s);
247  } else {
248  throw json_exception("expecting legal escape");
249  }
250  continue;
251  } else if (c > 0 && c < 0x20) { // ASCII control characters
252  throw json_exception("found control character, char values less "
253  "than U+0020 must be \\u escaped");
254  }
255  s << c;
256  }
257  }
258 
259  void parse_true_literal() {
260  get_chars("rue");
261  h_.boolean(true);
262  }
263 
264  void parse_false_literal() {
265  get_chars("alse");
266  h_.boolean(false);
267  }
268 
269  void parse_null_literal() {
270  get_chars("ull");
271  h_.null();
272  }
273 
274  void get_escaped_unicode(std::stringstream& s) {
275  unsigned int codepoint = get_int_as_hex_chars();
276  if (!(is_high_surrogate(codepoint) || is_low_surrogate(codepoint))) {
277  putCodepoint(s, codepoint);
278  } else if (!is_high_surrogate(codepoint)) {
279  throw json_exception("illegal unicode values, found "
280  "low-surrogate, missing high-surrogate");
281  } else {
282  char c = get_char();
283  if (!(c == '\\'))
284  throw json_exception("illegal unicode values, found "
285  "high-surrogate, expecting low-surrogate");
286  c = get_char();
287  if (!(c == 'u'))
288  throw json_exception("illegal unicode values, found "
289  "high-surrogate, expecting low-surrogate");
290  unsigned int codepoint2 = get_int_as_hex_chars();
291  unsigned int supplemental
292  = ((codepoint - MIN_HIGH_SURROGATE) << 10)
293  + (codepoint2 - MIN_LOW_SURROGATE)
294  + MIN_SUPPLEMENTARY_CODE_POINT;
295  putCodepoint(s, supplemental);
296  }
297  }
298 
299  unsigned int get_int_as_hex_chars() {
300  std::stringstream s;
301  s << std::hex;
302  for (int i = 0; i < 4; i++) {
303  char c = get_char();
304  if (!((c >= 'a' && c<= 'f')
305  || (c >= 'A' && c<= 'F')
306  || (c >= '0' && c<= '9')))
307  throw json_exception("illegal unicode code point");
308  s << c;
309  }
310  unsigned int hex;
311  s >> hex;
312  return hex;
313  }
314 
315  void putCodepoint(std::stringstream& s, unsigned int codepoint) {
316  if (codepoint <= 0x7f) {
317  s.put(codepoint);
318  } else if (codepoint <= 0x7ff) {
319  s.put(0xc0 | ((codepoint >> 6) & 0x1f));
320  s.put(0x80 | (codepoint & 0x3f));
321  } else if (codepoint <= 0xffff) {
322  s.put(0xe0 | ((codepoint >> 12) & 0x0f));
323  s.put(0x80 | ((codepoint >> 6) & 0x3f));
324  s.put(0x80 | (codepoint & 0x3f));
325  } else {
326  s.put(0xf0 | ((codepoint >> 18) & 0x07));
327  s.put(0x80 | ((codepoint >> 12) & 0x3f));
328  s.put(0x80 | ((codepoint >> 6) & 0x3f));
329  s.put(0x80 | (codepoint & 0x3f));
330  }
331  }
332 
333  void get_chars(const std::string& s) {
334  for (size_t i = 0; i < s.size(); ++i) {
335  char c = get_char();
336  if (c != s[i])
337  throw json_exception("expecting rest of literal: "
338  + s.substr(i));
339  }
340  }
341 
342  void parse_array_values_end_array() {
343  char c = get_non_ws_char();
344  if (c == ']') return;
345  unget_char();
346  while (true) {
347  parse_value();
348  char c = get_non_ws_char();
349  if (c == ']') return;
350  if (c != ',') {
351  throw json_exception("in array, expecting ] or ,");
352  }
353  c = get_non_ws_char();
354  if (c == ']')
355  throw json_exception("in array, expecting value");
356  unget_char();
357  }
358  }
359 
360  void parse_object_members_end_object() {
361  char c = get_non_ws_char();
362  if (c == '}') return;
363  while (true) {
364  // string (key)
365  if (c != '"')
366  throw json_exception("expecting member key"
367  " or end of object marker (})");
368  std::string key = parse_string_chars_quotation_mark();
369  h_.key(key);
370  // name-separator separator
371  c = get_non_ws_char();
372  if (c != ':')
373  throw json_exception("expecting key-value separator :");
374  // value
375  parse_value();
376 
377  // continuation
378  c = get_non_ws_char();
379  if (c == '}')
380  return;
381  if (c != ',')
382  throw json_exception("expecting end of object } or separator ,");
383  c = get_non_ws_char();
384  }
385  }
386 
387  char get_char() {
388  char c = in_.get();
389  if (!in_.good())
390  throw json_exception("unexpected end of stream");
391  if (c == '\n') {
392  ++line_;
393  column_ = 1;
394  } else {
395  ++column_;
396  }
397  return c;
398  }
399 
400  char get_non_ws_char() {
401  while (true) {
402  char c = get_char();
403  if (is_whitespace(c)) continue;
404  return c;
405  }
406  }
407 
408  void unget_char() {
409  in_.unget();
410  --column_;
411  }
412 
413  Handler& h_;
414  std::istream& in_;
416  size_t line_;
417  size_t column_;
418  };
419 
420  }
421 
432  template <bool Validate_UTF_8, typename Handler>
433  void parse(std::istream& in,
434  Handler& handler) {
435  parser<Handler, Validate_UTF_8>(handler, in).parse();
436  }
437 
/**
 * Parse the JSON text read from the specified input stream,
 * reporting events to the specified handler.  Equivalent to calling
 * the two-parameter template with <code>Validate_UTF_8</code> set
 * to <code>false</code>.
 *
 * @tparam Handler type of the handler
 * @param in stream to read the JSON text from
 * @param handler receiver of the parse events
 */
template <typename Handler>
void parse(std::istream& in,
           Handler& handler) {
  parse<false, Handler>(in, handler);
}
452 
453  }
454 }
455 #endif
Probability, optimization and sampling library.
void validate_zero_buf(const B &buf)
Throw a bad-cast exception if the specified buffer contains a digit other than 0 before an e or E...
std::istream & in_
size_t line_
void parse(std::istream &in, Handler &handler)
Parse the JSON text represented by the specified input stream, sending events to the specified handle...
Handler & h_
char next_char_
size_t column_

     [ Stan Home Page ] © 2011–2016, Stan Development Team.