xref: /petsc/src/sys/yaml/src/scanner.c (revision 834855d6effb0d027771461c8e947ee1ce5a1e17)
1 
2 /*
3  * Introduction
4  * ************
5  *
6  * The following notes assume that you are familiar with the YAML specification
7  * (http://yaml.org/spec/cvs/current.html).  We mostly follow it, although in
8  * some cases we are less restrictive than it requires.
9  *
10  * The process of transforming a YAML stream into a sequence of events is
11  * divided into two steps: Scanning and Parsing.
12  *
13  * The Scanner transforms the input stream into a sequence of tokens, while the
14  * Parser transforms the sequence of tokens produced by the Scanner into a
15  * sequence of parsing events.
16  *
17  * The Scanner is rather clever and complicated. The Parser, on the contrary,
18  * is a straightforward implementation of a recursive-descent parser (or,
19  * LL(1) parser, as it is usually called).
20  *
21  * Actually there are two issues of Scanning that might be called "clever", the
22  * rest is quite straightforward.  The issues are "block collection start" and
23  * "simple keys".  Both issues are explained below in detail.
24  *
25  * Here the Scanning step is explained and implemented.  We start with the list
26  * of all the tokens produced by the Scanner together with short descriptions.
27  *
28  * Now, tokens:
29  *
30  *      STREAM-START(encoding)          # The stream start.
31  *      STREAM-END                      # The stream end.
32  *      VERSION-DIRECTIVE(major,minor)  # The '%YAML' directive.
33  *      TAG-DIRECTIVE(handle,prefix)    # The '%TAG' directive.
34  *      DOCUMENT-START                  # '---'
35  *      DOCUMENT-END                    # '...'
36  *      BLOCK-SEQUENCE-START            # Indentation increase denoting a block
37  *      BLOCK-MAPPING-START             # sequence or a block mapping.
38  *      BLOCK-END                       # Indentation decrease.
39  *      FLOW-SEQUENCE-START             # '['
40  *      FLOW-SEQUENCE-END               # ']'
41  *      FLOW-MAPPING-START              # '{'
42  *      FLOW-MAPPING-END                # '}'
43  *      BLOCK-ENTRY                     # '-'
44  *      FLOW-ENTRY                      # ','
45  *      KEY                             # '?' or nothing (simple keys).
46  *      VALUE                           # ':'
47  *      ALIAS(anchor)                   # '*anchor'
48  *      ANCHOR(anchor)                  # '&anchor'
49  *      TAG(handle,suffix)              # '!handle!suffix'
50  *      SCALAR(value,style)             # A scalar.
51  *
52  * The following two tokens are "virtual" tokens denoting the beginning and the
53  * end of the stream:
54  *
55  *      STREAM-START(encoding)
56  *      STREAM-END
57  *
58  * We pass the information about the input stream encoding with the
59  * STREAM-START token.
60  *
61  * The next two tokens are responsible for directives:
62  *
63  *      VERSION-DIRECTIVE(major,minor)
64  *      TAG-DIRECTIVE(handle,prefix)
65  *
66  * Example:
67  *
68  *      %YAML   1.1
69  *      %TAG    !   !foo
70  *      %TAG    !yaml!  tag:yaml.org,2002:
71  *      ---
72  *
73  * The corresponding sequence of tokens:
74  *
75  *      STREAM-START(utf-8)
76  *      VERSION-DIRECTIVE(1,1)
77  *      TAG-DIRECTIVE("!","!foo")
78  *      TAG-DIRECTIVE("!yaml!","tag:yaml.org,2002:")
79  *      DOCUMENT-START
80  *      STREAM-END
81  *
82  * Note that the VERSION-DIRECTIVE and TAG-DIRECTIVE tokens occupy a whole
83  * line.
84  *
85  * The document start and end indicators are represented by:
86  *
87  *      DOCUMENT-START
88  *      DOCUMENT-END
89  *
90  * Note that if a YAML stream contains an implicit document (without '---'
91  * and '...' indicators), no DOCUMENT-START and DOCUMENT-END tokens will be
92  * produced.
93  *
94  * In the following examples, we present whole documents together with the
95  * produced tokens.
96  *
97  *      1. An implicit document:
98  *
99  *          'a scalar'
100  *
101  *      Tokens:
102  *
103  *          STREAM-START(utf-8)
104  *          SCALAR("a scalar",single-quoted)
105  *          STREAM-END
106  *
107  *      2. An explicit document:
108  *
109  *          ---
110  *          'a scalar'
111  *          ...
112  *
113  *      Tokens:
114  *
115  *          STREAM-START(utf-8)
116  *          DOCUMENT-START
117  *          SCALAR("a scalar",single-quoted)
118  *          DOCUMENT-END
119  *          STREAM-END
120  *
121  *      3. Several documents in a stream:
122  *
123  *          'a scalar'
124  *          ---
125  *          'another scalar'
126  *          ---
127  *          'yet another scalar'
128  *
129  *      Tokens:
130  *
131  *          STREAM-START(utf-8)
132  *          SCALAR("a scalar",single-quoted)
133  *          DOCUMENT-START
134  *          SCALAR("another scalar",single-quoted)
135  *          DOCUMENT-START
136  *          SCALAR("yet another scalar",single-quoted)
137  *          STREAM-END
138  *
139  * We have already introduced the SCALAR token above.  The following tokens are
140  * used to describe aliases, anchors, tags, and scalars:
141  *
142  *      ALIAS(anchor)
143  *      ANCHOR(anchor)
144  *      TAG(handle,suffix)
145  *      SCALAR(value,style)
146  *
147  * The following series of examples illustrate the usage of these tokens:
148  *
149  *      1. A recursive sequence:
150  *
151  *          &A [ *A ]
152  *
153  *      Tokens:
154  *
155  *          STREAM-START(utf-8)
156  *          ANCHOR("A")
157  *          FLOW-SEQUENCE-START
158  *          ALIAS("A")
159  *          FLOW-SEQUENCE-END
160  *          STREAM-END
161  *
162  *      2. A tagged scalar:
163  *
164  *          !!float "3.14"  # A good approximation.
165  *
166  *      Tokens:
167  *
168  *          STREAM-START(utf-8)
169  *          TAG("!!","float")
170  *          SCALAR("3.14",double-quoted)
171  *          STREAM-END
172  *
173  *      3. Various scalar styles:
174  *
175  *          --- # Implicit empty plain scalars do not produce tokens.
176  *          --- a plain scalar
177  *          --- 'a single-quoted scalar'
178  *          --- "a double-quoted scalar"
179  *          --- |-
180  *            a literal scalar
181  *          --- >-
182  *            a folded
183  *            scalar
184  *
185  *      Tokens:
186  *
187  *          STREAM-START(utf-8)
188  *          DOCUMENT-START
189  *          DOCUMENT-START
190  *          SCALAR("a plain scalar",plain)
191  *          DOCUMENT-START
192  *          SCALAR("a single-quoted scalar",single-quoted)
193  *          DOCUMENT-START
194  *          SCALAR("a double-quoted scalar",double-quoted)
195  *          DOCUMENT-START
196  *          SCALAR("a literal scalar",literal)
197  *          DOCUMENT-START
198  *          SCALAR("a folded scalar",folded)
199  *          STREAM-END
200  *
201  * Now it's time to review collection-related tokens. We will start with
202  * flow collections:
203  *
204  *      FLOW-SEQUENCE-START
205  *      FLOW-SEQUENCE-END
206  *      FLOW-MAPPING-START
207  *      FLOW-MAPPING-END
208  *      FLOW-ENTRY
209  *      KEY
210  *      VALUE
211  *
212  * The tokens FLOW-SEQUENCE-START, FLOW-SEQUENCE-END, FLOW-MAPPING-START, and
213  * FLOW-MAPPING-END represent the indicators '[', ']', '{', and '}'
214  * respectively.  FLOW-ENTRY represents the ',' indicator.  Finally the
215  * indicators '?' and ':', which are used for denoting mapping keys and values,
216  * are represented by the KEY and VALUE tokens.
217  *
218  * The following examples show flow collections:
219  *
220  *      1. A flow sequence:
221  *
222  *          [item 1, item 2, item 3]
223  *
224  *      Tokens:
225  *
226  *          STREAM-START(utf-8)
227  *          FLOW-SEQUENCE-START
228  *          SCALAR("item 1",plain)
229  *          FLOW-ENTRY
230  *          SCALAR("item 2",plain)
231  *          FLOW-ENTRY
232  *          SCALAR("item 3",plain)
233  *          FLOW-SEQUENCE-END
234  *          STREAM-END
235  *
236  *      2. A flow mapping:
237  *
238  *          {
239  *              a simple key: a value,  # Note that the KEY token is produced.
240  *              ? a complex key: another value,
241  *          }
242  *
243  *      Tokens:
244  *
245  *          STREAM-START(utf-8)
246  *          FLOW-MAPPING-START
247  *          KEY
248  *          SCALAR("a simple key",plain)
249  *          VALUE
250  *          SCALAR("a value",plain)
251  *          FLOW-ENTRY
252  *          KEY
253  *          SCALAR("a complex key",plain)
254  *          VALUE
255  *          SCALAR("another value",plain)
256  *          FLOW-ENTRY
257  *          FLOW-MAPPING-END
258  *          STREAM-END
259  *
260  * A simple key is a key which is not denoted by the '?' indicator.  Note that
261  * the Scanner still produces the KEY token whenever it encounters a simple key.
262  *
263  * For scanning block collections, the following tokens are used (note that we
264  * repeat KEY and VALUE here):
265  *
266  *      BLOCK-SEQUENCE-START
267  *      BLOCK-MAPPING-START
268  *      BLOCK-END
269  *      BLOCK-ENTRY
270  *      KEY
271  *      VALUE
272  *
273  * The tokens BLOCK-SEQUENCE-START and BLOCK-MAPPING-START denote indentation
274  * increase that precedes a block collection (cf. the INDENT token in Python).
275  * The token BLOCK-END denotes indentation decrease that ends a block collection
276  * (cf. the DEDENT token in Python).  However YAML has some syntax peculiarities
277  * that make detection of these tokens more complex.
278  *
279  * The tokens BLOCK-ENTRY, KEY, and VALUE are used to represent the indicators
280  * '-', '?', and ':' correspondingly.
281  *
282  * The following examples show how the tokens BLOCK-SEQUENCE-START,
283  * BLOCK-MAPPING-START, and BLOCK-END are emitted by the Scanner:
284  *
285  *      1. Block sequences:
286  *
287  *          - item 1
288  *          - item 2
289  *          -
290  *            - item 3.1
291  *            - item 3.2
292  *          -
293  *            key 1: value 1
294  *            key 2: value 2
295  *
296  *      Tokens:
297  *
298  *          STREAM-START(utf-8)
299  *          BLOCK-SEQUENCE-START
300  *          BLOCK-ENTRY
301  *          SCALAR("item 1",plain)
302  *          BLOCK-ENTRY
303  *          SCALAR("item 2",plain)
304  *          BLOCK-ENTRY
305  *          BLOCK-SEQUENCE-START
306  *          BLOCK-ENTRY
307  *          SCALAR("item 3.1",plain)
308  *          BLOCK-ENTRY
309  *          SCALAR("item 3.2",plain)
310  *          BLOCK-END
311  *          BLOCK-ENTRY
312  *          BLOCK-MAPPING-START
313  *          KEY
314  *          SCALAR("key 1",plain)
315  *          VALUE
316  *          SCALAR("value 1",plain)
317  *          KEY
318  *          SCALAR("key 2",plain)
319  *          VALUE
320  *          SCALAR("value 2",plain)
321  *          BLOCK-END
322  *          BLOCK-END
323  *          STREAM-END
324  *
325  *      2. Block mappings:
326  *
327  *          a simple key: a value   # The KEY token is produced here.
328  *          ? a complex key
329  *          : another value
330  *          a mapping:
331  *            key 1: value 1
332  *            key 2: value 2
333  *          a sequence:
334  *            - item 1
335  *            - item 2
336  *
337  *      Tokens:
338  *
339  *          STREAM-START(utf-8)
340  *          BLOCK-MAPPING-START
341  *          KEY
342  *          SCALAR("a simple key",plain)
343  *          VALUE
344  *          SCALAR("a value",plain)
345  *          KEY
346  *          SCALAR("a complex key",plain)
347  *          VALUE
348  *          SCALAR("another value",plain)
349  *          KEY
350  *          SCALAR("a mapping",plain)
351  *          VALUE
352  *          BLOCK-MAPPING-START
353  *          KEY
354  *          SCALAR("key 1",plain)
355  *          VALUE
356  *          SCALAR("value 1",plain)
357  *          KEY
358  *          SCALAR("key 2",plain)
359  *          VALUE
360  *          SCALAR("value 2",plain)
361  *          BLOCK-END
362  *          KEY
363  *          SCALAR("a sequence",plain)
364  *          VALUE
365  *          BLOCK-SEQUENCE-START
366  *          BLOCK-ENTRY
367  *          SCALAR("item 1",plain)
368  *          BLOCK-ENTRY
369  *          SCALAR("item 2",plain)
370  *          BLOCK-END
371  *          BLOCK-END
372  *          STREAM-END
373  *
374  * YAML does not always require a new block collection to start on a new
375  * line.  If the current line contains only '-', '?', and ':' indicators, a new
376  * block collection may start at the current line.  The following examples
377  * illustrate this case:
378  *
379  *      1. Collections in a sequence:
380  *
381  *          - - item 1
382  *            - item 2
383  *          - key 1: value 1
384  *            key 2: value 2
385  *          - ? complex key
386  *            : complex value
387  *
388  *      Tokens:
389  *
390  *          STREAM-START(utf-8)
391  *          BLOCK-SEQUENCE-START
392  *          BLOCK-ENTRY
393  *          BLOCK-SEQUENCE-START
394  *          BLOCK-ENTRY
395  *          SCALAR("item 1",plain)
396  *          BLOCK-ENTRY
397  *          SCALAR("item 2",plain)
398  *          BLOCK-END
399  *          BLOCK-ENTRY
400  *          BLOCK-MAPPING-START
401  *          KEY
402  *          SCALAR("key 1",plain)
403  *          VALUE
404  *          SCALAR("value 1",plain)
405  *          KEY
406  *          SCALAR("key 2",plain)
407  *          VALUE
408  *          SCALAR("value 2",plain)
409  *          BLOCK-END
410  *          BLOCK-ENTRY
411  *          BLOCK-MAPPING-START
412  *          KEY
413  *          SCALAR("complex key",plain)
414  *          VALUE
415  *          SCALAR("complex value",plain)
416  *          BLOCK-END
417  *          BLOCK-END
418  *          STREAM-END
419  *
420  *      2. Collections in a mapping:
421  *
422  *          ? a sequence
423  *          : - item 1
424  *            - item 2
425  *          ? a mapping
426  *          : key 1: value 1
427  *            key 2: value 2
428  *
429  *      Tokens:
430  *
431  *          STREAM-START(utf-8)
432  *          BLOCK-MAPPING-START
433  *          KEY
434  *          SCALAR("a sequence",plain)
435  *          VALUE
436  *          BLOCK-SEQUENCE-START
437  *          BLOCK-ENTRY
438  *          SCALAR("item 1",plain)
439  *          BLOCK-ENTRY
440  *          SCALAR("item 2",plain)
441  *          BLOCK-END
442  *          KEY
443  *          SCALAR("a mapping",plain)
444  *          VALUE
445  *          BLOCK-MAPPING-START
446  *          KEY
447  *          SCALAR("key 1",plain)
448  *          VALUE
449  *          SCALAR("value 1",plain)
450  *          KEY
451  *          SCALAR("key 2",plain)
452  *          VALUE
453  *          SCALAR("value 2",plain)
454  *          BLOCK-END
455  *          BLOCK-END
456  *          STREAM-END
457  *
458  * YAML also permits non-indented sequences if they are included in a block
459  * mapping.  In this case, the token BLOCK-SEQUENCE-START is not produced:
460  *
461  *      key:
462  *      - item 1    # BLOCK-SEQUENCE-START is NOT produced here.
463  *      - item 2
464  *
465  * Tokens:
466  *
467  *      STREAM-START(utf-8)
468  *      BLOCK-MAPPING-START
469  *      KEY
470  *      SCALAR("key",plain)
471  *      VALUE
472  *      BLOCK-ENTRY
473  *      SCALAR("item 1",plain)
474  *      BLOCK-ENTRY
475  *      SCALAR("item 2",plain)
476  *      BLOCK-END
477  */
478 
479 #include "yaml_private.h"
480 
/*
 * Ensure that the buffer contains the required number of characters.
 *
 * Evaluates to 1 when `parser->unread` already covers `length`; otherwise
 * evaluates to the result of yaml_parser_update_buffer(parser, length).
 * Callers test the result for zero to detect failure (reader error or
 * memory error).
 *
 * `parser` may be any pointer expression (it is parenthesized on every use).
 * Both arguments can be evaluated more than once, so they must be free of
 * side effects.
 */

#define CACHE(parser,length)                                                    \
    ((parser)->unread >= (length)                                               \
        ? 1                                                                     \
        : yaml_parser_update_buffer((parser), (length)))
490 
/*
 * Advance the buffer pointer past one (non-break) character.
 *
 * Increments the mark's index and column, decrements the count of unread
 * characters, and moves the buffer pointer forward by WIDTH() of the
 * character currently under it.  The line number is left untouched.
 *
 * `parser` may be any pointer expression; it is evaluated several times, so
 * it must be free of side effects.
 */

#define SKIP(parser)                                                            \
     ((parser)->mark.index ++,                                                  \
      (parser)->mark.column ++,                                                 \
      (parser)->unread --,                                                      \
      (parser)->buffer.pointer += WIDTH((parser)->buffer))
500 
/*
 * Advance the buffer pointer past one line break, if there is one.
 *
 *  - CR LF (IS_CRLF): index and unread move by 2, pointer by 2 bytes.
 *  - Any other break (IS_BREAK): index and unread move by 1, pointer by
 *    WIDTH() of the break character.
 *
 * In both cases the column is reset to 0 and the line number is incremented.
 * When the current character is not a line break nothing is changed and the
 * expression evaluates to NULL.
 *
 * `parser` may be any pointer expression; it is evaluated several times, so
 * it must be free of side effects.
 */

#define SKIP_LINE(parser)                                                       \
     (IS_CRLF((parser)->buffer) ?                                               \
      ((parser)->mark.index += 2,                                               \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread -= 2,                                                   \
       (parser)->buffer.pointer += 2) :                                         \
      IS_BREAK((parser)->buffer) ?                                              \
      ((parser)->mark.index ++,                                                 \
       (parser)->mark.column = 0,                                               \
       (parser)->mark.line ++,                                                  \
       (parser)->unread --,                                                     \
       (parser)->buffer.pointer += WIDTH((parser)->buffer)) : NULL)
514 
/*
 * Copy a character to a string buffer and advance pointers.
 *
 * STRING_EXTEND() is consulted first; if it yields 0 the macro evaluates to
 * 0 and nothing else happens.  Otherwise COPY() transfers the character from
 * the parser buffer to `string`, the mark's index and column are
 * incremented, the unread count is decremented, and the macro evaluates
 * to 1.
 *
 * Note that this macro does not touch the buffer pointer itself; moving it
 * (and the string pointer) is left to COPY(), which is defined outside this
 * file -- presumably in yaml_private.h.
 *
 * `parser` may be any pointer expression; it is evaluated several times, so
 * it must be free of side effects.
 */

#define READ(parser,string)                                                     \
     (STRING_EXTEND((parser),string) ?                                          \
         (COPY(string,(parser)->buffer),                                        \
          (parser)->mark.index ++,                                              \
          (parser)->mark.column ++,                                             \
          (parser)->unread --,                                                  \
          1) : 0)
526 
/*
 * Copy a line break character to a string buffer and advance pointers.
 *
 * STRING_EXTEND() is consulted first; if it yields 0 the macro evaluates to
 * 0 and nothing is consumed.  Otherwise the break under the buffer pointer
 * is handled as follows:
 *
 *   CR LF                 -> a single '\n' is stored; pointer, index and
 *                            unread all move by 2.
 *   CR or LF alone        -> '\n' is stored; pointer, index and unread move
 *                            by 1.
 *   NEL (C2 85)           -> '\n' is stored; pointer moves by 2 bytes, index
 *                            and unread by 1.
 *   LS / PS (E2 80 A8/A9) -> the three bytes are copied unchanged; index and
 *                            unread move by 1.
 *
 * Every consumed break resets the column to 0 and increments the line
 * number.  The macro evaluates to 1 whenever STRING_EXTEND() succeeded --
 * including when the current character is not a line break, in which case
 * nothing is copied or advanced.
 *
 * `parser` may be any pointer expression; it is evaluated several times, so
 * it must be free of side effects.
 */

#define READ_LINE(parser,string)                                                \
    (STRING_EXTEND((parser),string) ?                                           \
    (((CHECK_AT((parser)->buffer,'\r',0)                                        \
       && CHECK_AT((parser)->buffer,'\n',1)) ?      /* CR LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index += 2,                                                \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread -= 2) :                                                  \
     (CHECK_AT((parser)->buffer,'\r',0)                                         \
      || CHECK_AT((parser)->buffer,'\n',0)) ?       /* CR|LF -> LF */           \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer ++,                                              \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xC2',0)                                       \
      && CHECK_AT((parser)->buffer,'\x85',1)) ?     /* NEL -> LF */             \
     (*((string).pointer++) = (yaml_char_t) '\n',                               \
      (parser)->buffer.pointer += 2,                                            \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) :                                                    \
     (CHECK_AT((parser)->buffer,'\xE2',0) &&                                    \
      CHECK_AT((parser)->buffer,'\x80',1) &&                                    \
      (CHECK_AT((parser)->buffer,'\xA8',2) ||                                   \
       CHECK_AT((parser)->buffer,'\xA9',2))) ?      /* LS|PS -> LS|PS */        \
     (*((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      *((string).pointer++) = *((parser)->buffer.pointer++),                    \
      (parser)->mark.index ++,                                                  \
      (parser)->mark.column = 0,                                                \
      (parser)->mark.line ++,                                                   \
      (parser)->unread --) : 0),                                                \
    1) : 0)
569 
570 /*
571  * Public API declarations.
572  */
573 
574 YAML_DECLARE(int)
575 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token);
576 
577 /*
578  * Error handling.
579  */
580 
581 static int
582 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
583         yaml_mark_t context_mark, const char *problem);
584 
585 /*
586  * High-level token API.
587  */
588 
589 YAML_DECLARE(int)
590 yaml_parser_fetch_more_tokens(yaml_parser_t *parser);
591 
592 static int
593 yaml_parser_fetch_next_token(yaml_parser_t *parser);
594 
595 /*
596  * Potential simple keys.
597  */
598 
599 static int
600 yaml_parser_stale_simple_keys(yaml_parser_t *parser);
601 
602 static int
603 yaml_parser_save_simple_key(yaml_parser_t *parser);
604 
605 static int
606 yaml_parser_remove_simple_key(yaml_parser_t *parser);
607 
608 static int
609 yaml_parser_increase_flow_level(yaml_parser_t *parser);
610 
611 static int
612 yaml_parser_decrease_flow_level(yaml_parser_t *parser);
613 
614 /*
615  * Indentation treatment.
616  */
617 
618 static int
619 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
620         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark);
621 
622 static int
623 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column);
624 
625 /*
626  * Token fetchers.
627  */
628 
629 static int
630 yaml_parser_fetch_stream_start(yaml_parser_t *parser);
631 
632 static int
633 yaml_parser_fetch_stream_end(yaml_parser_t *parser);
634 
635 static int
636 yaml_parser_fetch_directive(yaml_parser_t *parser);
637 
638 static int
639 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
640         yaml_token_type_t type);
641 
642 static int
643 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
644         yaml_token_type_t type);
645 
646 static int
647 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
648         yaml_token_type_t type);
649 
650 static int
651 yaml_parser_fetch_flow_entry(yaml_parser_t *parser);
652 
653 static int
654 yaml_parser_fetch_block_entry(yaml_parser_t *parser);
655 
656 static int
657 yaml_parser_fetch_key(yaml_parser_t *parser);
658 
659 static int
660 yaml_parser_fetch_value(yaml_parser_t *parser);
661 
662 static int
663 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type);
664 
665 static int
666 yaml_parser_fetch_tag(yaml_parser_t *parser);
667 
668 static int
669 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal);
670 
671 static int
672 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single);
673 
674 static int
675 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser);
676 
677 /*
678  * Token scanners.
679  */
680 
681 static int
682 yaml_parser_scan_to_next_token(yaml_parser_t *parser);
683 
684 static int
685 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token);
686 
687 static int
688 yaml_parser_scan_directive_name(yaml_parser_t *parser,
689         yaml_mark_t start_mark, yaml_char_t **name);
690 
691 static int
692 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
693         yaml_mark_t start_mark, int *major, int *minor);
694 
695 static int
696 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
697         yaml_mark_t start_mark, int *number);
698 
699 static int
700 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
701         yaml_mark_t mark, yaml_char_t **handle, yaml_char_t **prefix);
702 
703 static int
704 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
705         yaml_token_type_t type);
706 
707 static int
708 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token);
709 
710 static int
711 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
712         yaml_mark_t start_mark, yaml_char_t **handle);
713 
714 static int
715 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
716         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri);
717 
718 static int
719 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
720         yaml_mark_t start_mark, yaml_string_t *string);
721 
722 static int
723 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
724         int literal);
725 
726 static int
727 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
728         int *indent, yaml_string_t *breaks,
729         yaml_mark_t start_mark, yaml_mark_t *end_mark);
730 
731 static int
732 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
733         int single);
734 
735 static int
736 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token);
737 
738 /*
739  * Get the next token.
740  */
741 
742 YAML_DECLARE(int)
yaml_parser_scan(yaml_parser_t * parser,yaml_token_t * token)743 yaml_parser_scan(yaml_parser_t *parser, yaml_token_t *token)
744 {
745     assert(parser); /* Non-NULL parser object is expected. */
746     assert(token);  /* Non-NULL token object is expected. */
747 
748     /* Erase the token object. */
749 
750     memset(token, 0, sizeof(yaml_token_t));
751 
752     /* No tokens after STREAM-END or error. */
753 
754     if (parser->stream_end_produced || parser->error) return 1;
755 
756     /* Ensure that the tokens queue contains enough tokens. */
757 
758     if (!parser->token_available) {
759         if (!yaml_parser_fetch_more_tokens(parser))
760             return 0;
761     }
762 
763     /* Fetch the next token from the queue. */
764 
765     *token = DEQUEUE(parser, parser->tokens);
766     parser->token_available = 0;
767     parser->tokens_parsed ++;
768 
769     if (token->type == YAML_STREAM_END_TOKEN) parser->stream_end_produced = 1;
770 
771     return 1;
772 }
773 
774 /*
775  * Set the scanner error and return 0.
776  */
777 
778 static int
yaml_parser_set_scanner_error(yaml_parser_t * parser,const char * context,yaml_mark_t context_mark,const char * problem)779 yaml_parser_set_scanner_error(yaml_parser_t *parser, const char *context,
780         yaml_mark_t context_mark, const char *problem)
781 {
782     parser->error = YAML_SCANNER_ERROR;
783     parser->context = context;
784     parser->context_mark = context_mark;
785     parser->problem = problem;
786     parser->problem_mark = parser->mark;
787 
788     return 0;
789 }
790 
791 /*
792  * Ensure that the tokens queue contains at least one token which can be
793  * returned to the Parser.
794  */
795 
796 YAML_DECLARE(int)
yaml_parser_fetch_more_tokens(yaml_parser_t * parser)797 yaml_parser_fetch_more_tokens(yaml_parser_t *parser)
798 {
799     int need_more_tokens;
800 
801     /* While we need more tokens to fetch, do it. */
802 
803     while (1)
804     {
805         /*
806          * Check if we really need to fetch more tokens.
807          */
808 
809         need_more_tokens = 0;
810 
811         if (parser->tokens.head == parser->tokens.tail)
812         {
813             /* Queue is empty. */
814 
815             need_more_tokens = 1;
816         }
817         else
818         {
819             yaml_simple_key_t *simple_key;
820 
821             /* Check if any potential simple key may occupy the head position. */
822 
823             if (!yaml_parser_stale_simple_keys(parser))
824                 return 0;
825 
826             for (simple_key = parser->simple_keys.start;
827                     simple_key != parser->simple_keys.top; simple_key++) {
828                 if (simple_key->possible
829                         && simple_key->token_number == parser->tokens_parsed) {
830                     need_more_tokens = 1;
831                     break;
832                 }
833             }
834         }
835 
836         /* We are finished. */
837 
838         if (!need_more_tokens)
839             break;
840 
841         /* Fetch the next token. */
842 
843         if (!yaml_parser_fetch_next_token(parser))
844             return 0;
845     }
846 
847     parser->token_available = 1;
848 
849     return 1;
850 }
851 
852 /*
853  * The dispatcher for token fetchers.
854  */
855 
856 static int
yaml_parser_fetch_next_token(yaml_parser_t * parser)857 yaml_parser_fetch_next_token(yaml_parser_t *parser)
858 {
859     /* Ensure that the buffer is initialized. */
860 
861     if (!CACHE(parser, 1))
862         return 0;
863 
864     /* Check if we just started scanning.  Fetch STREAM-START then. */
865 
866     if (!parser->stream_start_produced)
867         return yaml_parser_fetch_stream_start(parser);
868 
869     /* Eat whitespaces and comments until we reach the next token. */
870 
871     if (!yaml_parser_scan_to_next_token(parser))
872         return 0;
873 
874     /* Remove obsolete potential simple keys. */
875 
876     if (!yaml_parser_stale_simple_keys(parser))
877         return 0;
878 
879     /* Check the indentation level against the current column. */
880 
881     if (!yaml_parser_unroll_indent(parser, parser->mark.column))
882         return 0;
883 
884     /*
885      * Ensure that the buffer contains at least 4 characters.  4 is the length
886      * of the longest indicators ('--- ' and '... ').
887      */
888 
889     if (!CACHE(parser, 4))
890         return 0;
891 
892     /* Is it the end of the stream? */
893 
894     if (IS_Z(parser->buffer))
895         return yaml_parser_fetch_stream_end(parser);
896 
897     /* Is it a directive? */
898 
899     if (parser->mark.column == 0 && CHECK(parser->buffer, '%'))
900         return yaml_parser_fetch_directive(parser);
901 
902     /* Is it the document start indicator? */
903 
904     if (parser->mark.column == 0
905             && CHECK_AT(parser->buffer, '-', 0)
906             && CHECK_AT(parser->buffer, '-', 1)
907             && CHECK_AT(parser->buffer, '-', 2)
908             && IS_BLANKZ_AT(parser->buffer, 3))
909         return yaml_parser_fetch_document_indicator(parser,
910                 YAML_DOCUMENT_START_TOKEN);
911 
912     /* Is it the document end indicator? */
913 
914     if (parser->mark.column == 0
915             && CHECK_AT(parser->buffer, '.', 0)
916             && CHECK_AT(parser->buffer, '.', 1)
917             && CHECK_AT(parser->buffer, '.', 2)
918             && IS_BLANKZ_AT(parser->buffer, 3))
919         return yaml_parser_fetch_document_indicator(parser,
920                 YAML_DOCUMENT_END_TOKEN);
921 
922     /* Is it the flow sequence start indicator? */
923 
924     if (CHECK(parser->buffer, '['))
925         return yaml_parser_fetch_flow_collection_start(parser,
926                 YAML_FLOW_SEQUENCE_START_TOKEN);
927 
928     /* Is it the flow mapping start indicator? */
929 
930     if (CHECK(parser->buffer, '{'))
931         return yaml_parser_fetch_flow_collection_start(parser,
932                 YAML_FLOW_MAPPING_START_TOKEN);
933 
934     /* Is it the flow sequence end indicator? */
935 
936     if (CHECK(parser->buffer, ']'))
937         return yaml_parser_fetch_flow_collection_end(parser,
938                 YAML_FLOW_SEQUENCE_END_TOKEN);
939 
940     /* Is it the flow mapping end indicator? */
941 
942     if (CHECK(parser->buffer, '}'))
943         return yaml_parser_fetch_flow_collection_end(parser,
944                 YAML_FLOW_MAPPING_END_TOKEN);
945 
946     /* Is it the flow entry indicator? */
947 
948     if (CHECK(parser->buffer, ','))
949         return yaml_parser_fetch_flow_entry(parser);
950 
951     /* Is it the block entry indicator? */
952 
953     if (CHECK(parser->buffer, '-') && IS_BLANKZ_AT(parser->buffer, 1))
954         return yaml_parser_fetch_block_entry(parser);
955 
956     /* Is it the key indicator? */
957 
958     if (CHECK(parser->buffer, '?')
959             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
960         return yaml_parser_fetch_key(parser);
961 
962     /* Is it the value indicator? */
963 
964     if (CHECK(parser->buffer, ':')
965             && (parser->flow_level || IS_BLANKZ_AT(parser->buffer, 1)))
966         return yaml_parser_fetch_value(parser);
967 
968     /* Is it an alias? */
969 
970     if (CHECK(parser->buffer, '*'))
971         return yaml_parser_fetch_anchor(parser, YAML_ALIAS_TOKEN);
972 
973     /* Is it an anchor? */
974 
975     if (CHECK(parser->buffer, '&'))
976         return yaml_parser_fetch_anchor(parser, YAML_ANCHOR_TOKEN);
977 
978     /* Is it a tag? */
979 
980     if (CHECK(parser->buffer, '!'))
981         return yaml_parser_fetch_tag(parser);
982 
983     /* Is it a literal scalar? */
984 
985     if (CHECK(parser->buffer, '|') && !parser->flow_level)
986         return yaml_parser_fetch_block_scalar(parser, 1);
987 
988     /* Is it a folded scalar? */
989 
990     if (CHECK(parser->buffer, '>') && !parser->flow_level)
991         return yaml_parser_fetch_block_scalar(parser, 0);
992 
993     /* Is it a single-quoted scalar? */
994 
995     if (CHECK(parser->buffer, '\''))
996         return yaml_parser_fetch_flow_scalar(parser, 1);
997 
998     /* Is it a double-quoted scalar? */
999 
1000     if (CHECK(parser->buffer, '"'))
1001         return yaml_parser_fetch_flow_scalar(parser, 0);
1002 
1003     /*
1004      * Is it a plain scalar?
1005      *
1006      * A plain scalar may start with any non-blank characters except
1007      *
1008      *      '-', '?', ':', ',', '[', ']', '{', '}',
1009      *      '#', '&', '*', '!', '|', '>', '\'', '\"',
1010      *      '%', '@', '`'.
1011      *
1012      * In the block context (and, for the '-' indicator, in the flow context
1013      * too), it may also start with the characters
1014      *
1015      *      '-', '?', ':'
1016      *
1017      * if it is followed by a non-space character.
1018      *
1019      * The last rule is more restrictive than the specification requires.
1020      */
1021 
1022     if (!(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '-')
1023                 || CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':')
1024                 || CHECK(parser->buffer, ',') || CHECK(parser->buffer, '[')
1025                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
1026                 || CHECK(parser->buffer, '}') || CHECK(parser->buffer, '#')
1027                 || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '*')
1028                 || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '|')
1029                 || CHECK(parser->buffer, '>') || CHECK(parser->buffer, '\'')
1030                 || CHECK(parser->buffer, '"') || CHECK(parser->buffer, '%')
1031                 || CHECK(parser->buffer, '@') || CHECK(parser->buffer, '`')) ||
1032             (CHECK(parser->buffer, '-') && !IS_BLANK_AT(parser->buffer, 1)) ||
1033             (!parser->flow_level &&
1034              (CHECK(parser->buffer, '?') || CHECK(parser->buffer, ':'))
1035              && !IS_BLANKZ_AT(parser->buffer, 1)))
1036         return yaml_parser_fetch_plain_scalar(parser);
1037 
1038     /*
1039      * If we don't determine the token type so far, it is an error.
1040      */
1041 
1042     return yaml_parser_set_scanner_error(parser,
1043             "while scanning for the next token", parser->mark,
1044             "found character that cannot start any token");
1045 }
1046 
1047 /*
1048  * Check the list of potential simple keys and remove the positions that
1049  * cannot contain simple keys anymore.
1050  */
1051 
1052 static int
yaml_parser_stale_simple_keys(yaml_parser_t * parser)1053 yaml_parser_stale_simple_keys(yaml_parser_t *parser)
1054 {
1055     yaml_simple_key_t *simple_key;
1056 
1057     /* Check for a potential simple key for each flow level. */
1058 
1059     for (simple_key = parser->simple_keys.start;
1060             simple_key != parser->simple_keys.top; simple_key ++)
1061     {
1062         /*
1063          * The specification requires that a simple key
1064          *
1065          *  - is limited to a single line,
1066          *  - is shorter than 1024 characters.
1067          */
1068 
1069         if (simple_key->possible
1070                 && (simple_key->mark.line < parser->mark.line
1071                     || simple_key->mark.index+1024 < parser->mark.index)) {
1072 
1073             /* Check if the potential simple key to be removed is required. */
1074 
1075             if (simple_key->required) {
1076                 return yaml_parser_set_scanner_error(parser,
1077                         "while scanning a simple key", simple_key->mark,
1078                         "could not find expected ':'");
1079             }
1080 
1081             simple_key->possible = 0;
1082         }
1083     }
1084 
1085     return 1;
1086 }
1087 
1088 /*
1089  * Check if a simple key may start at the current position and add it if
1090  * needed.
1091  */
1092 
1093 static int
yaml_parser_save_simple_key(yaml_parser_t * parser)1094 yaml_parser_save_simple_key(yaml_parser_t *parser)
1095 {
1096     /*
1097      * A simple key is required at the current position if the scanner is in
1098      * the block context and the current column coincides with the indentation
1099      * level.
1100      */
1101 
1102     int required = (!parser->flow_level
1103             && parser->indent == (ptrdiff_t)parser->mark.column);
1104 
1105     /*
1106      * If the current position may start a simple key, save it.
1107      */
1108 
1109     if (parser->simple_key_allowed)
1110     {
1111         yaml_simple_key_t simple_key;
1112         simple_key.possible = 1;
1113         simple_key.required = required;
1114         simple_key.token_number =
1115             parser->tokens_parsed + (parser->tokens.tail - parser->tokens.head);
1116         simple_key.mark = parser->mark;
1117 
1118         if (!yaml_parser_remove_simple_key(parser)) return 0;
1119 
1120         *(parser->simple_keys.top-1) = simple_key;
1121     }
1122 
1123     return 1;
1124 }
1125 
1126 /*
1127  * Remove a potential simple key at the current flow level.
1128  */
1129 
1130 static int
yaml_parser_remove_simple_key(yaml_parser_t * parser)1131 yaml_parser_remove_simple_key(yaml_parser_t *parser)
1132 {
1133     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1134 
1135     if (simple_key->possible)
1136     {
1137         /* If the key is required, it is an error. */
1138 
1139         if (simple_key->required) {
1140             return yaml_parser_set_scanner_error(parser,
1141                     "while scanning a simple key", simple_key->mark,
1142                     "could not find expected ':'");
1143         }
1144     }
1145 
1146     /* Remove the key from the stack. */
1147 
1148     simple_key->possible = 0;
1149 
1150     return 1;
1151 }
1152 
1153 /*
1154  * Increase the flow level and resize the simple key list if needed.
1155  */
1156 
1157 static int
yaml_parser_increase_flow_level(yaml_parser_t * parser)1158 yaml_parser_increase_flow_level(yaml_parser_t *parser)
1159 {
1160     yaml_simple_key_t empty_simple_key = { 0, 0, 0, { 0, 0, 0 } };
1161 
1162     /* Reset the simple key on the next level. */
1163 
1164     if (!PUSH(parser, parser->simple_keys, empty_simple_key))
1165         return 0;
1166 
1167     /* Increase the flow level. */
1168 
1169     if (parser->flow_level == INT_MAX) {
1170         parser->error = YAML_MEMORY_ERROR;
1171         return 0;
1172     }
1173 
1174     parser->flow_level++;
1175 
1176     return 1;
1177 }
1178 
1179 /*
1180  * Decrease the flow level.
1181  */
1182 
1183 static int
yaml_parser_decrease_flow_level(yaml_parser_t * parser)1184 yaml_parser_decrease_flow_level(yaml_parser_t *parser)
1185 {
1186     if (parser->flow_level) {
1187         parser->flow_level --;
1188         (void)POP(parser, parser->simple_keys);
1189     }
1190 
1191     return 1;
1192 }
1193 
1194 /*
1195  * Push the current indentation level to the stack and set the new level
1196  * the current column is greater than the indentation level.  In this case,
1197  * append or insert the specified token into the token queue.
1198  *
1199  */
1200 
1201 static int
yaml_parser_roll_indent(yaml_parser_t * parser,ptrdiff_t column,ptrdiff_t number,yaml_token_type_t type,yaml_mark_t mark)1202 yaml_parser_roll_indent(yaml_parser_t *parser, ptrdiff_t column,
1203         ptrdiff_t number, yaml_token_type_t type, yaml_mark_t mark)
1204 {
1205     yaml_token_t token;
1206 
1207     /* In the flow context, do nothing. */
1208 
1209     if (parser->flow_level)
1210         return 1;
1211 
1212     if (parser->indent < column)
1213     {
1214         /*
1215          * Push the current indentation level to the stack and set the new
1216          * indentation level.
1217          */
1218 
1219         if (!PUSH(parser, parser->indents, parser->indent))
1220             return 0;
1221 
1222         if (column > INT_MAX) {
1223             parser->error = YAML_MEMORY_ERROR;
1224             return 0;
1225         }
1226 
1227         parser->indent = column;
1228 
1229         /* Create a token and insert it into the queue. */
1230 
1231         TOKEN_INIT(token, type, mark, mark);
1232 
1233         if (number == -1) {
1234             if (!ENQUEUE(parser, parser->tokens, token))
1235                 return 0;
1236         }
1237         else {
1238             if (!QUEUE_INSERT(parser,
1239                         parser->tokens, number - parser->tokens_parsed, token))
1240                 return 0;
1241         }
1242     }
1243 
1244     return 1;
1245 }
1246 
1247 /*
1248  * Pop indentation levels from the indents stack until the current level
1249  * becomes less or equal to the column.  For each indentation level, append
1250  * the BLOCK-END token.
1251  */
1252 
1253 
1254 static int
yaml_parser_unroll_indent(yaml_parser_t * parser,ptrdiff_t column)1255 yaml_parser_unroll_indent(yaml_parser_t *parser, ptrdiff_t column)
1256 {
1257     yaml_token_t token;
1258 
1259     /* In the flow context, do nothing. */
1260 
1261     if (parser->flow_level)
1262         return 1;
1263 
1264     /* Loop through the indentation levels in the stack. */
1265 
1266     while (parser->indent > column)
1267     {
1268         /* Create a token and append it to the queue. */
1269 
1270         TOKEN_INIT(token, YAML_BLOCK_END_TOKEN, parser->mark, parser->mark);
1271 
1272         if (!ENQUEUE(parser, parser->tokens, token))
1273             return 0;
1274 
1275         /* Pop the indentation level. */
1276 
1277         parser->indent = POP(parser, parser->indents);
1278     }
1279 
1280     return 1;
1281 }
1282 
1283 /*
1284  * Initialize the scanner and produce the STREAM-START token.
1285  */
1286 
1287 static int
yaml_parser_fetch_stream_start(yaml_parser_t * parser)1288 yaml_parser_fetch_stream_start(yaml_parser_t *parser)
1289 {
1290     yaml_simple_key_t simple_key = { 0, 0, 0, { 0, 0, 0 } };
1291     yaml_token_t token;
1292 
1293     /* Set the initial indentation. */
1294 
1295     parser->indent = -1;
1296 
1297     /* Initialize the simple key stack. */
1298 
1299     if (!PUSH(parser, parser->simple_keys, simple_key))
1300         return 0;
1301 
1302     /* A simple key is allowed at the beginning of the stream. */
1303 
1304     parser->simple_key_allowed = 1;
1305 
1306     /* We have started. */
1307 
1308     parser->stream_start_produced = 1;
1309 
1310     /* Create the STREAM-START token and append it to the queue. */
1311 
1312     STREAM_START_TOKEN_INIT(token, parser->encoding,
1313             parser->mark, parser->mark);
1314 
1315     if (!ENQUEUE(parser, parser->tokens, token))
1316         return 0;
1317 
1318     return 1;
1319 }
1320 
1321 /*
1322  * Produce the STREAM-END token and shut down the scanner.
1323  */
1324 
1325 static int
yaml_parser_fetch_stream_end(yaml_parser_t * parser)1326 yaml_parser_fetch_stream_end(yaml_parser_t *parser)
1327 {
1328     yaml_token_t token;
1329 
1330     /* Force new line. */
1331 
1332     if (parser->mark.column != 0) {
1333         parser->mark.column = 0;
1334         parser->mark.line ++;
1335     }
1336 
1337     /* Reset the indentation level. */
1338 
1339     if (!yaml_parser_unroll_indent(parser, -1))
1340         return 0;
1341 
1342     /* Reset simple keys. */
1343 
1344     if (!yaml_parser_remove_simple_key(parser))
1345         return 0;
1346 
1347     parser->simple_key_allowed = 0;
1348 
1349     /* Create the STREAM-END token and append it to the queue. */
1350 
1351     STREAM_END_TOKEN_INIT(token, parser->mark, parser->mark);
1352 
1353     if (!ENQUEUE(parser, parser->tokens, token))
1354         return 0;
1355 
1356     return 1;
1357 }
1358 
1359 /*
1360  * Produce a VERSION-DIRECTIVE or TAG-DIRECTIVE token.
1361  */
1362 
1363 static int
yaml_parser_fetch_directive(yaml_parser_t * parser)1364 yaml_parser_fetch_directive(yaml_parser_t *parser)
1365 {
1366     yaml_token_t token;
1367 
1368     /* Reset the indentation level. */
1369 
1370     if (!yaml_parser_unroll_indent(parser, -1))
1371         return 0;
1372 
1373     /* Reset simple keys. */
1374 
1375     if (!yaml_parser_remove_simple_key(parser))
1376         return 0;
1377 
1378     parser->simple_key_allowed = 0;
1379 
1380     /* Create the YAML-DIRECTIVE or TAG-DIRECTIVE token. */
1381 
1382     if (!yaml_parser_scan_directive(parser, &token))
1383         return 0;
1384 
1385     /* Append the token to the queue. */
1386 
1387     if (!ENQUEUE(parser, parser->tokens, token)) {
1388         yaml_token_delete(&token);
1389         return 0;
1390     }
1391 
1392     return 1;
1393 }
1394 
1395 /*
1396  * Produce the DOCUMENT-START or DOCUMENT-END token.
1397  */
1398 
1399 static int
yaml_parser_fetch_document_indicator(yaml_parser_t * parser,yaml_token_type_t type)1400 yaml_parser_fetch_document_indicator(yaml_parser_t *parser,
1401         yaml_token_type_t type)
1402 {
1403     yaml_mark_t start_mark, end_mark;
1404     yaml_token_t token;
1405 
1406     /* Reset the indentation level. */
1407 
1408     if (!yaml_parser_unroll_indent(parser, -1))
1409         return 0;
1410 
1411     /* Reset simple keys. */
1412 
1413     if (!yaml_parser_remove_simple_key(parser))
1414         return 0;
1415 
1416     parser->simple_key_allowed = 0;
1417 
1418     /* Consume the token. */
1419 
1420     start_mark = parser->mark;
1421 
1422     SKIP(parser);
1423     SKIP(parser);
1424     SKIP(parser);
1425 
1426     end_mark = parser->mark;
1427 
1428     /* Create the DOCUMENT-START or DOCUMENT-END token. */
1429 
1430     TOKEN_INIT(token, type, start_mark, end_mark);
1431 
1432     /* Append the token to the queue. */
1433 
1434     if (!ENQUEUE(parser, parser->tokens, token))
1435         return 0;
1436 
1437     return 1;
1438 }
1439 
1440 /*
1441  * Produce the FLOW-SEQUENCE-START or FLOW-MAPPING-START token.
1442  */
1443 
1444 static int
yaml_parser_fetch_flow_collection_start(yaml_parser_t * parser,yaml_token_type_t type)1445 yaml_parser_fetch_flow_collection_start(yaml_parser_t *parser,
1446         yaml_token_type_t type)
1447 {
1448     yaml_mark_t start_mark, end_mark;
1449     yaml_token_t token;
1450 
1451     /* The indicators '[' and '{' may start a simple key. */
1452 
1453     if (!yaml_parser_save_simple_key(parser))
1454         return 0;
1455 
1456     /* Increase the flow level. */
1457 
1458     if (!yaml_parser_increase_flow_level(parser))
1459         return 0;
1460 
1461     /* A simple key may follow the indicators '[' and '{'. */
1462 
1463     parser->simple_key_allowed = 1;
1464 
1465     /* Consume the token. */
1466 
1467     start_mark = parser->mark;
1468     SKIP(parser);
1469     end_mark = parser->mark;
1470 
1471     /* Create the FLOW-SEQUENCE-START of FLOW-MAPPING-START token. */
1472 
1473     TOKEN_INIT(token, type, start_mark, end_mark);
1474 
1475     /* Append the token to the queue. */
1476 
1477     if (!ENQUEUE(parser, parser->tokens, token))
1478         return 0;
1479 
1480     return 1;
1481 }
1482 
1483 /*
1484  * Produce the FLOW-SEQUENCE-END or FLOW-MAPPING-END token.
1485  */
1486 
1487 static int
yaml_parser_fetch_flow_collection_end(yaml_parser_t * parser,yaml_token_type_t type)1488 yaml_parser_fetch_flow_collection_end(yaml_parser_t *parser,
1489         yaml_token_type_t type)
1490 {
1491     yaml_mark_t start_mark, end_mark;
1492     yaml_token_t token;
1493 
1494     /* Reset any potential simple key on the current flow level. */
1495 
1496     if (!yaml_parser_remove_simple_key(parser))
1497         return 0;
1498 
1499     /* Decrease the flow level. */
1500 
1501     if (!yaml_parser_decrease_flow_level(parser))
1502         return 0;
1503 
1504     /* No simple keys after the indicators ']' and '}'. */
1505 
1506     parser->simple_key_allowed = 0;
1507 
1508     /* Consume the token. */
1509 
1510     start_mark = parser->mark;
1511     SKIP(parser);
1512     end_mark = parser->mark;
1513 
1514     /* Create the FLOW-SEQUENCE-END of FLOW-MAPPING-END token. */
1515 
1516     TOKEN_INIT(token, type, start_mark, end_mark);
1517 
1518     /* Append the token to the queue. */
1519 
1520     if (!ENQUEUE(parser, parser->tokens, token))
1521         return 0;
1522 
1523     return 1;
1524 }
1525 
1526 /*
1527  * Produce the FLOW-ENTRY token.
1528  */
1529 
1530 static int
yaml_parser_fetch_flow_entry(yaml_parser_t * parser)1531 yaml_parser_fetch_flow_entry(yaml_parser_t *parser)
1532 {
1533     yaml_mark_t start_mark, end_mark;
1534     yaml_token_t token;
1535 
1536     /* Reset any potential simple keys on the current flow level. */
1537 
1538     if (!yaml_parser_remove_simple_key(parser))
1539         return 0;
1540 
1541     /* Simple keys are allowed after ','. */
1542 
1543     parser->simple_key_allowed = 1;
1544 
1545     /* Consume the token. */
1546 
1547     start_mark = parser->mark;
1548     SKIP(parser);
1549     end_mark = parser->mark;
1550 
1551     /* Create the FLOW-ENTRY token and append it to the queue. */
1552 
1553     TOKEN_INIT(token, YAML_FLOW_ENTRY_TOKEN, start_mark, end_mark);
1554 
1555     if (!ENQUEUE(parser, parser->tokens, token))
1556         return 0;
1557 
1558     return 1;
1559 }
1560 
1561 /*
1562  * Produce the BLOCK-ENTRY token.
1563  */
1564 
1565 static int
yaml_parser_fetch_block_entry(yaml_parser_t * parser)1566 yaml_parser_fetch_block_entry(yaml_parser_t *parser)
1567 {
1568     yaml_mark_t start_mark, end_mark;
1569     yaml_token_t token;
1570 
1571     /* Check if the scanner is in the block context. */
1572 
1573     if (!parser->flow_level)
1574     {
1575         /* Check if we are allowed to start a new entry. */
1576 
1577         if (!parser->simple_key_allowed) {
1578             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1579                     "block sequence entries are not allowed in this context");
1580         }
1581 
1582         /* Add the BLOCK-SEQUENCE-START token if needed. */
1583 
1584         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1585                     YAML_BLOCK_SEQUENCE_START_TOKEN, parser->mark))
1586             return 0;
1587     }
1588     else
1589     {
1590         /*
1591          * It is an error for the '-' indicator to occur in the flow context,
1592          * but we let the Parser detect and report about it because the Parser
1593          * is able to point to the context.
1594          */
1595     }
1596 
1597     /* Reset any potential simple keys on the current flow level. */
1598 
1599     if (!yaml_parser_remove_simple_key(parser))
1600         return 0;
1601 
1602     /* Simple keys are allowed after '-'. */
1603 
1604     parser->simple_key_allowed = 1;
1605 
1606     /* Consume the token. */
1607 
1608     start_mark = parser->mark;
1609     SKIP(parser);
1610     end_mark = parser->mark;
1611 
1612     /* Create the BLOCK-ENTRY token and append it to the queue. */
1613 
1614     TOKEN_INIT(token, YAML_BLOCK_ENTRY_TOKEN, start_mark, end_mark);
1615 
1616     if (!ENQUEUE(parser, parser->tokens, token))
1617         return 0;
1618 
1619     return 1;
1620 }
1621 
1622 /*
1623  * Produce the KEY token.
1624  */
1625 
1626 static int
yaml_parser_fetch_key(yaml_parser_t * parser)1627 yaml_parser_fetch_key(yaml_parser_t *parser)
1628 {
1629     yaml_mark_t start_mark, end_mark;
1630     yaml_token_t token;
1631 
1632     /* In the block context, additional checks are required. */
1633 
1634     if (!parser->flow_level)
1635     {
1636         /* Check if we are allowed to start a new key (not necessary simple). */
1637 
1638         if (!parser->simple_key_allowed) {
1639             return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1640                     "mapping keys are not allowed in this context");
1641         }
1642 
1643         /* Add the BLOCK-MAPPING-START token if needed. */
1644 
1645         if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1646                     YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1647             return 0;
1648     }
1649 
1650     /* Reset any potential simple keys on the current flow level. */
1651 
1652     if (!yaml_parser_remove_simple_key(parser))
1653         return 0;
1654 
1655     /* Simple keys are allowed after '?' in the block context. */
1656 
1657     parser->simple_key_allowed = (!parser->flow_level);
1658 
1659     /* Consume the token. */
1660 
1661     start_mark = parser->mark;
1662     SKIP(parser);
1663     end_mark = parser->mark;
1664 
1665     /* Create the KEY token and append it to the queue. */
1666 
1667     TOKEN_INIT(token, YAML_KEY_TOKEN, start_mark, end_mark);
1668 
1669     if (!ENQUEUE(parser, parser->tokens, token))
1670         return 0;
1671 
1672     return 1;
1673 }
1674 
1675 /*
1676  * Produce the VALUE token.
1677  */
1678 
1679 static int
yaml_parser_fetch_value(yaml_parser_t * parser)1680 yaml_parser_fetch_value(yaml_parser_t *parser)
1681 {
1682     yaml_mark_t start_mark, end_mark;
1683     yaml_token_t token;
1684     yaml_simple_key_t *simple_key = parser->simple_keys.top-1;
1685 
1686     /* Have we found a simple key? */
1687 
1688     if (simple_key->possible)
1689     {
1690 
1691         /* Create the KEY token and insert it into the queue. */
1692 
1693         TOKEN_INIT(token, YAML_KEY_TOKEN, simple_key->mark, simple_key->mark);
1694 
1695         if (!QUEUE_INSERT(parser, parser->tokens,
1696                     simple_key->token_number - parser->tokens_parsed, token))
1697             return 0;
1698 
1699         /* In the block context, we may need to add the BLOCK-MAPPING-START token. */
1700 
1701         if (!yaml_parser_roll_indent(parser, simple_key->mark.column,
1702                     simple_key->token_number,
1703                     YAML_BLOCK_MAPPING_START_TOKEN, simple_key->mark))
1704             return 0;
1705 
1706         /* Remove the simple key. */
1707 
1708         simple_key->possible = 0;
1709 
1710         /* A simple key cannot follow another simple key. */
1711 
1712         parser->simple_key_allowed = 0;
1713     }
1714     else
1715     {
1716         /* The ':' indicator follows a complex key. */
1717 
1718         /* In the block context, extra checks are required. */
1719 
1720         if (!parser->flow_level)
1721         {
1722             /* Check if we are allowed to start a complex value. */
1723 
1724             if (!parser->simple_key_allowed) {
1725                 return yaml_parser_set_scanner_error(parser, NULL, parser->mark,
1726                         "mapping values are not allowed in this context");
1727             }
1728 
1729             /* Add the BLOCK-MAPPING-START token if needed. */
1730 
1731             if (!yaml_parser_roll_indent(parser, parser->mark.column, -1,
1732                         YAML_BLOCK_MAPPING_START_TOKEN, parser->mark))
1733                 return 0;
1734         }
1735 
1736         /* Simple keys after ':' are allowed in the block context. */
1737 
1738         parser->simple_key_allowed = (!parser->flow_level);
1739     }
1740 
1741     /* Consume the token. */
1742 
1743     start_mark = parser->mark;
1744     SKIP(parser);
1745     end_mark = parser->mark;
1746 
1747     /* Create the VALUE token and append it to the queue. */
1748 
1749     TOKEN_INIT(token, YAML_VALUE_TOKEN, start_mark, end_mark);
1750 
1751     if (!ENQUEUE(parser, parser->tokens, token))
1752         return 0;
1753 
1754     return 1;
1755 }
1756 
1757 /*
1758  * Produce the ALIAS or ANCHOR token.
1759  */
1760 
1761 static int
yaml_parser_fetch_anchor(yaml_parser_t * parser,yaml_token_type_t type)1762 yaml_parser_fetch_anchor(yaml_parser_t *parser, yaml_token_type_t type)
1763 {
1764     yaml_token_t token;
1765 
1766     /* An anchor or an alias could be a simple key. */
1767 
1768     if (!yaml_parser_save_simple_key(parser))
1769         return 0;
1770 
1771     /* A simple key cannot follow an anchor or an alias. */
1772 
1773     parser->simple_key_allowed = 0;
1774 
1775     /* Create the ALIAS or ANCHOR token and append it to the queue. */
1776 
1777     if (!yaml_parser_scan_anchor(parser, &token, type))
1778         return 0;
1779 
1780     if (!ENQUEUE(parser, parser->tokens, token)) {
1781         yaml_token_delete(&token);
1782         return 0;
1783     }
1784     return 1;
1785 }
1786 
1787 /*
1788  * Produce the TAG token.
1789  */
1790 
1791 static int
yaml_parser_fetch_tag(yaml_parser_t * parser)1792 yaml_parser_fetch_tag(yaml_parser_t *parser)
1793 {
1794     yaml_token_t token;
1795 
1796     /* A tag could be a simple key. */
1797 
1798     if (!yaml_parser_save_simple_key(parser))
1799         return 0;
1800 
1801     /* A simple key cannot follow a tag. */
1802 
1803     parser->simple_key_allowed = 0;
1804 
1805     /* Create the TAG token and append it to the queue. */
1806 
1807     if (!yaml_parser_scan_tag(parser, &token))
1808         return 0;
1809 
1810     if (!ENQUEUE(parser, parser->tokens, token)) {
1811         yaml_token_delete(&token);
1812         return 0;
1813     }
1814 
1815     return 1;
1816 }
1817 
1818 /*
1819  * Produce the SCALAR(...,literal) or SCALAR(...,folded) tokens.
1820  */
1821 
1822 static int
yaml_parser_fetch_block_scalar(yaml_parser_t * parser,int literal)1823 yaml_parser_fetch_block_scalar(yaml_parser_t *parser, int literal)
1824 {
1825     yaml_token_t token;
1826 
1827     /* Remove any potential simple keys. */
1828 
1829     if (!yaml_parser_remove_simple_key(parser))
1830         return 0;
1831 
1832     /* A simple key may follow a block scalar. */
1833 
1834     parser->simple_key_allowed = 1;
1835 
1836     /* Create the SCALAR token and append it to the queue. */
1837 
1838     if (!yaml_parser_scan_block_scalar(parser, &token, literal))
1839         return 0;
1840 
1841     if (!ENQUEUE(parser, parser->tokens, token)) {
1842         yaml_token_delete(&token);
1843         return 0;
1844     }
1845 
1846     return 1;
1847 }
1848 
1849 /*
1850  * Produce the SCALAR(...,single-quoted) or SCALAR(...,double-quoted) tokens.
1851  */
1852 
1853 static int
yaml_parser_fetch_flow_scalar(yaml_parser_t * parser,int single)1854 yaml_parser_fetch_flow_scalar(yaml_parser_t *parser, int single)
1855 {
1856     yaml_token_t token;
1857 
1858     /* A plain scalar could be a simple key. */
1859 
1860     if (!yaml_parser_save_simple_key(parser))
1861         return 0;
1862 
1863     /* A simple key cannot follow a flow scalar. */
1864 
1865     parser->simple_key_allowed = 0;
1866 
1867     /* Create the SCALAR token and append it to the queue. */
1868 
1869     if (!yaml_parser_scan_flow_scalar(parser, &token, single))
1870         return 0;
1871 
1872     if (!ENQUEUE(parser, parser->tokens, token)) {
1873         yaml_token_delete(&token);
1874         return 0;
1875     }
1876 
1877     return 1;
1878 }
1879 
1880 /*
1881  * Produce the SCALAR(...,plain) token.
1882  */
1883 
1884 static int
yaml_parser_fetch_plain_scalar(yaml_parser_t * parser)1885 yaml_parser_fetch_plain_scalar(yaml_parser_t *parser)
1886 {
1887     yaml_token_t token;
1888 
1889     /* A plain scalar could be a simple key. */
1890 
1891     if (!yaml_parser_save_simple_key(parser))
1892         return 0;
1893 
1894     /* A simple key cannot follow a flow scalar. */
1895 
1896     parser->simple_key_allowed = 0;
1897 
1898     /* Create the SCALAR token and append it to the queue. */
1899 
1900     if (!yaml_parser_scan_plain_scalar(parser, &token))
1901         return 0;
1902 
1903     if (!ENQUEUE(parser, parser->tokens, token)) {
1904         yaml_token_delete(&token);
1905         return 0;
1906     }
1907 
1908     return 1;
1909 }
1910 
1911 /*
1912  * Eat whitespaces and comments until the next token is found.
1913  */
1914 
1915 static int
yaml_parser_scan_to_next_token(yaml_parser_t * parser)1916 yaml_parser_scan_to_next_token(yaml_parser_t *parser)
1917 {
1918     /* Until the next token is not found. */
1919 
1920     while (1)
1921     {
1922         /* Allow the BOM mark to start a line. */
1923 
1924         if (!CACHE(parser, 1)) return 0;
1925 
1926         if (parser->mark.column == 0 && IS_BOM(parser->buffer))
1927             SKIP(parser);
1928 
1929         /*
1930          * Eat whitespaces.
1931          *
1932          * Tabs are allowed:
1933          *
1934          *  - in the flow context;
1935          *  - in the block context, but not at the beginning of the line or
1936          *  after '-', '?', or ':' (complex value).
1937          */
1938 
1939         if (!CACHE(parser, 1)) return 0;
1940 
1941         while (CHECK(parser->buffer,' ') ||
1942                 ((parser->flow_level || !parser->simple_key_allowed) &&
1943                  CHECK(parser->buffer, '\t'))) {
1944             SKIP(parser);
1945             if (!CACHE(parser, 1)) return 0;
1946         }
1947 
1948         /* Eat a comment until a line break. */
1949 
1950         if (CHECK(parser->buffer, '#')) {
1951             while (!IS_BREAKZ(parser->buffer)) {
1952                 SKIP(parser);
1953                 if (!CACHE(parser, 1)) return 0;
1954             }
1955         }
1956 
1957         /* If it is a line break, eat it. */
1958 
1959         if (IS_BREAK(parser->buffer))
1960         {
1961             if (!CACHE(parser, 2)) return 0;
1962             SKIP_LINE(parser);
1963 
1964             /* In the block context, a new line may start a simple key. */
1965 
1966             if (!parser->flow_level) parser->simple_key_allowed = 1;
1967         }
1968         else
1969         {
1970             /* We have found a token. */
1971 
1972             break;
1973         }
1974     }
1975 
1976     return 1;
1977 }
1978 
1979 /*
1980  * Scan a YAML-DIRECTIVE or TAG-DIRECTIVE token.
1981  *
1982  * Scope:
1983  *      %YAML    1.1    # a comment \n
1984  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1985  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
1986  *      ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
1987  */
1988 
1989 int
yaml_parser_scan_directive(yaml_parser_t * parser,yaml_token_t * token)1990 yaml_parser_scan_directive(yaml_parser_t *parser, yaml_token_t *token)
1991 {
1992     yaml_mark_t start_mark, end_mark;
1993     yaml_char_t *name = NULL;
1994     int major, minor;
1995     yaml_char_t *handle = NULL, *prefix = NULL;
1996 
1997     /* Eat '%'. */
1998 
1999     start_mark = parser->mark;
2000 
2001     SKIP(parser);
2002 
2003     /* Scan the directive name. */
2004 
2005     if (!yaml_parser_scan_directive_name(parser, start_mark, &name))
2006         goto error;
2007 
2008     /* Is it a YAML directive? */
2009 
2010     if (strcmp((char *)name, "YAML") == 0)
2011     {
2012         /* Scan the VERSION directive value. */
2013 
2014         if (!yaml_parser_scan_version_directive_value(parser, start_mark,
2015                     &major, &minor))
2016             goto error;
2017 
2018         end_mark = parser->mark;
2019 
2020         /* Create a VERSION-DIRECTIVE token. */
2021 
2022         VERSION_DIRECTIVE_TOKEN_INIT(*token, major, minor,
2023                 start_mark, end_mark);
2024     }
2025 
2026     /* Is it a TAG directive? */
2027 
2028     else if (strcmp((char *)name, "TAG") == 0)
2029     {
2030         /* Scan the TAG directive value. */
2031 
2032         if (!yaml_parser_scan_tag_directive_value(parser, start_mark,
2033                     &handle, &prefix))
2034             goto error;
2035 
2036         end_mark = parser->mark;
2037 
2038         /* Create a TAG-DIRECTIVE token. */
2039 
2040         TAG_DIRECTIVE_TOKEN_INIT(*token, handle, prefix,
2041                 start_mark, end_mark);
2042     }
2043 
2044     /* Unknown directive. */
2045 
2046     else
2047     {
2048         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2049                 start_mark, "found unknown directive name");
2050         goto error;
2051     }
2052 
2053     /* Eat the rest of the line including any comments. */
2054 
2055     if (!CACHE(parser, 1)) goto error;
2056 
2057     while (IS_BLANK(parser->buffer)) {
2058         SKIP(parser);
2059         if (!CACHE(parser, 1)) goto error;
2060     }
2061 
2062     if (CHECK(parser->buffer, '#')) {
2063         while (!IS_BREAKZ(parser->buffer)) {
2064             SKIP(parser);
2065             if (!CACHE(parser, 1)) goto error;
2066         }
2067     }
2068 
2069     /* Check if we are at the end of the line. */
2070 
2071     if (!IS_BREAKZ(parser->buffer)) {
2072         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2073                 start_mark, "did not find expected comment or line break");
2074         goto error;
2075     }
2076 
2077     /* Eat a line break. */
2078 
2079     if (IS_BREAK(parser->buffer)) {
2080         if (!CACHE(parser, 2)) goto error;
2081         SKIP_LINE(parser);
2082     }
2083 
2084     yaml_free(name);
2085 
2086     return 1;
2087 
2088 error:
2089     yaml_free(prefix);
2090     yaml_free(handle);
2091     yaml_free(name);
2092     return 0;
2093 }
2094 
2095 /*
2096  * Scan the directive name.
2097  *
2098  * Scope:
2099  *      %YAML   1.1     # a comment \n
2100  *       ^^^^
2101  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2102  *       ^^^
2103  */
2104 
2105 static int
yaml_parser_scan_directive_name(yaml_parser_t * parser,yaml_mark_t start_mark,yaml_char_t ** name)2106 yaml_parser_scan_directive_name(yaml_parser_t *parser,
2107         yaml_mark_t start_mark, yaml_char_t **name)
2108 {
2109     yaml_string_t string = NULL_STRING;
2110 
2111     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2112 
2113     /* Consume the directive name. */
2114 
2115     if (!CACHE(parser, 1)) goto error;
2116 
2117     while (IS_ALPHA(parser->buffer))
2118     {
2119         if (!READ(parser, string)) goto error;
2120         if (!CACHE(parser, 1)) goto error;
2121     }
2122 
2123     /* Check if the name is empty. */
2124 
2125     if (string.start == string.pointer) {
2126         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2127                 start_mark, "could not find expected directive name");
2128         goto error;
2129     }
2130 
2131     /* Check for an blank character after the name. */
2132 
2133     if (!IS_BLANKZ(parser->buffer)) {
2134         yaml_parser_set_scanner_error(parser, "while scanning a directive",
2135                 start_mark, "found unexpected non-alphabetical character");
2136         goto error;
2137     }
2138 
2139     *name = string.start;
2140 
2141     return 1;
2142 
2143 error:
2144     STRING_DEL(parser, string);
2145     return 0;
2146 }
2147 
2148 /*
2149  * Scan the value of VERSION-DIRECTIVE.
2150  *
2151  * Scope:
2152  *      %YAML   1.1     # a comment \n
2153  *           ^^^^^^
2154  */
2155 
2156 static int
yaml_parser_scan_version_directive_value(yaml_parser_t * parser,yaml_mark_t start_mark,int * major,int * minor)2157 yaml_parser_scan_version_directive_value(yaml_parser_t *parser,
2158         yaml_mark_t start_mark, int *major, int *minor)
2159 {
2160     /* Eat whitespaces. */
2161 
2162     if (!CACHE(parser, 1)) return 0;
2163 
2164     while (IS_BLANK(parser->buffer)) {
2165         SKIP(parser);
2166         if (!CACHE(parser, 1)) return 0;
2167     }
2168 
2169     /* Consume the major version number. */
2170 
2171     if (!yaml_parser_scan_version_directive_number(parser, start_mark, major))
2172         return 0;
2173 
2174     /* Eat '.'. */
2175 
2176     if (!CHECK(parser->buffer, '.')) {
2177         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2178                 start_mark, "did not find expected digit or '.' character");
2179     }
2180 
2181     SKIP(parser);
2182 
2183     /* Consume the minor version number. */
2184 
2185     if (!yaml_parser_scan_version_directive_number(parser, start_mark, minor))
2186         return 0;
2187 
2188     return 1;
2189 }
2190 
2191 #define MAX_NUMBER_LENGTH   9
2192 
2193 /*
2194  * Scan the version number of VERSION-DIRECTIVE.
2195  *
2196  * Scope:
2197  *      %YAML   1.1     # a comment \n
2198  *              ^
2199  *      %YAML   1.1     # a comment \n
2200  *                ^
2201  */
2202 
2203 static int
yaml_parser_scan_version_directive_number(yaml_parser_t * parser,yaml_mark_t start_mark,int * number)2204 yaml_parser_scan_version_directive_number(yaml_parser_t *parser,
2205         yaml_mark_t start_mark, int *number)
2206 {
2207     int value = 0;
2208     size_t length = 0;
2209 
2210     /* Repeat while the next character is digit. */
2211 
2212     if (!CACHE(parser, 1)) return 0;
2213 
2214     while (IS_DIGIT(parser->buffer))
2215     {
2216         /* Check if the number is too long. */
2217 
2218         if (++length > MAX_NUMBER_LENGTH) {
2219             return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2220                     start_mark, "found extremely long version number");
2221         }
2222 
2223         value = value*10 + AS_DIGIT(parser->buffer);
2224 
2225         SKIP(parser);
2226 
2227         if (!CACHE(parser, 1)) return 0;
2228     }
2229 
2230     /* Check if the number was present. */
2231 
2232     if (!length) {
2233         return yaml_parser_set_scanner_error(parser, "while scanning a %YAML directive",
2234                 start_mark, "did not find expected version number");
2235     }
2236 
2237     *number = value;
2238 
2239     return 1;
2240 }
2241 
2242 /*
2243  * Scan the value of a TAG-DIRECTIVE token.
2244  *
2245  * Scope:
2246  *      %TAG    !yaml!  tag:yaml.org,2002:  \n
2247  *          ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
2248  */
2249 
2250 static int
yaml_parser_scan_tag_directive_value(yaml_parser_t * parser,yaml_mark_t start_mark,yaml_char_t ** handle,yaml_char_t ** prefix)2251 yaml_parser_scan_tag_directive_value(yaml_parser_t *parser,
2252         yaml_mark_t start_mark, yaml_char_t **handle, yaml_char_t **prefix)
2253 {
2254     yaml_char_t *handle_value = NULL;
2255     yaml_char_t *prefix_value = NULL;
2256 
2257     /* Eat whitespaces. */
2258 
2259     if (!CACHE(parser, 1)) goto error;
2260 
2261     while (IS_BLANK(parser->buffer)) {
2262         SKIP(parser);
2263         if (!CACHE(parser, 1)) goto error;
2264     }
2265 
2266     /* Scan a handle. */
2267 
2268     if (!yaml_parser_scan_tag_handle(parser, 1, start_mark, &handle_value))
2269         goto error;
2270 
2271     /* Expect a whitespace. */
2272 
2273     if (!CACHE(parser, 1)) goto error;
2274 
2275     if (!IS_BLANK(parser->buffer)) {
2276         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2277                 start_mark, "did not find expected whitespace");
2278         goto error;
2279     }
2280 
2281     /* Eat whitespaces. */
2282 
2283     while (IS_BLANK(parser->buffer)) {
2284         SKIP(parser);
2285         if (!CACHE(parser, 1)) goto error;
2286     }
2287 
2288     /* Scan a prefix. */
2289 
2290     if (!yaml_parser_scan_tag_uri(parser, 1, 1, NULL, start_mark, &prefix_value))
2291         goto error;
2292 
2293     /* Expect a whitespace or line break. */
2294 
2295     if (!CACHE(parser, 1)) goto error;
2296 
2297     if (!IS_BLANKZ(parser->buffer)) {
2298         yaml_parser_set_scanner_error(parser, "while scanning a %TAG directive",
2299                 start_mark, "did not find expected whitespace or line break");
2300         goto error;
2301     }
2302 
2303     *handle = handle_value;
2304     *prefix = prefix_value;
2305 
2306     return 1;
2307 
2308 error:
2309     yaml_free(handle_value);
2310     yaml_free(prefix_value);
2311     return 0;
2312 }
2313 
2314 static int
yaml_parser_scan_anchor(yaml_parser_t * parser,yaml_token_t * token,yaml_token_type_t type)2315 yaml_parser_scan_anchor(yaml_parser_t *parser, yaml_token_t *token,
2316         yaml_token_type_t type)
2317 {
2318     int length = 0;
2319     yaml_mark_t start_mark, end_mark;
2320     yaml_string_t string = NULL_STRING;
2321 
2322     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2323 
2324     /* Eat the indicator character. */
2325 
2326     start_mark = parser->mark;
2327 
2328     SKIP(parser);
2329 
2330     /* Consume the value. */
2331 
2332     if (!CACHE(parser, 1)) goto error;
2333 
2334     while (IS_ALPHA(parser->buffer)) {
2335         if (!READ(parser, string)) goto error;
2336         if (!CACHE(parser, 1)) goto error;
2337         length ++;
2338     }
2339 
2340     end_mark = parser->mark;
2341 
2342     /*
2343      * Check if length of the anchor is greater than 0 and it is followed by
2344      * a whitespace character or one of the indicators:
2345      *
2346      *      '?', ':', ',', ']', '}', '%', '@', '`'.
2347      */
2348 
2349     if (!length || !(IS_BLANKZ(parser->buffer) || CHECK(parser->buffer, '?')
2350                 || CHECK(parser->buffer, ':') || CHECK(parser->buffer, ',')
2351                 || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '}')
2352                 || CHECK(parser->buffer, '%') || CHECK(parser->buffer, '@')
2353                 || CHECK(parser->buffer, '`'))) {
2354         yaml_parser_set_scanner_error(parser, type == YAML_ANCHOR_TOKEN ?
2355                 "while scanning an anchor" : "while scanning an alias", start_mark,
2356                 "did not find expected alphabetic or numeric character");
2357         goto error;
2358     }
2359 
2360     /* Create a token. */
2361 
2362     if (type == YAML_ANCHOR_TOKEN) ANCHOR_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2363     else {
2364         ALIAS_TOKEN_INIT(*token, string.start, start_mark, end_mark);
2365     }
2366 
2367     return 1;
2368 
2369 error:
2370     STRING_DEL(parser, string);
2371     return 0;
2372 }
2373 
2374 /*
2375  * Scan a TAG token.
2376  */
2377 
2378 static int
yaml_parser_scan_tag(yaml_parser_t * parser,yaml_token_t * token)2379 yaml_parser_scan_tag(yaml_parser_t *parser, yaml_token_t *token)
2380 {
2381     yaml_char_t *handle = NULL;
2382     yaml_char_t *suffix = NULL;
2383     yaml_mark_t start_mark, end_mark;
2384 
2385     start_mark = parser->mark;
2386 
2387     /* Check if the tag is in the canonical form. */
2388 
2389     if (!CACHE(parser, 2)) goto error;
2390 
2391     if (CHECK_AT(parser->buffer, '<', 1))
2392     {
2393         /* Set the handle to '' */
2394 
2395         handle = YAML_MALLOC(1);
2396         if (!handle) goto error;
2397         handle[0] = '\0';
2398 
2399         /* Eat '!<' */
2400 
2401         SKIP(parser);
2402         SKIP(parser);
2403 
2404         /* Consume the tag value. */
2405 
2406         if (!yaml_parser_scan_tag_uri(parser, 1, 0, NULL, start_mark, &suffix))
2407             goto error;
2408 
2409         /* Check for '>' and eat it. */
2410 
2411         if (!CHECK(parser->buffer, '>')) {
2412             yaml_parser_set_scanner_error(parser, "while scanning a tag",
2413                     start_mark, "did not find the expected '>'");
2414             goto error;
2415         }
2416 
2417         SKIP(parser);
2418     }
2419     else
2420     {
2421         /* The tag has either the '!suffix' or the '!handle!suffix' form. */
2422 
2423         /* First, try to scan a handle. */
2424 
2425         if (!yaml_parser_scan_tag_handle(parser, 0, start_mark, &handle))
2426             goto error;
2427 
2428         /* Check if it is, indeed, handle. */
2429 
2430         if (handle[0] == '!' && handle[1] != '\0' && handle[strlen((char *)handle)-1] == '!')
2431         {
2432             /* Scan the suffix now. */
2433 
2434             if (!yaml_parser_scan_tag_uri(parser, 0, 0, NULL, start_mark, &suffix))
2435                 goto error;
2436         }
2437         else
2438         {
2439             /* It wasn't a handle after all.  Scan the rest of the tag. */
2440 
2441             if (!yaml_parser_scan_tag_uri(parser, 0, 0, handle, start_mark, &suffix))
2442                 goto error;
2443 
2444             /* Set the handle to '!'. */
2445 
2446             yaml_free(handle);
2447             handle = YAML_MALLOC(2);
2448             if (!handle) goto error;
2449             handle[0] = '!';
2450             handle[1] = '\0';
2451 
2452             /*
2453              * A special case: the '!' tag.  Set the handle to '' and the
2454              * suffix to '!'.
2455              */
2456 
2457             if (suffix[0] == '\0') {
2458                 yaml_char_t *tmp = handle;
2459                 handle = suffix;
2460                 suffix = tmp;
2461             }
2462         }
2463     }
2464 
2465     /* Check the character which ends the tag. */
2466 
2467     if (!CACHE(parser, 1)) goto error;
2468 
2469     if (!IS_BLANKZ(parser->buffer)) {
2470         if (!parser->flow_level || !CHECK(parser->buffer, ',') ) {
2471             yaml_parser_set_scanner_error(parser, "while scanning a tag",
2472                     start_mark, "did not find expected whitespace or line break");
2473             goto error;
2474         }
2475     }
2476 
2477     end_mark = parser->mark;
2478 
2479     /* Create a token. */
2480 
2481     TAG_TOKEN_INIT(*token, handle, suffix, start_mark, end_mark);
2482 
2483     return 1;
2484 
2485 error:
2486     yaml_free(handle);
2487     yaml_free(suffix);
2488     return 0;
2489 }
2490 
2491 /*
2492  * Scan a tag handle.
2493  */
2494 
2495 static int
yaml_parser_scan_tag_handle(yaml_parser_t * parser,int directive,yaml_mark_t start_mark,yaml_char_t ** handle)2496 yaml_parser_scan_tag_handle(yaml_parser_t *parser, int directive,
2497         yaml_mark_t start_mark, yaml_char_t **handle)
2498 {
2499     yaml_string_t string = NULL_STRING;
2500 
2501     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2502 
2503     /* Check the initial '!' character. */
2504 
2505     if (!CACHE(parser, 1)) goto error;
2506 
2507     if (!CHECK(parser->buffer, '!')) {
2508         yaml_parser_set_scanner_error(parser, directive ?
2509                 "while scanning a tag directive" : "while scanning a tag",
2510                 start_mark, "did not find expected '!'");
2511         goto error;
2512     }
2513 
2514     /* Copy the '!' character. */
2515 
2516     if (!READ(parser, string)) goto error;
2517 
2518     /* Copy all subsequent alphabetical and numerical characters. */
2519 
2520     if (!CACHE(parser, 1)) goto error;
2521 
2522     while (IS_ALPHA(parser->buffer))
2523     {
2524         if (!READ(parser, string)) goto error;
2525         if (!CACHE(parser, 1)) goto error;
2526     }
2527 
2528     /* Check if the trailing character is '!' and copy it. */
2529 
2530     if (CHECK(parser->buffer, '!'))
2531     {
2532         if (!READ(parser, string)) goto error;
2533     }
2534     else
2535     {
2536         /*
2537          * It's either the '!' tag or not really a tag handle.  If it's a %TAG
2538          * directive, it's an error.  If it's a tag token, it must be a part of
2539          * URI.
2540          */
2541 
2542         if (directive && !(string.start[0] == '!' && string.start[1] == '\0')) {
2543             yaml_parser_set_scanner_error(parser, "while parsing a tag directive",
2544                     start_mark, "did not find expected '!'");
2545             goto error;
2546         }
2547     }
2548 
2549     *handle = string.start;
2550 
2551     return 1;
2552 
2553 error:
2554     STRING_DEL(parser, string);
2555     return 0;
2556 }
2557 
2558 /*
2559  * Scan a tag.
2560  */
2561 
2562 static int
yaml_parser_scan_tag_uri(yaml_parser_t * parser,int uri_char,int directive,yaml_char_t * head,yaml_mark_t start_mark,yaml_char_t ** uri)2563 yaml_parser_scan_tag_uri(yaml_parser_t *parser, int uri_char, int directive,
2564         yaml_char_t *head, yaml_mark_t start_mark, yaml_char_t **uri)
2565 {
2566     size_t length = head ? strlen((char *)head) : 0;
2567     yaml_string_t string = NULL_STRING;
2568 
2569     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2570 
2571     /* Resize the string to include the head. */
2572 
2573     while ((size_t)(string.end - string.start) <= length) {
2574         if (!yaml_string_extend(&string.start, &string.pointer, &string.end)) {
2575             parser->error = YAML_MEMORY_ERROR;
2576             goto error;
2577         }
2578     }
2579 
2580     /*
2581      * Copy the head if needed.
2582      *
2583      * Note that we don't copy the leading '!' character.
2584      */
2585 
2586     if (length > 1) {
2587         memcpy(string.start, head+1, length-1);
2588         string.pointer += length-1;
2589     }
2590 
2591     /* Scan the tag. */
2592 
2593     if (!CACHE(parser, 1)) goto error;
2594 
2595     /*
2596      * The set of characters that may appear in URI is as follows:
2597      *
2598      *      '0'-'9', 'A'-'Z', 'a'-'z', '_', '-', ';', '/', '?', ':', '@', '&',
2599      *      '=', '+', '$', '.', '!', '~', '*', '\'', '(', ')', '%'.
2600      *
2601      * If we are inside a verbatim tag <...> (parameter uri_char is true)
2602      * then also the following flow indicators are allowed:
2603      *      ',', '[', ']'
2604      */
2605 
2606     while (IS_ALPHA(parser->buffer) || CHECK(parser->buffer, ';')
2607             || CHECK(parser->buffer, '/') || CHECK(parser->buffer, '?')
2608             || CHECK(parser->buffer, ':') || CHECK(parser->buffer, '@')
2609             || CHECK(parser->buffer, '&') || CHECK(parser->buffer, '=')
2610             || CHECK(parser->buffer, '+') || CHECK(parser->buffer, '$')
2611             || CHECK(parser->buffer, '.') || CHECK(parser->buffer, '%')
2612             || CHECK(parser->buffer, '!') || CHECK(parser->buffer, '~')
2613             || CHECK(parser->buffer, '*') || CHECK(parser->buffer, '\'')
2614             || CHECK(parser->buffer, '(') || CHECK(parser->buffer, ')')
2615             || (uri_char && (
2616                 CHECK(parser->buffer, ',')
2617                 || CHECK(parser->buffer, '[') || CHECK(parser->buffer, ']')
2618                 )
2619             ))
2620     {
2621         /* Check if it is a URI-escape sequence. */
2622 
2623         if (CHECK(parser->buffer, '%')) {
2624             if (!STRING_EXTEND(parser, string))
2625                 goto error;
2626 
2627             if (!yaml_parser_scan_uri_escapes(parser,
2628                         directive, start_mark, &string)) goto error;
2629         }
2630         else {
2631             if (!READ(parser, string)) goto error;
2632         }
2633 
2634         length ++;
2635         if (!CACHE(parser, 1)) goto error;
2636     }
2637 
2638     /* Check if the tag is non-empty. */
2639 
2640     if (!length) {
2641         if (!STRING_EXTEND(parser, string))
2642             goto error;
2643 
2644         yaml_parser_set_scanner_error(parser, directive ?
2645                 "while parsing a %TAG directive" : "while parsing a tag",
2646                 start_mark, "did not find expected tag URI");
2647         goto error;
2648     }
2649 
2650     *uri = string.start;
2651 
2652     return 1;
2653 
2654 error:
2655     STRING_DEL(parser, string);
2656     return 0;
2657 }
2658 
2659 /*
2660  * Decode an URI-escape sequence corresponding to a single UTF-8 character.
2661  */
2662 
2663 static int
yaml_parser_scan_uri_escapes(yaml_parser_t * parser,int directive,yaml_mark_t start_mark,yaml_string_t * string)2664 yaml_parser_scan_uri_escapes(yaml_parser_t *parser, int directive,
2665         yaml_mark_t start_mark, yaml_string_t *string)
2666 {
2667     int width = 0;
2668 
2669     /* Decode the required number of characters. */
2670 
2671     do {
2672 
2673         unsigned char octet = 0;
2674 
2675         /* Check for a URI-escaped octet. */
2676 
2677         if (!CACHE(parser, 3)) return 0;
2678 
2679         if (!(CHECK(parser->buffer, '%')
2680                     && IS_HEX_AT(parser->buffer, 1)
2681                     && IS_HEX_AT(parser->buffer, 2))) {
2682             return yaml_parser_set_scanner_error(parser, directive ?
2683                     "while parsing a %TAG directive" : "while parsing a tag",
2684                     start_mark, "did not find URI escaped octet");
2685         }
2686 
2687         /* Get the octet. */
2688 
2689         octet = (AS_HEX_AT(parser->buffer, 1) << 4) + AS_HEX_AT(parser->buffer, 2);
2690 
2691         /* If it is the leading octet, determine the length of the UTF-8 sequence. */
2692 
2693         if (!width)
2694         {
2695             width = (octet & 0x80) == 0x00 ? 1 :
2696                     (octet & 0xE0) == 0xC0 ? 2 :
2697                     (octet & 0xF0) == 0xE0 ? 3 :
2698                     (octet & 0xF8) == 0xF0 ? 4 : 0;
2699             if (!width) {
2700                 return yaml_parser_set_scanner_error(parser, directive ?
2701                         "while parsing a %TAG directive" : "while parsing a tag",
2702                         start_mark, "found an incorrect leading UTF-8 octet");
2703             }
2704         }
2705         else
2706         {
2707             /* Check if the trailing octet is correct. */
2708 
2709             if ((octet & 0xC0) != 0x80) {
2710                 return yaml_parser_set_scanner_error(parser, directive ?
2711                         "while parsing a %TAG directive" : "while parsing a tag",
2712                         start_mark, "found an incorrect trailing UTF-8 octet");
2713             }
2714         }
2715 
2716         /* Copy the octet and move the pointers. */
2717 
2718         *(string->pointer++) = octet;
2719         SKIP(parser);
2720         SKIP(parser);
2721         SKIP(parser);
2722 
2723     } while (--width);
2724 
2725     return 1;
2726 }
2727 
2728 /*
2729  * Scan a block scalar.
2730  */
2731 
2732 static int
yaml_parser_scan_block_scalar(yaml_parser_t * parser,yaml_token_t * token,int literal)2733 yaml_parser_scan_block_scalar(yaml_parser_t *parser, yaml_token_t *token,
2734         int literal)
2735 {
2736     yaml_mark_t start_mark;
2737     yaml_mark_t end_mark;
2738     yaml_string_t string = NULL_STRING;
2739     yaml_string_t leading_break = NULL_STRING;
2740     yaml_string_t trailing_breaks = NULL_STRING;
2741     int chomping = 0;
2742     int increment = 0;
2743     int indent = 0;
2744     int leading_blank = 0;
2745     int trailing_blank = 0;
2746 
2747     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
2748     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
2749     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
2750 
2751     /* Eat the indicator '|' or '>'. */
2752 
2753     start_mark = parser->mark;
2754 
2755     SKIP(parser);
2756 
2757     /* Scan the additional block scalar indicators. */
2758 
2759     if (!CACHE(parser, 1)) goto error;
2760 
2761     /* Check for a chomping indicator. */
2762 
2763     if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-'))
2764     {
2765         /* Set the chomping method and eat the indicator. */
2766 
2767         chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2768 
2769         SKIP(parser);
2770 
2771         /* Check for an indentation indicator. */
2772 
2773         if (!CACHE(parser, 1)) goto error;
2774 
2775         if (IS_DIGIT(parser->buffer))
2776         {
2777             /* Check that the indentation is greater than 0. */
2778 
2779             if (CHECK(parser->buffer, '0')) {
2780                 yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2781                         start_mark, "found an indentation indicator equal to 0");
2782                 goto error;
2783             }
2784 
2785             /* Get the indentation level and eat the indicator. */
2786 
2787             increment = AS_DIGIT(parser->buffer);
2788 
2789             SKIP(parser);
2790         }
2791     }
2792 
2793     /* Do the same as above, but in the opposite order. */
2794 
2795     else if (IS_DIGIT(parser->buffer))
2796     {
2797         if (CHECK(parser->buffer, '0')) {
2798             yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2799                     start_mark, "found an indentation indicator equal to 0");
2800             goto error;
2801         }
2802 
2803         increment = AS_DIGIT(parser->buffer);
2804 
2805         SKIP(parser);
2806 
2807         if (!CACHE(parser, 1)) goto error;
2808 
2809         if (CHECK(parser->buffer, '+') || CHECK(parser->buffer, '-')) {
2810             chomping = CHECK(parser->buffer, '+') ? +1 : -1;
2811 
2812             SKIP(parser);
2813         }
2814     }
2815 
2816     /* Eat whitespaces and comments to the end of the line. */
2817 
2818     if (!CACHE(parser, 1)) goto error;
2819 
2820     while (IS_BLANK(parser->buffer)) {
2821         SKIP(parser);
2822         if (!CACHE(parser, 1)) goto error;
2823     }
2824 
2825     if (CHECK(parser->buffer, '#')) {
2826         while (!IS_BREAKZ(parser->buffer)) {
2827             SKIP(parser);
2828             if (!CACHE(parser, 1)) goto error;
2829         }
2830     }
2831 
2832     /* Check if we are at the end of the line. */
2833 
2834     if (!IS_BREAKZ(parser->buffer)) {
2835         yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2836                 start_mark, "did not find expected comment or line break");
2837         goto error;
2838     }
2839 
2840     /* Eat a line break. */
2841 
2842     if (IS_BREAK(parser->buffer)) {
2843         if (!CACHE(parser, 2)) goto error;
2844         SKIP_LINE(parser);
2845     }
2846 
2847     end_mark = parser->mark;
2848 
2849     /* Set the indentation level if it was specified. */
2850 
2851     if (increment) indent = parser->indent >= 0 ? parser->indent+increment : increment;
2852 
2853     /* Scan the leading line breaks and determine the indentation level if needed. */
2854 
2855     if (!yaml_parser_scan_block_scalar_breaks(parser, &indent, &trailing_breaks,
2856                 start_mark, &end_mark)) goto error;
2857 
2858     /* Scan the block scalar content. */
2859 
2860     if (!CACHE(parser, 1)) goto error;
2861 
2862     while ((int)parser->mark.column == indent && !(IS_Z(parser->buffer)))
2863     {
2864         /*
2865          * We are at the beginning of a non-empty line.
2866          */
2867 
2868         /* Is it a trailing whitespace? */
2869 
2870         trailing_blank = IS_BLANK(parser->buffer);
2871 
2872         /* Check if we need to fold the leading line break. */
2873 
2874         if (!literal && (*leading_break.start == '\n')
2875                 && !leading_blank && !trailing_blank)
2876         {
2877             /* Do we need to join the lines by space? */
2878 
2879             if (*trailing_breaks.start == '\0') {
2880                 if (!STRING_EXTEND(parser, string)) goto error;
2881                 *(string.pointer ++) = ' ';
2882             }
2883 
2884             CLEAR(parser, leading_break);
2885         }
2886         else {
2887             if (!JOIN(parser, string, leading_break)) goto error;
2888             CLEAR(parser, leading_break);
2889         }
2890 
2891         /* Append the remaining line breaks. */
2892 
2893         if (!JOIN(parser, string, trailing_breaks)) goto error;
2894         CLEAR(parser, trailing_breaks);
2895 
2896         /* Is it a leading whitespace? */
2897 
2898         leading_blank = IS_BLANK(parser->buffer);
2899 
2900         /* Consume the current line. */
2901 
2902         while (!IS_BREAKZ(parser->buffer)) {
2903             if (!READ(parser, string)) goto error;
2904             if (!CACHE(parser, 1)) goto error;
2905         }
2906 
2907         /* Consume the line break. */
2908 
2909         if (!CACHE(parser, 2)) goto error;
2910 
2911         if (!READ_LINE(parser, leading_break)) goto error;
2912 
2913         /* Eat the following indentation spaces and line breaks. */
2914 
2915         if (!yaml_parser_scan_block_scalar_breaks(parser,
2916                     &indent, &trailing_breaks, start_mark, &end_mark)) goto error;
2917     }
2918 
2919     /* Chomp the tail. */
2920 
2921     if (chomping != -1) {
2922         if (!JOIN(parser, string, leading_break)) goto error;
2923     }
2924     if (chomping == 1) {
2925         if (!JOIN(parser, string, trailing_breaks)) goto error;
2926     }
2927 
2928     /* Create a token. */
2929 
2930     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
2931             literal ? YAML_LITERAL_SCALAR_STYLE : YAML_FOLDED_SCALAR_STYLE,
2932             start_mark, end_mark);
2933 
2934     STRING_DEL(parser, leading_break);
2935     STRING_DEL(parser, trailing_breaks);
2936 
2937     return 1;
2938 
2939 error:
2940     STRING_DEL(parser, string);
2941     STRING_DEL(parser, leading_break);
2942     STRING_DEL(parser, trailing_breaks);
2943 
2944     return 0;
2945 }
2946 
2947 /*
2948  * Scan indentation spaces and line breaks for a block scalar.  Determine the
2949  * indentation level if needed.
2950  */
2951 
2952 static int
yaml_parser_scan_block_scalar_breaks(yaml_parser_t * parser,int * indent,yaml_string_t * breaks,yaml_mark_t start_mark,yaml_mark_t * end_mark)2953 yaml_parser_scan_block_scalar_breaks(yaml_parser_t *parser,
2954         int *indent, yaml_string_t *breaks,
2955         yaml_mark_t start_mark, yaml_mark_t *end_mark)
2956 {
2957     int max_indent = 0;
2958 
2959     *end_mark = parser->mark;
2960 
2961     /* Eat the indentation spaces and line breaks. */
2962 
2963     while (1)
2964     {
2965         /* Eat the indentation spaces. */
2966 
2967         if (!CACHE(parser, 1)) return 0;
2968 
2969         while ((!*indent || (int)parser->mark.column < *indent)
2970                 && IS_SPACE(parser->buffer)) {
2971             SKIP(parser);
2972             if (!CACHE(parser, 1)) return 0;
2973         }
2974 
2975         if ((int)parser->mark.column > max_indent)
2976             max_indent = (int)parser->mark.column;
2977 
2978         /* Check for a tab character messing the indentation. */
2979 
2980         if ((!*indent || (int)parser->mark.column < *indent)
2981                 && IS_TAB(parser->buffer)) {
2982             return yaml_parser_set_scanner_error(parser, "while scanning a block scalar",
2983                     start_mark, "found a tab character where an indentation space is expected");
2984         }
2985 
2986         /* Have we found a non-empty line? */
2987 
2988         if (!IS_BREAK(parser->buffer)) break;
2989 
2990         /* Consume the line break. */
2991 
2992         if (!CACHE(parser, 2)) return 0;
2993         if (!READ_LINE(parser, *breaks)) return 0;
2994         *end_mark = parser->mark;
2995     }
2996 
2997     /* Determine the indentation level if needed. */
2998 
2999     if (!*indent) {
3000         *indent = max_indent;
3001         if (*indent < parser->indent + 1)
3002             *indent = parser->indent + 1;
3003         if (*indent < 1)
3004             *indent = 1;
3005     }
3006 
3007    return 1;
3008 }
3009 
3010 /*
3011  * Scan a quoted scalar.
3012  */
3013 
3014 static int
yaml_parser_scan_flow_scalar(yaml_parser_t * parser,yaml_token_t * token,int single)3015 yaml_parser_scan_flow_scalar(yaml_parser_t *parser, yaml_token_t *token,
3016         int single)
3017 {
3018     yaml_mark_t start_mark;
3019     yaml_mark_t end_mark;
3020     yaml_string_t string = NULL_STRING;
3021     yaml_string_t leading_break = NULL_STRING;
3022     yaml_string_t trailing_breaks = NULL_STRING;
3023     yaml_string_t whitespaces = NULL_STRING;
3024     int leading_blanks;
3025 
3026     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3027     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3028     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3029     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3030 
3031     /* Eat the left quote. */
3032 
3033     start_mark = parser->mark;
3034 
3035     SKIP(parser);
3036 
3037     /* Consume the content of the quoted scalar. */
3038 
3039     while (1)
3040     {
3041         /* Check that there are no document indicators at the beginning of the line. */
3042 
3043         if (!CACHE(parser, 4)) goto error;
3044 
3045         if (parser->mark.column == 0 &&
3046             ((CHECK_AT(parser->buffer, '-', 0) &&
3047               CHECK_AT(parser->buffer, '-', 1) &&
3048               CHECK_AT(parser->buffer, '-', 2)) ||
3049              (CHECK_AT(parser->buffer, '.', 0) &&
3050               CHECK_AT(parser->buffer, '.', 1) &&
3051               CHECK_AT(parser->buffer, '.', 2))) &&
3052             IS_BLANKZ_AT(parser->buffer, 3))
3053         {
3054             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3055                     start_mark, "found unexpected document indicator");
3056             goto error;
3057         }
3058 
3059         /* Check for EOF. */
3060 
3061         if (IS_Z(parser->buffer)) {
3062             yaml_parser_set_scanner_error(parser, "while scanning a quoted scalar",
3063                     start_mark, "found unexpected end of stream");
3064             goto error;
3065         }
3066 
3067         /* Consume non-blank characters. */
3068 
3069         if (!CACHE(parser, 2)) goto error;
3070 
3071         leading_blanks = 0;
3072 
3073         while (!IS_BLANKZ(parser->buffer))
3074         {
3075             /* Check for an escaped single quote. */
3076 
3077             if (single && CHECK_AT(parser->buffer, '\'', 0)
3078                     && CHECK_AT(parser->buffer, '\'', 1))
3079             {
3080                 if (!STRING_EXTEND(parser, string)) goto error;
3081                 *(string.pointer++) = '\'';
3082                 SKIP(parser);
3083                 SKIP(parser);
3084             }
3085 
3086             /* Check for the right quote. */
3087 
3088             else if (CHECK(parser->buffer, single ? '\'' : '"'))
3089             {
3090                 break;
3091             }
3092 
3093             /* Check for an escaped line break. */
3094 
3095             else if (!single && CHECK(parser->buffer, '\\')
3096                     && IS_BREAK_AT(parser->buffer, 1))
3097             {
3098                 if (!CACHE(parser, 3)) goto error;
3099                 SKIP(parser);
3100                 SKIP_LINE(parser);
3101                 leading_blanks = 1;
3102                 break;
3103             }
3104 
3105             /* Check for an escape sequence. */
3106 
3107             else if (!single && CHECK(parser->buffer, '\\'))
3108             {
3109                 size_t code_length = 0;
3110 
3111                 if (!STRING_EXTEND(parser, string)) goto error;
3112 
3113                 /* Check the escape character. */
3114 
3115                 switch (parser->buffer.pointer[1])
3116                 {
3117                     case '0':
3118                         *(string.pointer++) = '\0';
3119                         break;
3120 
3121                     case 'a':
3122                         *(string.pointer++) = '\x07';
3123                         break;
3124 
3125                     case 'b':
3126                         *(string.pointer++) = '\x08';
3127                         break;
3128 
3129                     case 't':
3130                     case '\t':
3131                         *(string.pointer++) = '\x09';
3132                         break;
3133 
3134                     case 'n':
3135                         *(string.pointer++) = '\x0A';
3136                         break;
3137 
3138                     case 'v':
3139                         *(string.pointer++) = '\x0B';
3140                         break;
3141 
3142                     case 'f':
3143                         *(string.pointer++) = '\x0C';
3144                         break;
3145 
3146                     case 'r':
3147                         *(string.pointer++) = '\x0D';
3148                         break;
3149 
3150                     case 'e':
3151                         *(string.pointer++) = '\x1B';
3152                         break;
3153 
3154                     case ' ':
3155                         *(string.pointer++) = '\x20';
3156                         break;
3157 
3158                     case '"':
3159                         *(string.pointer++) = '"';
3160                         break;
3161 
3162                     case '/':
3163                         *(string.pointer++) = '/';
3164                         break;
3165 
3166                     case '\\':
3167                         *(string.pointer++) = '\\';
3168                         break;
3169 
3170                     case 'N':   /* NEL (#x85) */
3171                         *(string.pointer++) = (unsigned char)'\xC2';
3172                         *(string.pointer++) = (unsigned char)'\x85';
3173                         break;
3174 
3175                     case '_':   /* #xA0 */
3176                         *(string.pointer++) = (unsigned char)'\xC2';
3177                         *(string.pointer++) = (unsigned char) '\xA0';
3178                         break;
3179 
3180                     case 'L':   /* LS (#x2028) */
3181                         *(string.pointer++) = (unsigned char)'\xE2';
3182                         *(string.pointer++) = (unsigned char)'\x80';
3183                         *(string.pointer++) = (unsigned char)'\xA8';
3184                         break;
3185 
3186                     case 'P':   /* PS (#x2029) */
3187                         *(string.pointer++) = (unsigned char)'\xE2';
3188                         *(string.pointer++) = (unsigned char)'\x80';
3189                         *(string.pointer++) = (unsigned char)'\xA9';
3190                         break;
3191 
3192                     case 'x':
3193                         code_length = 2;
3194                         break;
3195 
3196                     case 'u':
3197                         code_length = 4;
3198                         break;
3199 
3200                     case 'U':
3201                         code_length = 8;
3202                         break;
3203 
3204                     default:
3205                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3206                                 start_mark, "found unknown escape character");
3207                         goto error;
3208                 }
3209 
3210                 SKIP(parser);
3211                 SKIP(parser);
3212 
3213                 /* Consume an arbitrary escape code. */
3214 
3215                 if (code_length)
3216                 {
3217                     unsigned int value = 0;
3218                     size_t k;
3219 
3220                     /* Scan the character value. */
3221 
3222                     if (!CACHE(parser, code_length)) goto error;
3223 
3224                     for (k = 0; k < code_length; k ++) {
3225                         if (!IS_HEX_AT(parser->buffer, k)) {
3226                             yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3227                                     start_mark, "did not find expected hexadecimal number");
3228                             goto error;
3229                         }
3230                         value = (value << 4) + AS_HEX_AT(parser->buffer, k);
3231                     }
3232 
3233                     /* Check the value and write the character. */
3234 
3235                     if ((value >= 0xD800 && value <= 0xDFFF) || value > 0x10FFFF) {
3236                         yaml_parser_set_scanner_error(parser, "while parsing a quoted scalar",
3237                                 start_mark, "found invalid Unicode character escape code");
3238                         goto error;
3239                     }
3240 
3241                     if (value <= 0x7F) *(string.pointer++) = value;
3242                     else if (value <= 0x7FF) {
3243                         *(string.pointer++) = 0xC0 + (value >> 6);
3244                         *(string.pointer++) = 0x80 + (value & 0x3F);
3245                     }
3246                     else if (value <= 0xFFFF) {
3247                         *(string.pointer++) = 0xE0 + (value >> 12);
3248                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3249                         *(string.pointer++) = 0x80 + (value & 0x3F);
3250                     }
3251                     else {
3252                         *(string.pointer++) = 0xF0 + (value >> 18);
3253                         *(string.pointer++) = 0x80 + ((value >> 12) & 0x3F);
3254                         *(string.pointer++) = 0x80 + ((value >> 6) & 0x3F);
3255                         *(string.pointer++) = 0x80 + (value & 0x3F);
3256                     }
3257 
3258                     /* Advance the pointer. */
3259 
3260                     for (k = 0; k < code_length; k ++) SKIP(parser);
3261                 }
3262             }
3263 
3264             else
3265             {
3266                 /* It is a non-escaped non-blank character. */
3267 
3268                 if (!READ(parser, string)) goto error;
3269             }
3270 
3271             if (!CACHE(parser, 2)) goto error;
3272         }
3273 
3274         /* Check if we are at the end of the scalar. */
3275 
3276         /* Fix for crash uninitialized value crash
3277          * Credit for the bug and input is to OSS Fuzz
3278          * Credit for the fix to Alex Gaynor
3279          */
3280         if (!CACHE(parser, 1)) goto error;
3281         if (CHECK(parser->buffer, single ? '\'' : '"'))
3282             break;
3283 
3284         /* Consume blank characters. */
3285 
3286         if (!CACHE(parser, 1)) goto error;
3287 
3288         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3289         {
3290             if (IS_BLANK(parser->buffer))
3291             {
3292                 /* Consume a space or a tab character. */
3293 
3294                 if (!leading_blanks) {
3295                     if (!READ(parser, whitespaces)) goto error;
3296                 }
3297                 else {
3298                     SKIP(parser);
3299                 }
3300             }
3301             else
3302             {
3303                 if (!CACHE(parser, 2)) goto error;
3304 
3305                 /* Check if it is a first line break. */
3306 
3307                 if (!leading_blanks)
3308                 {
3309                     CLEAR(parser, whitespaces);
3310                     if (!READ_LINE(parser, leading_break)) goto error;
3311                     leading_blanks = 1;
3312                 }
3313                 else
3314                 {
3315                     if (!READ_LINE(parser, trailing_breaks)) goto error;
3316                 }
3317             }
3318             if (!CACHE(parser, 1)) goto error;
3319         }
3320 
3321         /* Join the whitespaces or fold line breaks. */
3322 
3323         if (leading_blanks)
3324         {
3325             /* Do we need to fold line breaks? */
3326 
3327             if (leading_break.start[0] == '\n') {
3328                 if (trailing_breaks.start[0] == '\0') {
3329                     if (!STRING_EXTEND(parser, string)) goto error;
3330                     *(string.pointer++) = ' ';
3331                 }
3332                 else {
3333                     if (!JOIN(parser, string, trailing_breaks)) goto error;
3334                     CLEAR(parser, trailing_breaks);
3335                 }
3336                 CLEAR(parser, leading_break);
3337             }
3338             else {
3339                 if (!JOIN(parser, string, leading_break)) goto error;
3340                 if (!JOIN(parser, string, trailing_breaks)) goto error;
3341                 CLEAR(parser, leading_break);
3342                 CLEAR(parser, trailing_breaks);
3343             }
3344         }
3345         else
3346         {
3347             if (!JOIN(parser, string, whitespaces)) goto error;
3348             CLEAR(parser, whitespaces);
3349         }
3350     }
3351 
3352     /* Eat the right quote. */
3353 
3354     SKIP(parser);
3355 
3356     end_mark = parser->mark;
3357 
3358     /* Create a token. */
3359 
3360     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3361             single ? YAML_SINGLE_QUOTED_SCALAR_STYLE : YAML_DOUBLE_QUOTED_SCALAR_STYLE,
3362             start_mark, end_mark);
3363 
3364     STRING_DEL(parser, leading_break);
3365     STRING_DEL(parser, trailing_breaks);
3366     STRING_DEL(parser, whitespaces);
3367 
3368     return 1;
3369 
3370 error:
3371     STRING_DEL(parser, string);
3372     STRING_DEL(parser, leading_break);
3373     STRING_DEL(parser, trailing_breaks);
3374     STRING_DEL(parser, whitespaces);
3375 
3376     return 0;
3377 }
3378 
3379 /*
3380  * Scan a plain scalar.
3381  */
3382 
3383 static int
yaml_parser_scan_plain_scalar(yaml_parser_t * parser,yaml_token_t * token)3384 yaml_parser_scan_plain_scalar(yaml_parser_t *parser, yaml_token_t *token)
3385 {
3386     yaml_mark_t start_mark;
3387     yaml_mark_t end_mark;
3388     yaml_string_t string = NULL_STRING;
3389     yaml_string_t leading_break = NULL_STRING;
3390     yaml_string_t trailing_breaks = NULL_STRING;
3391     yaml_string_t whitespaces = NULL_STRING;
3392     int leading_blanks = 0;
3393     int indent = parser->indent+1;
3394 
3395     if (!STRING_INIT(parser, string, INITIAL_STRING_SIZE)) goto error;
3396     if (!STRING_INIT(parser, leading_break, INITIAL_STRING_SIZE)) goto error;
3397     if (!STRING_INIT(parser, trailing_breaks, INITIAL_STRING_SIZE)) goto error;
3398     if (!STRING_INIT(parser, whitespaces, INITIAL_STRING_SIZE)) goto error;
3399 
3400     start_mark = end_mark = parser->mark;
3401 
3402     /* Consume the content of the plain scalar. */
3403 
3404     while (1)
3405     {
3406         /* Check for a document indicator. */
3407 
3408         if (!CACHE(parser, 4)) goto error;
3409 
3410         if (parser->mark.column == 0 &&
3411             ((CHECK_AT(parser->buffer, '-', 0) &&
3412               CHECK_AT(parser->buffer, '-', 1) &&
3413               CHECK_AT(parser->buffer, '-', 2)) ||
3414              (CHECK_AT(parser->buffer, '.', 0) &&
3415               CHECK_AT(parser->buffer, '.', 1) &&
3416               CHECK_AT(parser->buffer, '.', 2))) &&
3417             IS_BLANKZ_AT(parser->buffer, 3)) break;
3418 
3419         /* Check for a comment. */
3420 
3421         if (CHECK(parser->buffer, '#'))
3422             break;
3423 
3424         /* Consume non-blank characters. */
3425 
3426         while (!IS_BLANKZ(parser->buffer))
3427         {
3428             /* Check for "x:" + one of ',?[]{}' in the flow context. TODO: Fix the test "spec-08-13".
3429              * This is not completely according to the spec
3430              * See http://yaml.org/spec/1.1/#id907281 9.1.3. Plain
3431              */
3432 
3433             if (parser->flow_level
3434                     && CHECK(parser->buffer, ':')
3435                     && (
3436                         CHECK_AT(parser->buffer, ',', 1)
3437                         || CHECK_AT(parser->buffer, '?', 1)
3438                         || CHECK_AT(parser->buffer, '[', 1)
3439                         || CHECK_AT(parser->buffer, ']', 1)
3440                         || CHECK_AT(parser->buffer, '{', 1)
3441                         || CHECK_AT(parser->buffer, '}', 1)
3442                     )
3443                     ) {
3444                 yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3445                         start_mark, "found unexpected ':'");
3446                 goto error;
3447             }
3448 
3449             /* Check for indicators that may end a plain scalar. */
3450 
3451             if ((CHECK(parser->buffer, ':') && IS_BLANKZ_AT(parser->buffer, 1))
3452                     || (parser->flow_level &&
3453                         (CHECK(parser->buffer, ',')
3454                          || CHECK(parser->buffer, '[')
3455                          || CHECK(parser->buffer, ']') || CHECK(parser->buffer, '{')
3456                          || CHECK(parser->buffer, '}'))))
3457                 break;
3458 
3459             /* Check if we need to join whitespaces and breaks. */
3460 
3461             if (leading_blanks || whitespaces.start != whitespaces.pointer)
3462             {
3463                 if (leading_blanks)
3464                 {
3465                     /* Do we need to fold line breaks? */
3466 
3467                     if (leading_break.start[0] == '\n') {
3468                         if (trailing_breaks.start[0] == '\0') {
3469                             if (!STRING_EXTEND(parser, string)) goto error;
3470                             *(string.pointer++) = ' ';
3471                         }
3472                         else {
3473                             if (!JOIN(parser, string, trailing_breaks)) goto error;
3474                             CLEAR(parser, trailing_breaks);
3475                         }
3476                         CLEAR(parser, leading_break);
3477                     }
3478                     else {
3479                         if (!JOIN(parser, string, leading_break)) goto error;
3480                         if (!JOIN(parser, string, trailing_breaks)) goto error;
3481                         CLEAR(parser, leading_break);
3482                         CLEAR(parser, trailing_breaks);
3483                     }
3484 
3485                     leading_blanks = 0;
3486                 }
3487                 else
3488                 {
3489                     if (!JOIN(parser, string, whitespaces)) goto error;
3490                     CLEAR(parser, whitespaces);
3491                 }
3492             }
3493 
3494             /* Copy the character. */
3495 
3496             if (!READ(parser, string)) goto error;
3497 
3498             end_mark = parser->mark;
3499 
3500             if (!CACHE(parser, 2)) goto error;
3501         }
3502 
3503         /* Is it the end? */
3504 
3505         if (!(IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer)))
3506             break;
3507 
3508         /* Consume blank characters. */
3509 
3510         if (!CACHE(parser, 1)) goto error;
3511 
3512         while (IS_BLANK(parser->buffer) || IS_BREAK(parser->buffer))
3513         {
3514             if (IS_BLANK(parser->buffer))
3515             {
3516                 /* Check for tab characters that abuse indentation. */
3517 
3518                 if (leading_blanks && (int)parser->mark.column < indent
3519                         && IS_TAB(parser->buffer)) {
3520                     yaml_parser_set_scanner_error(parser, "while scanning a plain scalar",
3521                             start_mark, "found a tab character that violates indentation");
3522                     goto error;
3523                 }
3524 
3525                 /* Consume a space or a tab character. */
3526 
3527                 if (!leading_blanks) {
3528                     if (!READ(parser, whitespaces)) goto error;
3529                 }
3530                 else {
3531                     SKIP(parser);
3532                 }
3533             }
3534             else
3535             {
3536                 if (!CACHE(parser, 2)) goto error;
3537 
3538                 /* Check if it is a first line break. */
3539 
3540                 if (!leading_blanks)
3541                 {
3542                     CLEAR(parser, whitespaces);
3543                     if (!READ_LINE(parser, leading_break)) goto error;
3544                     leading_blanks = 1;
3545                 }
3546                 else
3547                 {
3548                     if (!READ_LINE(parser, trailing_breaks)) goto error;
3549                 }
3550             }
3551             if (!CACHE(parser, 1)) goto error;
3552         }
3553 
3554         /* Check indentation level. */
3555 
3556         if (!parser->flow_level && (int)parser->mark.column < indent)
3557             break;
3558     }
3559 
3560     /* Create a token. */
3561 
3562     SCALAR_TOKEN_INIT(*token, string.start, string.pointer-string.start,
3563             YAML_PLAIN_SCALAR_STYLE, start_mark, end_mark);
3564 
3565     /* Note that we change the 'simple_key_allowed' flag. */
3566 
3567     if (leading_blanks) parser->simple_key_allowed = 1;
3568 
3569     STRING_DEL(parser, leading_break);
3570     STRING_DEL(parser, trailing_breaks);
3571     STRING_DEL(parser, whitespaces);
3572 
3573     return 1;
3574 
3575 error:
3576     STRING_DEL(parser, string);
3577     STRING_DEL(parser, leading_break);
3578     STRING_DEL(parser, trailing_breaks);
3579     STRING_DEL(parser, whitespaces);
3580 
3581     return 0;
3582 }
3583