Github User Fetcher 1.0.0
C Application with Server and GUI
Loading...
Searching...
No Matches
duktape-1.8.0/src-separate/duk_lexer.h
Go to the documentation of this file.
1/*
2 * Lexer defines.
3 */
4
5#ifndef DUK_LEXER_H_INCLUDED
6#define DUK_LEXER_H_INCLUDED
7
8typedef void (*duk_re_range_callback)(void *user, duk_codepoint_t r1, duk_codepoint_t r2, duk_bool_t direct);
9
10/*
11 * A token is interpreted as any possible production of InputElementDiv
12 * and InputElementRegExp, see E5 Section 7 in its entirety. Note that
13 * the E5 "Token" production does not cover all actual tokens of the
14 * language (which is explicitly stated in the specification, Section 7.5).
15 * Null and boolean literals are defined as part of both ReservedWord
16 * (E5 Section 7.6.1) and Literal (E5 Section 7.8) productions. Here,
17 * null and boolean values have literal tokens, and are not reserved
18 * words.
19 *
20 * Decimal literal negative/positive sign is -not- part of DUK_TOK_NUMBER.
21 * The number tokens always have a non-negative value. The unary minus
22 * operator in "-1.0" is optimized during compilation to yield a single
23 * negative constant.
24 *
25 * Token numbering is free except that reserved words are required to be
26 * in a continuous range and in a particular order. See genstrings.py.
27 */
28
29#define DUK_LEXER_INITCTX(ctx) duk_lexer_initctx((ctx))
30
31#define DUK_LEXER_SETPOINT(ctx,pt) duk_lexer_setpoint((ctx), (pt))
32
33#define DUK_LEXER_GETPOINT(ctx,pt) do { (pt)->offset = (ctx)->window[0].offset; \
34 (pt)->line = (ctx)->window[0].line; } while (0)
35
36/* currently 6 characters of lookahead are actually needed (duk_lexer.c) */
37#define DUK_LEXER_WINDOW_SIZE 6
38#if defined(DUK_USE_LEXER_SLIDING_WINDOW)
39#define DUK_LEXER_BUFFER_SIZE 64
40#endif
41
42#define DUK_TOK_MINVAL 0
43
44/* returned after EOF (infinite amount) */
45#define DUK_TOK_EOF 0
46
47/* identifier names (E5 Section 7.6) */
48#define DUK_TOK_IDENTIFIER 1
49
50/* reserved words: keywords */
51#define DUK_TOK_START_RESERVED 2
52#define DUK_TOK_BREAK 2
53#define DUK_TOK_CASE 3
54#define DUK_TOK_CATCH 4
55#define DUK_TOK_CONTINUE 5
56#define DUK_TOK_DEBUGGER 6
57#define DUK_TOK_DEFAULT 7
58#define DUK_TOK_DELETE 8
59#define DUK_TOK_DO 9
60#define DUK_TOK_ELSE 10
61#define DUK_TOK_FINALLY 11
62#define DUK_TOK_FOR 12
63#define DUK_TOK_FUNCTION 13
64#define DUK_TOK_IF 14
65#define DUK_TOK_IN 15
66#define DUK_TOK_INSTANCEOF 16
67#define DUK_TOK_NEW 17
68#define DUK_TOK_RETURN 18
69#define DUK_TOK_SWITCH 19
70#define DUK_TOK_THIS 20
71#define DUK_TOK_THROW 21
72#define DUK_TOK_TRY 22
73#define DUK_TOK_TYPEOF 23
74#define DUK_TOK_VAR 24
75#define DUK_TOK_CONST 25
76#define DUK_TOK_VOID 26
77#define DUK_TOK_WHILE 27
78#define DUK_TOK_WITH 28
79
80/* reserved words: future reserved words */
81#define DUK_TOK_CLASS 29
82#define DUK_TOK_ENUM 30
83#define DUK_TOK_EXPORT 31
84#define DUK_TOK_EXTENDS 32
85#define DUK_TOK_IMPORT 33
86#define DUK_TOK_SUPER 34
87
88/* "null", "true", and "false" are always reserved words.
89 * Note that "get" and "set" are not!
90 */
91#define DUK_TOK_NULL 35
92#define DUK_TOK_TRUE 36
93#define DUK_TOK_FALSE 37
94
95/* reserved words: additional future reserved words in strict mode */
96#define DUK_TOK_START_STRICT_RESERVED 38 /* inclusive */
97#define DUK_TOK_IMPLEMENTS 38
98#define DUK_TOK_INTERFACE 39
99#define DUK_TOK_LET 40
100#define DUK_TOK_PACKAGE 41
101#define DUK_TOK_PRIVATE 42
102#define DUK_TOK_PROTECTED 43
103#define DUK_TOK_PUBLIC 44
104#define DUK_TOK_STATIC 45
105#define DUK_TOK_YIELD 46
106
107#define DUK_TOK_END_RESERVED 47 /* exclusive */
108
109/* "get" and "set" are tokens but NOT ReservedWords. They are currently
110 * parsed as identifiers and these defines are actually now unused.
111 */
112#define DUK_TOK_GET 47
113#define DUK_TOK_SET 48
114
115/* punctuators (unlike the spec, also includes "/" and "/=") */
116#define DUK_TOK_LCURLY 49
117#define DUK_TOK_RCURLY 50
118#define DUK_TOK_LBRACKET 51
119#define DUK_TOK_RBRACKET 52
120#define DUK_TOK_LPAREN 53
121#define DUK_TOK_RPAREN 54
122#define DUK_TOK_PERIOD 55
123#define DUK_TOK_SEMICOLON 56
124#define DUK_TOK_COMMA 57
125#define DUK_TOK_LT 58
126#define DUK_TOK_GT 59
127#define DUK_TOK_LE 60
128#define DUK_TOK_GE 61
129#define DUK_TOK_EQ 62
130#define DUK_TOK_NEQ 63
131#define DUK_TOK_SEQ 64
132#define DUK_TOK_SNEQ 65
133#define DUK_TOK_ADD 66
134#define DUK_TOK_SUB 67
135#define DUK_TOK_MUL 68
136#define DUK_TOK_DIV 69
137#define DUK_TOK_MOD 70
138#define DUK_TOK_INCREMENT 71
139#define DUK_TOK_DECREMENT 72
140#define DUK_TOK_ALSHIFT 73 /* named "arithmetic" because result is signed */
141#define DUK_TOK_ARSHIFT 74
142#define DUK_TOK_RSHIFT 75
143#define DUK_TOK_BAND 76
144#define DUK_TOK_BOR 77
145#define DUK_TOK_BXOR 78
146#define DUK_TOK_LNOT 79
147#define DUK_TOK_BNOT 80
148#define DUK_TOK_LAND 81
149#define DUK_TOK_LOR 82
150#define DUK_TOK_QUESTION 83
151#define DUK_TOK_COLON 84
152#define DUK_TOK_EQUALSIGN 85
153#define DUK_TOK_ADD_EQ 86
154#define DUK_TOK_SUB_EQ 87
155#define DUK_TOK_MUL_EQ 88
156#define DUK_TOK_DIV_EQ 89
157#define DUK_TOK_MOD_EQ 90
158#define DUK_TOK_ALSHIFT_EQ 91
159#define DUK_TOK_ARSHIFT_EQ 92
160#define DUK_TOK_RSHIFT_EQ 93
161#define DUK_TOK_BAND_EQ 94
162#define DUK_TOK_BOR_EQ 95
163#define DUK_TOK_BXOR_EQ 96
164
165/* literals (E5 Section 7.8), except null, true, false, which are treated
166 * like reserved words (above).
167 */
168#define DUK_TOK_NUMBER 97
169#define DUK_TOK_STRING 98
170#define DUK_TOK_REGEXP 99
171
172#define DUK_TOK_MAXVAL 99 /* inclusive */
173
174/* Convert heap string index to a token (reserved words) */
175#define DUK_STRIDX_TO_TOK(x) ((x) - DUK_STRIDX_START_RESERVED + DUK_TOK_START_RESERVED)
176
177/* Sanity check */
178#if (DUK_TOK_MAXVAL > 255)
179#error DUK_TOK_MAXVAL too large, code assumes it fits into 8 bits
180#endif
181
182/* Sanity checks for string and token defines */
183#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_BREAK) != DUK_TOK_BREAK)
184#error mismatch in token defines
185#endif
186#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CASE) != DUK_TOK_CASE)
187#error mismatch in token defines
188#endif
189#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CATCH) != DUK_TOK_CATCH)
190#error mismatch in token defines
191#endif
192#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONTINUE) != DUK_TOK_CONTINUE)
193#error mismatch in token defines
194#endif
195#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEBUGGER) != DUK_TOK_DEBUGGER)
196#error mismatch in token defines
197#endif
198#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DEFAULT) != DUK_TOK_DEFAULT)
199#error mismatch in token defines
200#endif
201#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DELETE) != DUK_TOK_DELETE)
202#error mismatch in token defines
203#endif
204#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_DO) != DUK_TOK_DO)
205#error mismatch in token defines
206#endif
207#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ELSE) != DUK_TOK_ELSE)
208#error mismatch in token defines
209#endif
210#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FINALLY) != DUK_TOK_FINALLY)
211#error mismatch in token defines
212#endif
213#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FOR) != DUK_TOK_FOR)
214#error mismatch in token defines
215#endif
216#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_FUNCTION) != DUK_TOK_FUNCTION)
217#error mismatch in token defines
218#endif
219#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IF) != DUK_TOK_IF)
220#error mismatch in token defines
221#endif
222#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IN) != DUK_TOK_IN)
223#error mismatch in token defines
224#endif
225#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INSTANCEOF) != DUK_TOK_INSTANCEOF)
226#error mismatch in token defines
227#endif
228#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_NEW) != DUK_TOK_NEW)
229#error mismatch in token defines
230#endif
231#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_RETURN) != DUK_TOK_RETURN)
232#error mismatch in token defines
233#endif
234#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SWITCH) != DUK_TOK_SWITCH)
235#error mismatch in token defines
236#endif
237#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THIS) != DUK_TOK_THIS)
238#error mismatch in token defines
239#endif
240#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_THROW) != DUK_TOK_THROW)
241#error mismatch in token defines
242#endif
243#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRY) != DUK_TOK_TRY)
244#error mismatch in token defines
245#endif
246#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TYPEOF) != DUK_TOK_TYPEOF)
247#error mismatch in token defines
248#endif
249#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VAR) != DUK_TOK_VAR)
250#error mismatch in token defines
251#endif
252#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_VOID) != DUK_TOK_VOID)
253#error mismatch in token defines
254#endif
255#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WHILE) != DUK_TOK_WHILE)
256#error mismatch in token defines
257#endif
258#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_WITH) != DUK_TOK_WITH)
259#error mismatch in token defines
260#endif
261#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CLASS) != DUK_TOK_CLASS)
262#error mismatch in token defines
263#endif
264#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_CONST) != DUK_TOK_CONST)
265#error mismatch in token defines
266#endif
267#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_ENUM) != DUK_TOK_ENUM)
268#error mismatch in token defines
269#endif
270#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXPORT) != DUK_TOK_EXPORT)
271#error mismatch in token defines
272#endif
273#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_EXTENDS) != DUK_TOK_EXTENDS)
274#error mismatch in token defines
275#endif
276#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPORT) != DUK_TOK_IMPORT)
277#error mismatch in token defines
278#endif
279#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_SUPER) != DUK_TOK_SUPER)
280#error mismatch in token defines
281#endif
282#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LC_NULL) != DUK_TOK_NULL)
283#error mismatch in token defines
284#endif
285#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_TRUE) != DUK_TOK_TRUE)
286#error mismatch in token defines
287#endif
288#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_FALSE) != DUK_TOK_FALSE)
289#error mismatch in token defines
290#endif
291#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_IMPLEMENTS) != DUK_TOK_IMPLEMENTS)
292#error mismatch in token defines
293#endif
294#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_INTERFACE) != DUK_TOK_INTERFACE)
295#error mismatch in token defines
296#endif
297#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_LET) != DUK_TOK_LET)
298#error mismatch in token defines
299#endif
300#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PACKAGE) != DUK_TOK_PACKAGE)
301#error mismatch in token defines
302#endif
303#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PRIVATE) != DUK_TOK_PRIVATE)
304#error mismatch in token defines
305#endif
306#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PROTECTED) != DUK_TOK_PROTECTED)
307#error mismatch in token defines
308#endif
309#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_PUBLIC) != DUK_TOK_PUBLIC)
310#error mismatch in token defines
311#endif
312#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_STATIC) != DUK_TOK_STATIC)
313#error mismatch in token defines
314#endif
315#if (DUK_STRIDX_TO_TOK(DUK_STRIDX_YIELD) != DUK_TOK_YIELD)
316#error mismatch in token defines
317#endif
318
319/* Regexp tokens */
320#define DUK_RETOK_EOF 0
321#define DUK_RETOK_DISJUNCTION 1
322#define DUK_RETOK_QUANTIFIER 2
323#define DUK_RETOK_ASSERT_START 3
324#define DUK_RETOK_ASSERT_END 4
325#define DUK_RETOK_ASSERT_WORD_BOUNDARY 5
326#define DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY 6
327#define DUK_RETOK_ASSERT_START_POS_LOOKAHEAD 7
328#define DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD 8
329#define DUK_RETOK_ATOM_PERIOD 9
330#define DUK_RETOK_ATOM_CHAR 10
331#define DUK_RETOK_ATOM_DIGIT 11
332#define DUK_RETOK_ATOM_NOT_DIGIT 12
333#define DUK_RETOK_ATOM_WHITE 13
334#define DUK_RETOK_ATOM_NOT_WHITE 14
335#define DUK_RETOK_ATOM_WORD_CHAR 15
336#define DUK_RETOK_ATOM_NOT_WORD_CHAR 16
337#define DUK_RETOK_ATOM_BACKREFERENCE 17
338#define DUK_RETOK_ATOM_START_CAPTURE_GROUP 18
339#define DUK_RETOK_ATOM_START_NONCAPTURE_GROUP 19
340#define DUK_RETOK_ATOM_START_CHARCLASS 20
341#define DUK_RETOK_ATOM_START_CHARCLASS_INVERTED 21
342#define DUK_RETOK_ATOM_END_GROUP 22
343
344/* Constants for duk_lexer_ctx.buf. */
345#define DUK_LEXER_TEMP_BUF_LIMIT 256
346
347/* A token value. Can be memcpy()'d, but note that slot1/slot2 values are on the valstack.
348 * Some fields (like num, str1, str2) are only valid for specific token types and may have
349 * stale values otherwise.
350 */
351struct duk_token {
352 duk_small_int_t t; /* token type (with reserved word identification) */
353 duk_small_int_t t_nores; /* token type (with reserved words as DUK_TOK_IDENTIFIER) */
354 duk_double_t num; /* numeric value of token */
355 duk_hstring *str1; /* string 1 of token (borrowed, stored to ctx->slot1_idx) */
356 duk_hstring *str2; /* string 2 of token (borrowed, stored to ctx->slot2_idx) */
357 duk_size_t start_offset; /* start byte offset of token in lexer input */
358 duk_int_t start_line; /* start line of token (first char) */
359 duk_int_t num_escapes; /* number of escapes and line continuations (for directive prologue) */
360 duk_bool_t lineterm; /* token was preceded by a lineterm */
361 duk_bool_t allow_auto_semi; /* token allows automatic semicolon insertion (eof or preceded by newline) */
362};
363
364#define DUK_RE_QUANTIFIER_INFINITE ((duk_uint32_t) 0xffffffffUL)
365
366/* A regexp token value. */
367struct duk_re_token {
368 duk_small_int_t t; /* token type */
370 duk_uint_fast32_t num; /* numeric value (character, count) */
373};
374
375/* A structure for 'snapshotting' a point for rewinding */
376struct duk_lexer_point {
379};
380
381/* Lexer codepoint with additional info like offset/line number */
382struct duk_lexer_codepoint {
386};
387
388/* Lexer context. Same context is used for Ecmascript and Regexp parsing. */
389struct duk_lexer_ctx {
390#if defined(DUK_USE_LEXER_SLIDING_WINDOW)
391 duk_lexer_codepoint *window; /* unicode code points, window[0] is always next, points to 'buffer' */
393#else
394 duk_lexer_codepoint window[DUK_LEXER_WINDOW_SIZE]; /* unicode code points, window[0] is always next */
395#endif
396
397 duk_hthread *thr; /* thread; minimizes argument passing */
398
399 const duk_uint8_t *input; /* input string (may be a user pointer) */
400 duk_size_t input_length; /* input byte length */
401 duk_size_t input_offset; /* input offset for window leading edge (not window[0]) */
402 duk_int_t input_line; /* input linenumber at input_offset (not window[0]), init to 1 */
403
404 duk_idx_t slot1_idx; /* valstack slot for 1st token value */
405 duk_idx_t slot2_idx; /* valstack slot for 2nd token value */
406 duk_idx_t buf_idx; /* valstack slot for temp buffer */
407 duk_hbuffer_dynamic *buf; /* temp accumulation buffer */
408 duk_bufwriter_ctx bw; /* bufwriter for temp accumulation */
409
410 duk_int_t token_count; /* number of tokens parsed */
411 duk_int_t token_limit; /* maximum token count before error (sanity backstop) */
412};
413
414/*
415 * Prototypes
416 */
417
419
421
424 duk_token *out_token,
425 duk_bool_t strict_mode,
426 duk_bool_t regexp_mode);
427#ifdef DUK_USE_REGEXP_SUPPORT
430#endif /* DUK_USE_REGEXP_SUPPORT */
431
432#endif /* DUK_LEXER_H_INCLUDED */
duk_int_fast32_t duk_int_t
duk_small_int_t duk_bool_t
duk_uint32_t duk_uint_fast32_t
DUK_INTERNAL_DECL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token *out_token)
DUK_INTERNAL_DECL void duk_lexer_parse_re_ranges(duk_lexer_ctx *lex_ctx, duk_re_range_callback gen_range, void *userdata)
DUK_INTERNAL_DECL void duk_lexer_initctx(duk_lexer_ctx *lex_ctx)
DUK_INTERNAL_DECL void duk_lexer_parse_js_input_element(duk_lexer_ctx *lex_ctx, duk_token *out_token, duk_bool_t strict_mode, duk_bool_t regexp_mode)
DUK_INTERNAL_DECL void duk_lexer_setpoint(duk_lexer_ctx *lex_ctx, duk_lexer_point *pt)
void(* duk_re_range_callback)(void *user, duk_codepoint_t r1, duk_codepoint_t r2, duk_bool_t direct)
#define DUK_LEXER_BUFFER_SIZE
#define DUK_LEXER_WINDOW_SIZE
duk_lexer_codepoint buffer[DUK_LEXER_BUFFER_SIZE]