Github User Fetcher 1.0.0
C Application with Server and GUI
Loading...
Searching...
No Matches
duk_regexp_compiler.c File Reference
#include "duk_internal.h"

Go to the source code of this file.

Data Structures

struct  duk__re_disjunction_info
 

Macros

#define DUK__RE_INITIAL_BUFSIZE   64
 
#define DUK__RE_BUFLEN(re_ctx)    DUK_BW_GET_SIZE(re_ctx->thr, &re_ctx->bw)
 

Functions

DUK_LOCAL duk_uint32_t duk__encode_i32 (duk_int32_t x)
 
DUK_LOCAL duk_uint32_t duk__insert_u32 (duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_uint32_t x)
 
DUK_LOCAL duk_uint32_t duk__append_u32 (duk_re_compiler_ctx *re_ctx, duk_uint32_t x)
 
DUK_LOCAL duk_uint32_t duk__insert_i32 (duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_int32_t x)
 
DUK_LOCAL void duk__append_u16_list (duk_re_compiler_ctx *re_ctx, const duk_uint16_t *values, duk_uint32_t count)
 
DUK_LOCAL void duk__insert_slice (duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_uint32_t data_offset, duk_uint32_t data_length)
 
DUK_LOCAL void duk__append_slice (duk_re_compiler_ctx *re_ctx, duk_uint32_t data_offset, duk_uint32_t data_length)
 
DUK_LOCAL void duk__remove_slice (duk_re_compiler_ctx *re_ctx, duk_uint32_t data_offset, duk_uint32_t data_length)
 
DUK_LOCAL duk_uint32_t duk__insert_jump_offset (duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_int32_t skip)
 
DUK_LOCAL duk_uint32_t duk__append_jump_offset (duk_re_compiler_ctx *re_ctx, duk_int32_t skip)
 
DUK_LOCAL void duk__generate_ranges (void *userdata, duk_codepoint_t r1, duk_codepoint_t r2, duk_bool_t direct)
 
DUK_LOCAL void duk__parse_disjunction (duk_re_compiler_ctx *re_ctx, duk_bool_t expect_eof, duk__re_disjunction_info *out_atom_info)
 
DUK_LOCAL duk_uint32_t duk__parse_regexp_flags (duk_hthread *thr, duk_hstring *h)
 
DUK_LOCAL void duk__create_escaped_source (duk_hthread *thr, int idx_pattern)
 
DUK_INTERNAL void duk_regexp_compile (duk_hthread *thr)
 
DUK_INTERNAL void duk_regexp_create_instance (duk_hthread *thr)
 

Macro Definition Documentation

◆ DUK__RE_BUFLEN

#define DUK__RE_BUFLEN(re_ctx)    DUK_BW_GET_SIZE(re_ctx->thr, &re_ctx->bw)

Definition at line 32 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

32#define DUK__RE_BUFLEN(re_ctx) \
33 DUK_BW_GET_SIZE(re_ctx->thr, &re_ctx->bw)

Referenced by duk__append_jump_offset(), and duk__parse_disjunction().

◆ DUK__RE_INITIAL_BUFSIZE

#define DUK__RE_INITIAL_BUFSIZE   64

Definition at line 29 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

Referenced by duk_regexp_compile().

Function Documentation

◆ duk__append_jump_offset()

DUK_LOCAL duk_uint32_t duk__append_jump_offset ( duk_re_compiler_ctx * re_ctx,
duk_int32_t skip )

Definition at line 162 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

162 {
163 return (duk_uint32_t) duk__insert_jump_offset(re_ctx, (duk_uint32_t) DUK__RE_BUFLEN(re_ctx), skip);
164}
DUK_LOCAL duk_uint32_t duk__insert_jump_offset(duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_int32_t skip)

References duk__insert_jump_offset(), and DUK__RE_BUFLEN.

Referenced by duk__parse_disjunction().

◆ duk__append_slice()

DUK_LOCAL void duk__append_slice ( duk_re_compiler_ctx * re_ctx,
duk_uint32_t data_offset,
duk_uint32_t data_length )

Definition at line 126 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

126 {
127 DUK_BW_WRITE_ENSURE_SLICE(re_ctx->thr, &re_ctx->bw, data_offset, data_length);
128}
#define DUK_BW_WRITE_ENSURE_SLICE(thr, bw, dst_off, dst_len)

References duk_re_compiler_ctx::bw, DUK_BW_WRITE_ENSURE_SLICE, and duk_re_compiler_ctx::thr.

Referenced by duk__parse_disjunction().

◆ duk__append_u16_list()

DUK_LOCAL void duk__append_u16_list ( duk_re_compiler_ctx * re_ctx,
const duk_uint16_t * values,
duk_uint32_t count )

Definition at line 114 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

114 {
115 /* Call sites don't need the result length so it's not accumulated. */
116 while (count > 0) {
117 (void) duk__append_u32(re_ctx, (duk_uint32_t) (*values++));
118 count--;
119 }
120}
DUK_LOCAL duk_uint32_t duk__append_u32(duk_re_compiler_ctx *re_ctx, duk_uint32_t x)

References duk__append_u32().

Referenced by duk__parse_disjunction().

◆ duk__append_u32()

DUK_LOCAL duk_uint32_t duk__append_u32 ( duk_re_compiler_ctx * re_ctx,
duk_uint32_t x )

Definition at line 94 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

94 {
95 duk_uint8_t buf[DUK_UNICODE_MAX_XUTF8_LENGTH];
97
99 DUK_BW_WRITE_ENSURE_BYTES(re_ctx->thr, &re_ctx->bw, buf, len);
100 return (duk_uint32_t) len;
101}
DUK_INTERNAL_DECL duk_small_int_t duk_unicode_encode_xutf8(duk_ucodepoint_t cp, duk_uint8_t *out)
#define DUK_BW_WRITE_ENSURE_BYTES(thr, bw_ctx, valptr, valsz)
#define DUK_UNICODE_MAX_XUTF8_LENGTH

References duk_re_compiler_ctx::bw, DUK_BW_WRITE_ENSURE_BYTES, duk_unicode_encode_xutf8(), DUK_UNICODE_MAX_XUTF8_LENGTH, and duk_re_compiler_ctx::thr.

Referenced by duk__append_u16_list(), duk__generate_ranges(), duk__parse_disjunction(), and duk_regexp_compile().

◆ duk__create_escaped_source()

DUK_LOCAL void duk__create_escaped_source ( duk_hthread * thr,
int idx_pattern )

Definition at line 834 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

834 {
835 duk_context *ctx = (duk_context *) thr;
836 duk_hstring *h;
837 const duk_uint8_t *p;
838 duk_bufwriter_ctx bw_alloc;
840 duk_uint8_t *q;
841 duk_size_t i, n;
842 duk_uint_fast8_t c_prev, c;
843
844 h = duk_get_hstring(ctx, idx_pattern);
845 DUK_ASSERT(h != NULL);
846 p = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h);
848
849 if (n == 0) {
850 /* return '(?:)' */
852 return;
853 }
854
855 bw = &bw_alloc;
856 DUK_BW_INIT_PUSHBUF(thr, bw, n);
857 q = DUK_BW_GET_PTR(thr, bw);
858
859 c_prev = (duk_uint_fast8_t) 0;
860
861 for (i = 0; i < n; i++) {
862 c = p[i];
863
864 q = DUK_BW_ENSURE_RAW(thr, bw, 2, q);
865
866 if (c == (duk_uint_fast8_t) '/' && c_prev != (duk_uint_fast8_t) '\\') {
867 /* Unescaped '/' ANYWHERE in the regexp (in disjunction,
868 * inside a character class, ...) => same escape works.
869 */
870 *q++ = DUK_ASC_BACKSLASH;
871 }
872 *q++ = (duk_uint8_t) c;
873
874 c_prev = c;
875 }
876
877 DUK_BW_SETPTR_AND_COMPACT(thr, bw, q);
878 duk_to_string(ctx, -1); /* -> [ ... escaped_source ] */
879}
duk_uint8_t duk_uint_fast8_t
#define DUK_HSTRING_GET_DATA(x)
#define DUK_BW_SETPTR_AND_COMPACT(thr, bw_ctx, ptr)
DUK_INTERNAL_DECL void duk_push_hstring_stridx(duk_context *ctx, duk_small_int_t stridx)
#define DUK_BW_ENSURE_RAW(thr, bw_ctx, sz, ptr)
#define DUK_HSTRING_GET_BYTELEN(x)
#define DUK_BW_INIT_PUSHBUF(thr, bw_ctx, sz)
#define DUK_BW_GET_PTR(thr, bw_ctx)
#define DUK_STRIDX_ESCAPED_EMPTY_REGEXP
DUK_EXTERNAL const char * duk_to_string(duk_context *ctx, duk_idx_t index)
DUK_INTERNAL_DECL duk_hstring * duk_get_hstring(duk_context *ctx, duk_idx_t index)
#define NULL
Definition gmacros.h:924

References DUK_ASC_BACKSLASH, DUK_ASSERT, DUK_BW_ENSURE_RAW, DUK_BW_GET_PTR, DUK_BW_INIT_PUSHBUF, DUK_BW_SETPTR_AND_COMPACT, duk_get_hstring(), DUK_HSTRING_GET_BYTELEN, DUK_HSTRING_GET_DATA, duk_push_hstring_stridx(), DUK_STRIDX_ESCAPED_EMPTY_REGEXP, duk_to_string(), and NULL.

Referenced by duk_regexp_compile().

◆ duk__encode_i32()

DUK_LOCAL duk_uint32_t duk__encode_i32 ( duk_int32_t x)

Definition at line 74 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

74 {
75 if (x < 0) {
76 return ((duk_uint32_t) (-x)) * 2 + 1;
77 } else {
78 return ((duk_uint32_t) x) * 2;
79 }
80}

Referenced by duk__insert_i32(), and duk__insert_jump_offset().

◆ duk__generate_ranges()

DUK_LOCAL void duk__generate_ranges ( void * userdata,
duk_codepoint_t r1,
duk_codepoint_t r2,
duk_bool_t direct )

Definition at line 185 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

185 {
186 duk_re_compiler_ctx *re_ctx = (duk_re_compiler_ctx *) userdata;
187
188 DUK_DD(DUK_DDPRINT("duk__generate_ranges(): re_ctx=%p, range=[%ld,%ld] direct=%ld",
189 (void *) re_ctx, (long) r1, (long) r2, (long) direct));
190
191 if (!direct && (re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE)) {
192 /*
193 * Canonicalize a range, generating result ranges as necessary.
194 * Needs to exhaustively scan the entire range (at most 65536
195 * code points). If 'direct' is set, caller (lexer) has ensured
196 * that the range is already canonicalization compatible (this
197 * is used to avoid unnecessary canonicalization of built-in
198 * ranges like \W, which are not affected by canonicalization).
199 *
200 * NOTE: here is one place where we don't want to support chars
201 * outside the BMP, because the exhaustive search would be
202 * massively larger.
203 */
204
207 duk_codepoint_t r_start, r_end;
208
209 r_start = duk_unicode_re_canonicalize_char(re_ctx->thr, r1);
210 r_end = r_start;
211 for (i = r1 + 1; i <= r2; i++) {
212 t = duk_unicode_re_canonicalize_char(re_ctx->thr, i);
213 if (t == r_end + 1) {
214 r_end = t;
215 } else {
216 DUK_DD(DUK_DDPRINT("canonicalized, emit range: [%ld,%ld]", (long) r_start, (long) r_end));
217 duk__append_u32(re_ctx, (duk_uint32_t) r_start);
218 duk__append_u32(re_ctx, (duk_uint32_t) r_end);
219 re_ctx->nranges++;
220 r_start = t;
221 r_end = t;
222 }
223 }
224 DUK_DD(DUK_DDPRINT("canonicalized, emit range: [%ld,%ld]", (long) r_start, (long) r_end));
225 duk__append_u32(re_ctx, (duk_uint32_t) r_start);
226 duk__append_u32(re_ctx, (duk_uint32_t) r_end);
227 re_ctx->nranges++;
228 } else {
229 DUK_DD(DUK_DDPRINT("direct, emit range: [%ld,%ld]", (long) r1, (long) r2));
230 duk__append_u32(re_ctx, (duk_uint32_t) r1);
231 duk__append_u32(re_ctx, (duk_uint32_t) r2);
232 re_ctx->nranges++;
233 }
234}
#define DUK_RE_FLAG_IGNORE_CASE
DUK_INTERNAL_DECL duk_codepoint_t duk_unicode_re_canonicalize_char(duk_hthread *thr, duk_codepoint_t cp)

References duk__append_u32(), DUK_DD, DUK_DDPRINT, DUK_RE_FLAG_IGNORE_CASE, duk_unicode_re_canonicalize_char(), duk_re_compiler_ctx::nranges, duk_re_compiler_ctx::re_flags, and duk_re_compiler_ctx::thr.

Referenced by duk__parse_disjunction().

◆ duk__insert_i32()

DUK_LOCAL duk_uint32_t duk__insert_i32 ( duk_re_compiler_ctx * re_ctx,
duk_uint32_t offset,
duk_int32_t x )

Definition at line 103 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

103 {
104 return duk__insert_u32(re_ctx, offset, duk__encode_i32(x));
105}
DUK_LOCAL duk_uint32_t duk__encode_i32(duk_int32_t x)
DUK_LOCAL duk_uint32_t duk__insert_u32(duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_uint32_t x)

References duk__encode_i32(), and duk__insert_u32().

Referenced by duk__insert_jump_offset().

◆ duk__insert_jump_offset()

DUK_LOCAL duk_uint32_t duk__insert_jump_offset ( duk_re_compiler_ctx * re_ctx,
duk_uint32_t offset,
duk_int32_t skip )

Definition at line 147 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

147 {
148 duk_small_int_t len;
149
150 /* XXX: solve into closed form (smaller code) */
151
152 if (skip < 0) {
153 /* two encoding attempts suffice */
155 len = duk_unicode_get_xutf8_length((duk_codepoint_t) duk__encode_i32(skip - (duk_int32_t) len));
156 DUK_ASSERT(duk_unicode_get_xutf8_length(duk__encode_i32(skip - (duk_int32_t) len)) == len); /* no change */
157 skip -= (duk_int32_t) len;
158 }
159 return duk__insert_i32(re_ctx, offset, skip);
160}
DUK_INTERNAL_DECL duk_small_int_t duk_unicode_get_xutf8_length(duk_ucodepoint_t cp)
DUK_LOCAL duk_uint32_t duk__insert_i32(duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_int32_t x)

References duk__encode_i32(), duk__insert_i32(), DUK_ASSERT, and duk_unicode_get_xutf8_length().

Referenced by duk__append_jump_offset(), and duk__parse_disjunction().

◆ duk__insert_slice()

DUK_LOCAL void duk__insert_slice ( duk_re_compiler_ctx * re_ctx,
duk_uint32_t offset,
duk_uint32_t data_offset,
duk_uint32_t data_length )

Definition at line 122 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

122 {
123 DUK_BW_INSERT_ENSURE_SLICE(re_ctx->thr, &re_ctx->bw, offset, data_offset, data_length);
124}
#define DUK_BW_INSERT_ENSURE_SLICE(thr, bw, dst_off, src_off, len)

References duk_re_compiler_ctx::bw, DUK_BW_INSERT_ENSURE_SLICE, and duk_re_compiler_ctx::thr.

Referenced by duk__parse_disjunction().

◆ duk__insert_u32()

DUK_LOCAL duk_uint32_t duk__insert_u32 ( duk_re_compiler_ctx * re_ctx,
duk_uint32_t offset,
duk_uint32_t x )

Definition at line 85 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

85 {
86 duk_uint8_t buf[DUK_UNICODE_MAX_XUTF8_LENGTH];
88
90 DUK_BW_INSERT_ENSURE_BYTES(re_ctx->thr, &re_ctx->bw, offset, buf, len);
91 return (duk_uint32_t) len;
92}
#define DUK_BW_INSERT_ENSURE_BYTES(thr, bw, dst_off, buf, len)

References duk_re_compiler_ctx::bw, DUK_BW_INSERT_ENSURE_BYTES, duk_unicode_encode_xutf8(), DUK_UNICODE_MAX_XUTF8_LENGTH, and duk_re_compiler_ctx::thr.

Referenced by duk__insert_i32(), duk__parse_disjunction(), and duk_regexp_compile().

◆ duk__parse_disjunction()

DUK_LOCAL void duk__parse_disjunction ( duk_re_compiler_ctx * re_ctx,
duk_bool_t expect_eof,
duk__re_disjunction_info * out_atom_info )

Definition at line 286 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

286 {
287 duk_int32_t atom_start_offset = -1; /* negative -> no atom matched on previous round */
288 duk_int32_t atom_char_length = 0; /* negative -> complex atom */
289 duk_uint32_t atom_start_captures = re_ctx->captures; /* value of re_ctx->captures at start of atom */
290 duk_int32_t unpatched_disjunction_split = -1;
291 duk_int32_t unpatched_disjunction_jump = -1;
292 duk_uint32_t entry_offset = (duk_uint32_t) DUK__RE_BUFLEN(re_ctx);
293 duk_int32_t res_charlen = 0; /* -1 if disjunction is complex, char length if simple */
295
296 DUK_ASSERT(out_atom_info != NULL);
297
298 if (re_ctx->recursion_depth >= re_ctx->recursion_limit) {
300 }
301 re_ctx->recursion_depth++;
302
303#if 0
304 out_atom_info->start_captures = re_ctx->captures;
305#endif
306
307 for (;;) {
308 /* atom_char_length, atom_start_offset, atom_start_captures reflect the
309 * atom matched on the previous loop. If a quantifier is encountered
310 * on this loop, these are needed to handle the quantifier correctly.
311 * new_atom_char_length etc are for the atom parsed on this round;
312 * they're written to atom_char_length etc at the end of the round.
313 */
314 duk_int32_t new_atom_char_length; /* char length of the atom parsed in this loop */
315 duk_int32_t new_atom_start_offset; /* bytecode start offset of the atom parsed in this loop
316 * (allows quantifiers to copy the atom bytecode)
317 */
318 duk_uint32_t new_atom_start_captures; /* re_ctx->captures at the start of the atom parsed in this loop */
319
320 duk_lexer_parse_re_token(&re_ctx->lex, &re_ctx->curr_token);
321
322 DUK_DD(DUK_DDPRINT("re token: %ld (num=%ld, char=%c)",
323 (long) re_ctx->curr_token.t,
324 (long) re_ctx->curr_token.num,
325 (re_ctx->curr_token.num >= 0x20 && re_ctx->curr_token.num <= 0x7e) ?
326 (int) re_ctx->curr_token.num : (int) '?'));
327
328 /* set by atom case clauses */
329 new_atom_start_offset = -1;
330 new_atom_char_length = -1;
331 new_atom_start_captures = re_ctx->captures;
332
333 switch (re_ctx->curr_token.t) {
335 /*
336 * The handling here is a bit tricky. If a previous '|' has been processed,
337 * we have a pending split1 and a pending jump (for a previous match). These
338 * need to be back-patched carefully. See docs for a detailed example.
339 */
340
341 /* patch pending jump and split */
342 if (unpatched_disjunction_jump >= 0) {
343 duk_uint32_t offset;
344
345 DUK_ASSERT(unpatched_disjunction_split >= 0);
346 offset = unpatched_disjunction_jump;
347 offset += duk__insert_jump_offset(re_ctx,
348 offset,
349 (duk_int32_t) (DUK__RE_BUFLEN(re_ctx) - offset));
350 /* offset is now target of the pending split (right after jump) */
352 unpatched_disjunction_split,
353 offset - unpatched_disjunction_split);
354 }
355
356 /* add a new pending split to the beginning of the entire disjunction */
357 (void) duk__insert_u32(re_ctx,
358 entry_offset,
359 DUK_REOP_SPLIT1); /* prefer direct execution */
360 unpatched_disjunction_split = entry_offset + 1; /* +1 for opcode */
361
362 /* add a new pending match jump for latest finished alternative */
364 unpatched_disjunction_jump = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
365
366 /* 'taint' result as complex */
367 res_charlen = -1;
368 break;
369 }
371 if (atom_start_offset < 0) {
373 }
374 if (re_ctx->curr_token.qmin > re_ctx->curr_token.qmax) {
376 }
377 if (atom_char_length >= 0) {
378 /*
379 * Simple atom
380 *
381 * If atom_char_length is zero, we'll have unbounded execution time for e.g.
382 * /()*x/.exec('x'). We can't just skip the match because it might have some
383 * side effects (for instance, if we allowed captures in simple atoms, the
384 * capture needs to happen). The simple solution below is to force the
385 * quantifier to match at most once, since the additional matches have no effect.
386 *
387 * With a simple atom there can be no capture groups, so no captures need
388 * to be reset.
389 */
390 duk_int32_t atom_code_length;
391 duk_uint32_t offset;
392 duk_uint32_t qmin, qmax;
393
394 qmin = re_ctx->curr_token.qmin;
395 qmax = re_ctx->curr_token.qmax;
396 if (atom_char_length == 0) {
397 /* qmin and qmax will be 0 or 1 */
398 if (qmin > 1) {
399 qmin = 1;
400 }
401 if (qmax > 1) {
402 qmax = 1;
403 }
404 }
405
406 duk__append_u32(re_ctx, DUK_REOP_MATCH); /* complete 'sub atom' */
407 atom_code_length = (duk_int32_t) (DUK__RE_BUFLEN(re_ctx) - atom_start_offset);
408
409 offset = atom_start_offset;
410 if (re_ctx->curr_token.greedy) {
411 offset += duk__insert_u32(re_ctx, offset, DUK_REOP_SQGREEDY);
412 offset += duk__insert_u32(re_ctx, offset, qmin);
413 offset += duk__insert_u32(re_ctx, offset, qmax);
414 offset += duk__insert_u32(re_ctx, offset, atom_char_length);
415 offset += duk__insert_jump_offset(re_ctx, offset, atom_code_length);
416 } else {
417 offset += duk__insert_u32(re_ctx, offset, DUK_REOP_SQMINIMAL);
418 offset += duk__insert_u32(re_ctx, offset, qmin);
419 offset += duk__insert_u32(re_ctx, offset, qmax);
420 offset += duk__insert_jump_offset(re_ctx, offset, atom_code_length);
421 }
422 DUK_UNREF(offset); /* silence scan-build warning */
423 } else {
424 /*
425 * Complex atom
426 *
427 * The original code is used as a template, and removed at the end
428 * (this differs from the handling of simple quantifiers).
429 *
430 * NOTE: there is no current solution for empty atoms in complex
431 * quantifiers. This would need some sort of a 'progress' instruction.
432 *
433 * XXX: impose limit on maximum result size, i.e. atom_code_len * atom_copies?
434 */
435 duk_int32_t atom_code_length;
436 duk_uint32_t atom_copies;
437 duk_uint32_t tmp_qmin, tmp_qmax;
438
439 /* pre-check how many atom copies we're willing to make (atom_copies not needed below) */
440 atom_copies = (re_ctx->curr_token.qmax == DUK_RE_QUANTIFIER_INFINITE) ?
441 re_ctx->curr_token.qmin : re_ctx->curr_token.qmax;
442 if (atom_copies > DUK_RE_MAX_ATOM_COPIES) {
444 }
445
446 /* wipe the capture range made by the atom (if any) */
447 DUK_ASSERT(atom_start_captures <= re_ctx->captures);
448 if (atom_start_captures != re_ctx->captures) {
449 DUK_ASSERT(atom_start_captures < re_ctx->captures);
450 DUK_DDD(DUK_DDDPRINT("must wipe ]atom_start_captures,re_ctx->captures]: ]%ld,%ld]",
451 (long) atom_start_captures, (long) re_ctx->captures));
452
453 /* insert (DUK_REOP_WIPERANGE, start, count) in reverse order so the order ends up right */
454 duk__insert_u32(re_ctx, atom_start_offset, (re_ctx->captures - atom_start_captures) * 2);
455 duk__insert_u32(re_ctx, atom_start_offset, (atom_start_captures + 1) * 2);
456 duk__insert_u32(re_ctx, atom_start_offset, DUK_REOP_WIPERANGE);
457 } else {
458 DUK_DDD(DUK_DDDPRINT("no need to wipe captures: atom_start_captures == re_ctx->captures == %ld",
459 (long) atom_start_captures));
460 }
461
462 atom_code_length = (duk_int32_t) DUK__RE_BUFLEN(re_ctx) - atom_start_offset;
463
464 /* insert the required matches (qmin) by copying the atom */
465 tmp_qmin = re_ctx->curr_token.qmin;
466 tmp_qmax = re_ctx->curr_token.qmax;
467 while (tmp_qmin > 0) {
468 duk__append_slice(re_ctx, atom_start_offset, atom_code_length);
469 tmp_qmin--;
470 if (tmp_qmax != DUK_RE_QUANTIFIER_INFINITE) {
471 tmp_qmax--;
472 }
473 }
474 DUK_ASSERT(tmp_qmin == 0);
475
476 /* insert code for matching the remainder - infinite or finite */
477 if (tmp_qmax == DUK_RE_QUANTIFIER_INFINITE) {
478 /* reuse last emitted atom for remaining 'infinite' quantifier */
479
480 if (re_ctx->curr_token.qmin == 0) {
481 /* Special case: original qmin was zero so there is nothing
482 * to repeat. Emit an atom copy but jump over it here.
483 */
485 duk__append_jump_offset(re_ctx, atom_code_length);
486 duk__append_slice(re_ctx, atom_start_offset, atom_code_length);
487 }
488 if (re_ctx->curr_token.greedy) {
489 duk__append_u32(re_ctx, DUK_REOP_SPLIT2); /* prefer jump */
490 } else {
491 duk__append_u32(re_ctx, DUK_REOP_SPLIT1); /* prefer direct */
492 }
493 duk__append_jump_offset(re_ctx, -atom_code_length - 1); /* -1 for opcode */
494 } else {
495 /*
496 * The remaining matches are emitted as sequence of SPLITs and atom
497 * copies; the SPLITs skip the remaining copies and match the sequel.
498 * This sequence needs to be emitted starting from the last copy
499 * because the SPLITs are variable length due to the variable length
500 * skip offset. This causes a lot of memory copying now.
501 *
502 * Example structure (greedy, match maximum # atoms):
503 *
504 * SPLIT1 LSEQ
505 * (atom)
506 * SPLIT1 LSEQ ; <- the byte length of this instruction is needed
507 * (atom) ; to encode the above SPLIT1 correctly
508 * ...
509 * LSEQ:
510 */
511 duk_uint32_t offset = (duk_uint32_t) DUK__RE_BUFLEN(re_ctx);
512 while (tmp_qmax > 0) {
513 duk__insert_slice(re_ctx, offset, atom_start_offset, atom_code_length);
514 if (re_ctx->curr_token.greedy) {
515 duk__insert_u32(re_ctx, offset, DUK_REOP_SPLIT1); /* prefer direct */
516 } else {
517 duk__insert_u32(re_ctx, offset, DUK_REOP_SPLIT2); /* prefer jump */
518 }
520 offset + 1, /* +1 for opcode */
521 (duk_int32_t) (DUK__RE_BUFLEN(re_ctx) - (offset + 1)));
522 tmp_qmax--;
523 }
524 }
525
526 /* remove the original 'template' atom */
527 duk__remove_slice(re_ctx, atom_start_offset, atom_code_length);
528 }
529
530 /* 'taint' result as complex */
531 res_charlen = -1;
532 break;
533 }
536 break;
537 }
540 break;
541 }
544 break;
545 }
548 break;
549 }
552 duk_uint32_t offset;
553 duk_uint32_t opcode = (re_ctx->curr_token.t == DUK_RETOK_ASSERT_START_POS_LOOKAHEAD) ?
555
556 offset = (duk_uint32_t) DUK__RE_BUFLEN(re_ctx);
557 duk__parse_disjunction(re_ctx, 0, &tmp_disj);
559
560 (void) duk__insert_u32(re_ctx, offset, opcode);
561 (void) duk__insert_jump_offset(re_ctx,
562 offset + 1, /* +1 for opcode */
563 (duk_int32_t) (DUK__RE_BUFLEN(re_ctx) - (offset + 1)));
564
565 /* 'taint' result as complex -- this is conservative,
566 * as lookaheads do not backtrack.
567 */
568 res_charlen = -1;
569 break;
570 }
572 new_atom_char_length = 1;
573 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
575 break;
576 }
577 case DUK_RETOK_ATOM_CHAR: {
578 /* Note: successive characters could be joined into string matches
579 * but this is not trivial (consider e.g. '/xyz+/'); see docs for
580 * more discussion.
581 */
582 duk_uint32_t ch;
583
584 new_atom_char_length = 1;
585 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
587 ch = re_ctx->curr_token.num;
588 if (re_ctx->re_flags & DUK_RE_FLAG_IGNORE_CASE) {
589 ch = duk_unicode_re_canonicalize_char(re_ctx->thr, ch);
590 }
591 duk__append_u32(re_ctx, ch);
592 break;
593 }
596 new_atom_char_length = 1;
597 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
598 duk__append_u32(re_ctx,
599 (re_ctx->curr_token.t == DUK_RETOK_ATOM_DIGIT) ?
601 duk__append_u32(re_ctx, sizeof(duk_unicode_re_ranges_digit) / (2 * sizeof(duk_uint16_t)));
603 break;
604 }
607 new_atom_char_length = 1;
608 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
609 duk__append_u32(re_ctx,
610 (re_ctx->curr_token.t == DUK_RETOK_ATOM_WHITE) ?
612 duk__append_u32(re_ctx, sizeof(duk_unicode_re_ranges_white) / (2 * sizeof(duk_uint16_t)));
614 break;
615 }
618 new_atom_char_length = 1;
619 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
620 duk__append_u32(re_ctx,
623 duk__append_u32(re_ctx, sizeof(duk_unicode_re_ranges_wordchar) / (2 * sizeof(duk_uint16_t)));
625 break;
626 }
628 duk_uint32_t backref = (duk_uint32_t) re_ctx->curr_token.num;
629 if (backref > re_ctx->highest_backref) {
630 re_ctx->highest_backref = backref;
631 }
632 new_atom_char_length = -1; /* mark as complex */
633 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
635 duk__append_u32(re_ctx, backref);
636 break;
637 }
639 duk_uint32_t cap;
640
641 new_atom_char_length = -1; /* mark as complex (capture handling) */
642 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
643 cap = ++re_ctx->captures;
645 duk__append_u32(re_ctx, cap * 2);
646 duk__parse_disjunction(re_ctx, 0, &tmp_disj); /* retval (sub-atom char length) unused, tainted as complex above */
648 duk__append_u32(re_ctx, cap * 2 + 1);
649 break;
650 }
652 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
653 duk__parse_disjunction(re_ctx, 0, &tmp_disj);
654 new_atom_char_length = tmp_disj.charlen;
655 break;
656 }
659 /*
660 * Range parsing is done with a special lexer function which calls
661 * us for every range parsed. This is different from how rest of
662 * the parsing works, but avoids a heavy, arbitrary size intermediate
663 * value type to hold the ranges.
664 *
665 * Another complication is the handling of character ranges when
666 * case insensitive matching is used (see docs for discussion).
667 * The range handler callback given to the lexer takes care of this
668 * as well.
669 *
670 * Note that duplicate ranges are not eliminated when parsing character
671 * classes, so that canonicalization of
672 *
673 * [0-9a-fA-Fx-{]
674 *
675 * creates the result (note the duplicate ranges):
676 *
677 * [0-9A-FA-FX-Z{-{]
678 *
679 * where [x-{] is split as a result of canonicalization. The duplicate
680 * ranges are not a semantics issue: they work correctly.
681 */
682
683 duk_uint32_t offset;
684
685 DUK_DD(DUK_DDPRINT("character class"));
686
687 /* insert ranges instruction, range count patched in later */
688 new_atom_char_length = 1;
689 new_atom_start_offset = (duk_int32_t) DUK__RE_BUFLEN(re_ctx);
690 duk__append_u32(re_ctx,
693 offset = (duk_uint32_t) DUK__RE_BUFLEN(re_ctx); /* patch in range count later */
694
695 /* parse ranges until character class ends */
696 re_ctx->nranges = 0; /* note: ctx-wide temporary */
697 duk_lexer_parse_re_ranges(&re_ctx->lex, duk__generate_ranges, (void *) re_ctx);
698
699 /* insert range count */
700 duk__insert_u32(re_ctx, offset, re_ctx->nranges);
701 break;
702 }
704 if (expect_eof) {
706 }
707 goto done;
708 }
709 case DUK_RETOK_EOF: {
710 if (!expect_eof) {
712 }
713 goto done;
714 }
715 default: {
717 }
718 }
719
720 /* a complex (new) atom taints the result */
721 if (new_atom_start_offset >= 0) {
722 if (new_atom_char_length < 0) {
723 res_charlen = -1;
724 } else if (res_charlen >= 0) {
725 /* only advance if not tainted */
726 res_charlen += new_atom_char_length;
727 }
728 }
729
730 /* record previous atom info in case next token is a quantifier */
731 atom_start_offset = new_atom_start_offset;
732 atom_char_length = new_atom_char_length;
733 atom_start_captures = new_atom_start_captures;
734 }
735
736 done:
737
738 /* finish up pending jump and split for last alternative */
739 if (unpatched_disjunction_jump >= 0) {
740 duk_uint32_t offset;
741
742 DUK_ASSERT(unpatched_disjunction_split >= 0);
743 offset = unpatched_disjunction_jump;
744 offset += duk__insert_jump_offset(re_ctx,
745 offset,
746 (duk_int32_t) (DUK__RE_BUFLEN(re_ctx) - offset));
747 /* offset is now target of the pending split (right after jump) */
749 unpatched_disjunction_split,
750 offset - unpatched_disjunction_split);
751 }
752
753#if 0
754 out_atom_info->end_captures = re_ctx->captures;
755#endif
756 out_atom_info->charlen = res_charlen;
757 DUK_DDD(DUK_DDDPRINT("parse disjunction finished: charlen=%ld",
758 (long) out_atom_info->charlen));
759
760 re_ctx->recursion_depth--;
761}
#define DUK_RETOK_DISJUNCTION
#define DUK_ERROR_RANGE(thr, msg)
#define DUK_REOP_ASSERT_WORD_BOUNDARY
#define DUK_RETOK_ATOM_PERIOD
#define DUK_RETOK_ATOM_WORD_CHAR
#define DUK_RETOK_ATOM_START_CHARCLASS
#define DUK_STR_INVALID_QUANTIFIER_NO_ATOM
#define DUK_STR_UNEXPECTED_REGEXP_TOKEN
#define DUK_ERROR_SYNTAX(thr, msg)
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_digit[2]
#define DUK_RETOK_ATOM_START_CHARCLASS_INVERTED
#define DUK_STR_UNEXPECTED_END_OF_PATTERN
#define DUK_RETOK_ATOM_END_GROUP
#define DUK_RETOK_ATOM_NOT_WHITE
#define DUK_RETOK_ASSERT_WORD_BOUNDARY
#define DUK_RETOK_ATOM_NOT_WORD_CHAR
#define DUK_RETOK_ATOM_START_CAPTURE_GROUP
#define DUK_RETOK_ASSERT_START
#define DUK_STR_INVALID_QUANTIFIER_VALUES
DUK_INTERNAL_DECL void duk_lexer_parse_re_token(duk_lexer_ctx *lex_ctx, duk_re_token *out_token)
#define DUK_RETOK_ASSERT_START_POS_LOOKAHEAD
#define DUK_RETOK_ATOM_NOT_DIGIT
#define DUK_REOP_BACKREFERENCE
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_wordchar[8]
#define DUK_REOP_ASSERT_START
#define DUK_RE_QUANTIFIER_INFINITE
#define DUK_RETOK_ATOM_START_NONCAPTURE_GROUP
#define DUK_STR_REGEXP_COMPILER_RECURSION_LIMIT
#define DUK_REOP_ASSERT_NOT_WORD_BOUNDARY
#define DUK_RETOK_ATOM_BACKREFERENCE
#define DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD
#define DUK_RE_MAX_ATOM_COPIES
DUK_INTERNAL_DECL void duk_lexer_parse_re_ranges(duk_lexer_ctx *lex_ctx, duk_re_range_callback gen_range, void *userdata)
#define DUK_STR_UNEXPECTED_CLOSING_PAREN
#define DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY
#define DUK_STR_QUANTIFIER_TOO_MANY_COPIES
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_white[22]
DUK_LOCAL duk_uint32_t duk__append_jump_offset(duk_re_compiler_ctx *re_ctx, duk_int32_t skip)
DUK_LOCAL void duk__remove_slice(duk_re_compiler_ctx *re_ctx, duk_uint32_t data_offset, duk_uint32_t data_length)
DUK_LOCAL void duk__generate_ranges(void *userdata, duk_codepoint_t r1, duk_codepoint_t r2, duk_bool_t direct)
DUK_LOCAL void duk__append_slice(duk_re_compiler_ctx *re_ctx, duk_uint32_t data_offset, duk_uint32_t data_length)
DUK_LOCAL void duk__insert_slice(duk_re_compiler_ctx *re_ctx, duk_uint32_t offset, duk_uint32_t data_offset, duk_uint32_t data_length)
DUK_LOCAL void duk__parse_disjunction(duk_re_compiler_ctx *re_ctx, duk_bool_t expect_eof, duk__re_disjunction_info *out_atom_info)
DUK_LOCAL void duk__append_u16_list(duk_re_compiler_ctx *re_ctx, const duk_uint16_t *values, duk_uint32_t count)

References duk_re_compiler_ctx::captures, duk__re_disjunction_info::charlen, duk_re_compiler_ctx::curr_token, duk__append_jump_offset(), duk__append_slice(), duk__append_u16_list(), duk__append_u32(), duk__generate_ranges(), duk__insert_jump_offset(), duk__insert_slice(), duk__insert_u32(), duk__parse_disjunction(), DUK__RE_BUFLEN, duk__remove_slice(), DUK_ASSERT, DUK_DD, DUK_DDD, DUK_DDDPRINT, DUK_DDPRINT, DUK_ERROR_RANGE, DUK_ERROR_SYNTAX, duk_lexer_parse_re_ranges(), duk_lexer_parse_re_token(), DUK_RE_FLAG_IGNORE_CASE, DUK_RE_MAX_ATOM_COPIES, DUK_RE_QUANTIFIER_INFINITE, DUK_REOP_ASSERT_END, DUK_REOP_ASSERT_NOT_WORD_BOUNDARY, DUK_REOP_ASSERT_START, DUK_REOP_ASSERT_WORD_BOUNDARY, DUK_REOP_BACKREFERENCE, DUK_REOP_CHAR, DUK_REOP_INVRANGES, DUK_REOP_JUMP, DUK_REOP_LOOKNEG, DUK_REOP_LOOKPOS, DUK_REOP_MATCH, DUK_REOP_PERIOD, DUK_REOP_RANGES, DUK_REOP_SAVE, DUK_REOP_SPLIT1, DUK_REOP_SPLIT2, DUK_REOP_SQGREEDY, DUK_REOP_SQMINIMAL, DUK_REOP_WIPERANGE, DUK_RETOK_ASSERT_END, DUK_RETOK_ASSERT_NOT_WORD_BOUNDARY, DUK_RETOK_ASSERT_START, DUK_RETOK_ASSERT_START_NEG_LOOKAHEAD, DUK_RETOK_ASSERT_START_POS_LOOKAHEAD, DUK_RETOK_ASSERT_WORD_BOUNDARY, DUK_RETOK_ATOM_BACKREFERENCE, DUK_RETOK_ATOM_CHAR, DUK_RETOK_ATOM_DIGIT, DUK_RETOK_ATOM_END_GROUP, DUK_RETOK_ATOM_NOT_DIGIT, DUK_RETOK_ATOM_NOT_WHITE, DUK_RETOK_ATOM_NOT_WORD_CHAR, DUK_RETOK_ATOM_PERIOD, DUK_RETOK_ATOM_START_CAPTURE_GROUP, DUK_RETOK_ATOM_START_CHARCLASS, DUK_RETOK_ATOM_START_CHARCLASS_INVERTED, DUK_RETOK_ATOM_START_NONCAPTURE_GROUP, DUK_RETOK_ATOM_WHITE, DUK_RETOK_ATOM_WORD_CHAR, DUK_RETOK_DISJUNCTION, DUK_RETOK_EOF, DUK_RETOK_QUANTIFIER, DUK_STR_INVALID_QUANTIFIER_NO_ATOM, DUK_STR_INVALID_QUANTIFIER_VALUES, DUK_STR_QUANTIFIER_TOO_MANY_COPIES, DUK_STR_REGEXP_COMPILER_RECURSION_LIMIT, DUK_STR_UNEXPECTED_CLOSING_PAREN, DUK_STR_UNEXPECTED_END_OF_PATTERN, DUK_STR_UNEXPECTED_REGEXP_TOKEN, duk_unicode_re_canonicalize_char(), duk_unicode_re_ranges_digit, duk_unicode_re_ranges_white, duk_unicode_re_ranges_wordchar, DUK_UNREF, 
duk_re_token::greedy, duk_re_compiler_ctx::highest_backref, duk_re_compiler_ctx::lex, duk_re_compiler_ctx::nranges, NULL, duk_re_token::num, duk_re_token::qmax, duk_re_token::qmin, duk_re_compiler_ctx::re_flags, duk_re_compiler_ctx::recursion_depth, duk_re_compiler_ctx::recursion_limit, duk_re_token::t, and duk_re_compiler_ctx::thr.

Referenced by duk__parse_disjunction(), and duk_regexp_compile().

◆ duk__parse_regexp_flags()

DUK_LOCAL duk_uint32_t duk__parse_regexp_flags ( duk_hthread * thr,
duk_hstring * h )

Definition at line 767 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

/*
 * Parse a regexp flags string into a bitmask of DUK_RE_FLAG_xxx values.
 *
 * Walks the bytes of 'h' up to p_end and accepts 'g', 'i' and 'm'.  A flag
 * that is already set in 'flags', or any other byte, jumps to the 'error'
 * label.  On success the accumulated bitmask is returned.
 *
 * NOTE(review): this listing skips source lines 772, 791 and 810.  The
 * References list for this function names DUK_HSTRING_GET_DATA,
 * DUK_ERROR_SYNTAX and DUK_STR_INVALID_REGEXP_FLAGS, none of which appear
 * in the lines below, so the missing lines presumably initialise 'p', set
 * the ignore-case bit and raise the syntax error -- confirm against the
 * original source file.
 */
767 {
768 const duk_uint8_t *p;
769 const duk_uint8_t *p_end;
770 duk_uint32_t flags = 0;
771
/* NOTE(review): line 772 is absent here; 'p' is used below with no visible assignment. */
773 p_end = p + DUK_HSTRING_GET_BYTELEN(h);
774
775 /* Note: can be safely scanned as bytes (undecoded) */
776
777 while (p < p_end) {
778 duk_uint8_t c = *p++;
779 switch ((int) c) {
780 case (int) 'g': {
781 if (flags & DUK_RE_FLAG_GLOBAL) {
782 goto error;
783 }
784 flags |= DUK_RE_FLAG_GLOBAL;
785 break;
786 }
787 case (int) 'i': {
788 if (flags & DUK_RE_FLAG_IGNORE_CASE) {
789 goto error;
790 }
/* NOTE(review): line 791 is absent here; by analogy with the 'g' and 'm' cases it presumably ORs in DUK_RE_FLAG_IGNORE_CASE. */
792 break;
793 }
794 case (int) 'm': {
795 if (flags & DUK_RE_FLAG_MULTILINE) {
796 goto error;
797 }
798 flags |= DUK_RE_FLAG_MULTILINE;
799 break;
800 }
801 default: {
802 goto error;
803 }
804 }
805 }
806
807 return flags;
808
809 error:
/* NOTE(review): line 810 is absent here; the "never here" remark below suggests it does not return normally. */
811 return 0; /* never here */
812}
#define DUK_RE_FLAG_MULTILINE
#define DUK_STR_INVALID_REGEXP_FLAGS
static void error(LoadState *S, const char *why)

References DUK_ERROR_SYNTAX, DUK_HSTRING_GET_BYTELEN, DUK_HSTRING_GET_DATA, DUK_RE_FLAG_GLOBAL, DUK_RE_FLAG_IGNORE_CASE, DUK_RE_FLAG_MULTILINE, DUK_STR_INVALID_REGEXP_FLAGS, and error().

Referenced by duk_regexp_compile().

◆ duk__remove_slice()

DUK_LOCAL void duk__remove_slice ( duk_re_compiler_ctx * re_ctx,
duk_uint32_t data_offset,
duk_uint32_t data_length )

Definition at line 130 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

/*
 * Thin wrapper: forwards data_offset and data_length, together with the
 * compiler context's thread and buffer writer (re_ctx->bw), to
 * DUK_BW_REMOVE_ENSURE_SLICE.  The macro body is not shown on this page;
 * presumably it removes that byte range from the buffer -- confirm against
 * the buffer writer definitions.
 */
130 {
131 DUK_BW_REMOVE_ENSURE_SLICE(re_ctx->thr, &re_ctx->bw, data_offset, data_length);
132}
#define DUK_BW_REMOVE_ENSURE_SLICE(thr, bw, off, len)

References duk_re_compiler_ctx::bw, DUK_BW_REMOVE_ENSURE_SLICE, and duk_re_compiler_ctx::thr.

Referenced by duk__parse_disjunction().

◆ duk_regexp_compile()

DUK_INTERNAL void duk_regexp_compile ( duk_hthread * thr)

Definition at line 896 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

/*
 * Compile a regexp pattern into bytecode.
 *
 * Value stack on entry:  [ ... pattern flags ]           (both must be strings)
 * Value stack on return: [ ... escaped_source bytecode ]
 *
 * Steps visible below: validate the two arguments, zero and fill in the
 * compiler/lexer context from the pattern string, parse the flags, parse
 * the top-level disjunction, check backreferences against the capture
 * count, insert the header at the front of the output, convert the output
 * to a string and drop the two input values.
 *
 * NOTE(review): this listing skips source lines 902, 921, 937-938, 941,
 * 960, 963, 965 and 977.  The References list for this function names
 * duk__create_escaped_source, DUK_BW_INIT_PUSHBUF, DUK_RE_COMPILE_TOKEN_LIMIT,
 * DUK_USE_REGEXP_COMPILER_RECLIMIT, DUK_REOP_SAVE, DUK_REOP_MATCH,
 * DUK_ERROR_SYNTAX and DUK_STR_INVALID_BACKREFS, none of which appear in
 * the lines below, so they presumably live on the missing lines -- confirm
 * against the original source file.
 */
896 {
897 duk_context *ctx = (duk_context *) thr;
898 duk_re_compiler_ctx re_ctx;
899 duk_lexer_point lex_point;
900 duk_hstring *h_pattern;
901 duk_hstring *h_flags;
/* NOTE(review): line 902 is absent here; 'ign_disj' (used at line 962) has no visible declaration. */
903
904 DUK_ASSERT(thr != NULL);
905 DUK_ASSERT(ctx != NULL);
906
907 /*
908 * Args validation
909 */
910
911 /* TypeError if fails */
912 h_pattern = duk_require_hstring(ctx, -2);
913 h_flags = duk_require_hstring(ctx, -1);
914
915 /*
916 * Create normalized 'source' property (E5 Section 15.10.3).
917 */
918
919 /* [ ... pattern flags ] */
920
/* NOTE(review): line 921 is absent here; the stack comments imply it pushes escaped_source. */
922
923 /* [ ... pattern flags escaped_source ] */
924
925 /*
926 * Init compilation context
927 */
928
929 /* [ ... pattern flags escaped_source buffer ] */
930
931 DUK_MEMZERO(&re_ctx, sizeof(re_ctx));
932 DUK_LEXER_INITCTX(&re_ctx.lex); /* duplicate zeroing, except for (possible) NULL inits */
933 re_ctx.thr = thr;
934 re_ctx.lex.thr = thr;
935 re_ctx.lex.input = DUK_HSTRING_GET_DATA(h_pattern);
936 re_ctx.lex.input_length = DUK_HSTRING_GET_BYTELEN(h_pattern);
/* NOTE(review): lines 937-938 are absent here; recursion_limit is logged below but never visibly assigned. */
939 re_ctx.re_flags = duk__parse_regexp_flags(thr, h_flags);
940
/* NOTE(review): line 941 is absent here; nothing visible initialises re_ctx.bw or pushes the buffer. */
942
943 DUK_DD(DUK_DDPRINT("regexp compiler ctx initialized, flags=0x%08lx, recursion_limit=%ld",
944 (unsigned long) re_ctx.re_flags, (long) re_ctx.recursion_limit));
945
946 /*
947 * Init lexer
948 */
949
950 lex_point.offset = 0; /* expensive init, just want to fill window */
951 lex_point.line = 1;
952 DUK_LEXER_SETPOINT(&re_ctx.lex, &lex_point);
953
954 /*
955 * Compilation
956 */
957
958 DUK_DD(DUK_DDPRINT("starting regexp compilation"));
959
/* NOTE(review): lines 960, 963 and 965 are absent; the 0 and 1 appended below are presumably operands of opcodes emitted on those lines. */
961 duk__append_u32(&re_ctx, 0);
962 duk__parse_disjunction(&re_ctx, 1 /*expect_eof*/, &ign_disj);
964 duk__append_u32(&re_ctx, 1);
966
967 /*
968 * Check for invalid backreferences; note that it is NOT an error
969 * to back-reference a capture group which has not yet been introduced
970 * in the pattern (as in /\1(foo)/); in fact, the backreference will
971 * always match! It IS an error to back-reference a capture group
972 * which will never be introduced in the pattern. Thus, we can check
973 * for such references only after parsing is complete.
974 */
975
976 if (re_ctx.highest_backref > re_ctx.captures) {
/* NOTE(review): line 977 is absent here; as shown, this branch has an empty body. */
978 }
979
980 /*
981 * Emit compiled regexp header: flags, ncaptures
982 * (insertion order inverted on purpose)
983 */
984
/* Both values are inserted at offset 0, so the flags inserted second end up first in the output. */
985 duk__insert_u32(&re_ctx, 0, (re_ctx.captures + 1) * 2);
986 duk__insert_u32(&re_ctx, 0, re_ctx.re_flags);
987
988 /* [ ... pattern flags escaped_source buffer ] */
989
990 DUK_BW_COMPACT(thr, &re_ctx.bw);
991 duk_to_string(ctx, -1); /* coerce to string */
992
993 /* [ ... pattern flags escaped_source bytecode ] */
994
995 /*
996 * Finalize stack
997 */
998
999 duk_remove(ctx, -4); /* -> [ ... flags escaped_source bytecode ] */
1000 duk_remove(ctx, -3); /* -> [ ... escaped_source bytecode ] */
1001
1002 DUK_DD(DUK_DDPRINT("regexp compilation successful, bytecode: %!T, escaped source: %!T",
1003 (duk_tval *) duk_get_tval(ctx, -1), (duk_tval *) duk_get_tval(ctx, -2)));
1004}
#define DUK_USE_REGEXP_COMPILER_RECLIMIT
#define DUK_MEMZERO(p, n)
#define DUK_BW_COMPACT(thr, bw_ctx)
DUK_EXTERNAL void duk_remove(duk_context *ctx, duk_idx_t index)
#define DUK_LEXER_INITCTX(ctx)
#define DUK_RE_COMPILE_TOKEN_LIMIT
#define DUK_LEXER_SETPOINT(ctx, pt)
DUK_INTERNAL_DECL duk_hstring * duk_require_hstring(duk_context *ctx, duk_idx_t index)
#define DUK_STR_INVALID_BACKREFS
DUK_INTERNAL_DECL duk_tval * duk_get_tval(duk_context *ctx, duk_idx_t index)
DUK_LOCAL duk_uint32_t duk__parse_regexp_flags(duk_hthread *thr, duk_hstring *h)
DUK_LOCAL void duk__create_escaped_source(duk_hthread *thr, int idx_pattern)

References duk_re_compiler_ctx::bw, duk_re_compiler_ctx::captures, duk__append_u32(), duk__create_escaped_source(), duk__insert_u32(), duk__parse_disjunction(), duk__parse_regexp_flags(), DUK__RE_INITIAL_BUFSIZE, DUK_ASSERT, DUK_BW_COMPACT, DUK_BW_INIT_PUSHBUF, DUK_DD, DUK_DDPRINT, DUK_ERROR_SYNTAX, duk_get_tval(), DUK_HSTRING_GET_BYTELEN, DUK_HSTRING_GET_DATA, DUK_LEXER_INITCTX, DUK_LEXER_SETPOINT, DUK_MEMZERO, DUK_RE_COMPILE_TOKEN_LIMIT, duk_remove(), DUK_REOP_MATCH, DUK_REOP_SAVE, duk_require_hstring(), DUK_STR_INVALID_BACKREFS, duk_to_string(), DUK_USE_REGEXP_COMPILER_RECLIMIT, duk_re_compiler_ctx::highest_backref, duk_lexer_ctx::input, duk_lexer_ctx::input_length, duk_re_compiler_ctx::lex, duk_lexer_point::line, NULL, duk_lexer_point::offset, duk_re_compiler_ctx::re_flags, duk_re_compiler_ctx::recursion_limit, duk_lexer_ctx::thr, duk_re_compiler_ctx::thr, and duk_lexer_ctx::token_limit.

◆ duk_regexp_create_instance()

DUK_INTERNAL void duk_regexp_create_instance ( duk_hthread * thr)

Definition at line 1016 of file duktape-1.5.2/src-separate/duk_regexp_compiler.c.

/*
 * Build a regexp object from the two values on top of the value stack.
 *
 * Value stack on entry:  [ ... escaped_source bytecode ]
 * Value stack on return: [ ... regexp_object ]
 *
 * The flags are read back from the first byte of the bytecode string.  A
 * new object is pushed and moved below the two inputs; the remaining
 * visible statements push three booleans derived from the flag bits and
 * an integer 0.
 *
 * NOTE(review): this listing skips source lines 1027, 1040-1041, 1043,
 * 1047, 1052, 1055, 1058 and 1061.  The References list for this function
 * names duk_xdef_prop_stridx, DUK_HOBJECT_SET_CLASS_NUMBER,
 * DUK_HOBJECT_SET_PROTOTYPE_UPDREF and several DUK_STRIDX_xxx constants,
 * none of which appear in the lines below, so the missing lines presumably
 * set the object's class/prototype and define one property per pushed
 * value -- confirm against the original source file.
 */
1016 {
1017 duk_context *ctx = (duk_context *) thr;
1018 duk_hobject *h;
1019 duk_hstring *h_bc;
1020 duk_small_int_t re_flags;
1021
1022 /* [ ... escaped_source bytecode ] */
1023
1024 h_bc = duk_get_hstring(ctx, -1);
1025 DUK_ASSERT(h_bc != NULL);
1026 DUK_ASSERT(DUK_HSTRING_GET_BYTELEN(h_bc) >= 1); /* always at least the header */
1028 DUK_ASSERT((duk_small_int_t) DUK_HSTRING_GET_DATA(h_bc)[0] < 0x80); /* flags always encodes to 1 byte */
1029 re_flags = (duk_small_int_t) DUK_HSTRING_GET_DATA(h_bc)[0];
1030
1031 /* [ ... escaped_source bytecode ] */
1032
1033 duk_push_object(ctx);
1034 h = duk_get_hobject(ctx, -1);
1035 DUK_ASSERT(h != NULL);
1036 duk_insert(ctx, -3);
1037
1038 /* [ ... regexp_object escaped_source bytecode ] */
1039
/* NOTE(review): lines 1040-1041, 1043 and 1047 are absent; the stack comments imply bytecode and then escaped_source are consumed here. */
1042
1044
1045 /* [ ... regexp_object escaped_source ] */
1046
1048
1049 /* [ ... regexp_object ] */
1050
/* NOTE(review): each push below is followed by a missing line (1052, 1055, 1058, 1061) that presumably consumes the pushed value. */
1051 duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_GLOBAL));
1053
1054 duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_IGNORE_CASE));
1056
1057 duk_push_boolean(ctx, (re_flags & DUK_RE_FLAG_MULTILINE));
1059
1060 duk_push_int(ctx, 0);
1062
1063 /* [ ... regexp_object ] */
1064}
#define DUK_HOBJECT_CLASS_REGEXP
DUK_EXTERNAL void duk_push_boolean(duk_context *ctx, duk_bool_t val)
#define DUK_PROPDESC_FLAGS_NONE
#define DUK_HSTRING_GET_CHARLEN(x)
#define DUK_HOBJECT_SET_PROTOTYPE_UPDREF(thr, h, p)
DUK_EXTERNAL void duk_push_int(duk_context *ctx, duk_int_t val)
DUK_EXTERNAL duk_idx_t duk_push_object(duk_context *ctx)
DUK_EXTERNAL void duk_insert(duk_context *ctx, duk_idx_t to_index)
#define DUK_STRIDX_LAST_INDEX
#define DUK_STRIDX_IGNORE_CASE
#define DUK_BIDX_REGEXP_PROTOTYPE
#define DUK_STRIDX_INT_BYTECODE
DUK_INTERNAL_DECL duk_hobject * duk_get_hobject(duk_context *ctx, duk_idx_t index)
DUK_INTERNAL_DECL void duk_xdef_prop_stridx(duk_context *ctx, duk_idx_t obj_index, duk_small_int_t stridx, duk_small_uint_t desc_flags)
#define DUK_HOBJECT_SET_CLASS_NUMBER(h, v)
duk_hobject * builtins[DUK_NUM_BUILTINS]

References duk_hthread::builtins, DUK_ASSERT, DUK_BIDX_REGEXP_PROTOTYPE, duk_get_hobject(), duk_get_hstring(), DUK_HOBJECT_CLASS_REGEXP, DUK_HOBJECT_SET_CLASS_NUMBER, DUK_HOBJECT_SET_PROTOTYPE_UPDREF, DUK_HSTRING_GET_BYTELEN, DUK_HSTRING_GET_CHARLEN, DUK_HSTRING_GET_DATA, duk_insert(), DUK_PROPDESC_FLAGS_NONE, DUK_PROPDESC_FLAGS_W, duk_push_boolean(), duk_push_int(), duk_push_object(), DUK_RE_FLAG_GLOBAL, DUK_RE_FLAG_IGNORE_CASE, DUK_RE_FLAG_MULTILINE, DUK_STRIDX_GLOBAL, DUK_STRIDX_IGNORE_CASE, DUK_STRIDX_INT_BYTECODE, DUK_STRIDX_LAST_INDEX, DUK_STRIDX_MULTILINE, DUK_STRIDX_SOURCE, duk_xdef_prop_stridx(), and NULL.