#include "duk_internal.h"

Functions
DUK_INTERNAL duk_small_int_t	duk_unicode_get_xutf8_length (duk_ucodepoint_t cp)

DUK_INTERNAL duk_small_int_t	duk_unicode_encode_xutf8 (duk_ucodepoint_t cp, duk_uint8_t *out)

DUK_INTERNAL duk_small_int_t	duk_unicode_encode_cesu8 (duk_ucodepoint_t cp, duk_uint8_t *out)

DUK_INTERNAL duk_small_int_t	duk_unicode_decode_xutf8 (duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end, duk_ucodepoint_t *out_cp)

DUK_INTERNAL duk_ucodepoint_t	duk_unicode_decode_xutf8_checked (duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end)

DUK_INTERNAL duk_size_t	duk_unicode_unvalidated_utf8_length (const duk_uint8_t *data, duk_size_t blen)

DUK_LOCAL duk_uint32_t	duk__uni_decode_value (duk_bitdecoder_ctx *bd_ctx)

DUK_LOCAL duk_small_int_t	duk__uni_range_match (const duk_uint8_t *unitab, duk_size_t unilen, duk_codepoint_t cp)

DUK_INTERNAL duk_small_int_t	duk_unicode_is_whitespace (duk_codepoint_t cp)

DUK_INTERNAL duk_small_int_t	duk_unicode_is_line_terminator (duk_codepoint_t cp)

DUK_INTERNAL duk_small_int_t	duk_unicode_is_identifier_start (duk_codepoint_t cp)

DUK_INTERNAL duk_small_int_t	duk_unicode_is_identifier_part (duk_codepoint_t cp)

DUK_INTERNAL duk_small_int_t	duk_unicode_is_letter (duk_codepoint_t cp)

DUK_LOCAL duk_codepoint_t	duk__slow_case_conversion (duk_hthread *thr, duk_bufwriter_ctx *bw, duk_codepoint_t cp, duk_bitdecoder_ctx *bd_ctx)

DUK_LOCAL duk_codepoint_t	duk__case_transform_helper (duk_hthread *thr, duk_bufwriter_ctx *bw, duk_codepoint_t cp, duk_codepoint_t prev, duk_codepoint_t next, duk_bool_t uppercase)

DUK_INTERNAL void	duk_unicode_case_convert_string (duk_hthread *thr, duk_small_int_t uppercase)

DUK_INTERNAL duk_codepoint_t	duk_unicode_re_canonicalize_char (duk_hthread *thr, duk_codepoint_t cp)

DUK_INTERNAL duk_small_int_t	duk_unicode_re_is_wordchar (duk_codepoint_t x)

Variables
DUK_INTERNAL const duk_int8_t	duk_is_idchar_tab [128]

DUK_INTERNAL const duk_uint8_t	duk_unicode_xutf8_markers [7]

DUK_INTERNAL const duk_uint16_t	duk_unicode_re_ranges_digit [2]

DUK_INTERNAL const duk_uint16_t	duk_unicode_re_ranges_white [22]

DUK_INTERNAL const duk_uint16_t	duk_unicode_re_ranges_wordchar [8]

DUK_INTERNAL const duk_uint16_t	duk_unicode_re_ranges_not_digit [4]

DUK_INTERNAL const duk_uint16_t	duk_unicode_re_ranges_not_white [24]

DUK_INTERNAL const duk_uint16_t	duk_unicode_re_ranges_not_wordchar [10]

Function Documentation

◆ duk__case_transform_helper()

DUK_LOCAL duk_codepoint_t duk__case_transform_helper	(	duk_hthread *	thr,
		duk_bufwriter_ctx *	bw,
		duk_codepoint_t	cp,
		duk_codepoint_t	prev,
		duk_codepoint_t	next,
		duk_bool_t	uppercase )

Definition at line 914 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                 {
    /*
     *  Case-convert a single codepoint 'cp': to upper case when 'uppercase'
     *  is non-zero, to lower case otherwise.  'prev' and 'next' are the
     *  neighbouring codepoints; they are only consulted by the final sigma
     *  rule below.  When 'bw' is non-NULL the converted output is also
     *  written to the buffer writer.
     *
     *  Returns the converted codepoint for the ASCII and final sigma paths;
     *  for everything else returns whatever duk__slow_case_conversion()
     *  returns.
     */
    duk_bitdecoder_ctx bd_ctx;
 
    /* fast path for ASCII */
    if (cp < 0x80L) {
        /* XXX: there are language sensitive rules for the ASCII range.
         * If/when language/locale support is implemented, they need to
         * be implemented here for the fast path.  There are no context
         * sensitive rules for ASCII range.
         */
 
        if (uppercase) {
            if (cp >= 'a' && cp <= 'z') {
                cp = cp - 'a' + 'A';
            }
        } else {
            if (cp >= 'A' && cp <= 'Z') {
                cp = cp - 'A' + 'a';
            }
        }
 
        /* The result is written as a single raw byte on this path. */
        if (bw != NULL) {
            DUK_BW_WRITE_RAW_U8(thr, bw, (duk_uint8_t) cp);
        }
        return cp;
    }
 
    /* context and locale specific rules which cannot currently be represented
     * in the caseconv bitstream: hardcoded rules in C
     */
    if (uppercase) {
        /* XXX: turkish / azeri */
    } else {
        /*
         *  Final sigma context specific rule.  This is a rather tricky
         *  rule and this handling is probably not 100% correct now.
         *  The rule is not locale/language specific so it is supported.
         */
 
        if (cp == 0x03a3L &&    /* U+03A3 = GREEK CAPITAL LETTER SIGMA */
            duk_unicode_is_letter(prev) &&        /* prev exists and is a letter */
            !duk_unicode_is_letter(next)) {       /* next does not exist or next is not a letter */
            /* Capital sigma occurred at "end of word", lowercase to
             * U+03C2 = GREEK SMALL LETTER FINAL SIGMA.  Otherwise
             * fall through and let the normal rules lowercase it to
             * U+03C3 = GREEK SMALL LETTER SIGMA.
             */
            cp = 0x03c2L;
            goto singlechar;
        }
 
        /* XXX: lithuanian not implemented */
        /* XXX: lithuanian, explicit dot rules */
        /* XXX: turkish / azeri, lowercase rules */
    }
 
    /* 1:1 or special conversions, but not locale/context specific: script generated rules */
    /* Point a zeroed bit decoder at the upper case or lower case table. */
    DUK_MEMZERO(&bd_ctx, sizeof(bd_ctx));
    if (uppercase) {
        bd_ctx.data = (const duk_uint8_t *) duk_unicode_caseconv_uc;
        bd_ctx.length = (duk_size_t) sizeof(duk_unicode_caseconv_uc);
    } else {
        bd_ctx.data = (const duk_uint8_t *) duk_unicode_caseconv_lc;
        bd_ctx.length = (duk_size_t) sizeof(duk_unicode_caseconv_lc);
    }
    return duk__slow_case_conversion(thr, bw, cp, &bd_ctx);
 
    /* Reached only via goto from the final sigma rule above. */
 singlechar:
    if (bw != NULL) {
        DUK_BW_WRITE_RAW_XUTF8(thr, bw, (duk_ucodepoint_t) cp);
    }
    return cp;
 
 /* unused now, not needed until Turkish/Azeri */
#if 0
 nochar:
    return -1;
#endif
}

References duk_bitdecoder_ctx::data, duk__slow_case_conversion(), DUK_BW_WRITE_RAW_U8, DUK_BW_WRITE_RAW_XUTF8, DUK_MEMZERO, duk_unicode_caseconv_lc, duk_unicode_caseconv_uc, duk_unicode_is_letter(), duk_bitdecoder_ctx::length, next, and NULL.

Referenced by duk_unicode_case_convert_string(), and duk_unicode_re_canonicalize_char().

◆ duk__slow_case_conversion()

DUK_LOCAL duk_codepoint_t duk__slow_case_conversion	(	duk_hthread *	thr,
		duk_bufwriter_ctx *	bw,
		duk_codepoint_t	cp,
		duk_bitdecoder_ctx *	bd_ctx )

Definition at line 808 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                      {
    /*
     *  Table driven case conversion of 'cp' using the bitstream behind
     *  'bd_ctx'.  The stream is consumed strictly in order, in three
     *  sections:
     *
     *    1. Range groups.  Each group starts with a 6-bit range count
     *       (0x3f terminates the section) followed by that many
     *       (16-bit start_i, 16-bit start_o, 7-bit count) records.
     *       'skip' is 1 for the first group and grows by one per group.
     *    2. 1:1 mappings: a 6-bit count followed by
     *       (16-bit input, 16-bit output) pairs.
     *    3. 1:n mappings: a 7-bit count followed by records of
     *       (16-bit input, 2-bit output length, that many 16-bit outputs).
     *
     *  The first matching rule wins.  For a single codepoint result the
     *  codepoint is written to 'bw' (when non-NULL) and returned; if no
     *  rule matches the input is written and returned unchanged.  For a
     *  1:n match the output codepoints are written to 'bw' (when non-NULL)
     *  and -1 is returned.
     */
    duk_small_int_t skip = 0;
    duk_small_int_t n;
    duk_small_int_t t;
    duk_small_int_t count;
    duk_codepoint_t tmp_cp;
    duk_codepoint_t start_i;
    duk_codepoint_t start_o;
 
    DUK_UNREF(thr);
    DUK_ASSERT(bd_ctx != NULL);
 
    DUK_DDD(DUK_DDDPRINT("slow case conversion for codepoint: %ld", (long) cp));
 
    /* range conversion with a "skip" */
    DUK_DDD(DUK_DDDPRINT("checking ranges"));
    for (;;) {
        skip++;
        n = (duk_small_int_t) duk_bd_decode(bd_ctx, 6);
        if (n == 0x3f) {
            /* end marker */
            break;
        }
        DUK_DDD(DUK_DDDPRINT("skip=%ld, n=%ld", (long) skip, (long) n));
 
        while (n--) {
            start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
            start_o = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
            count = (duk_small_int_t) duk_bd_decode(bd_ctx, 7);
            DUK_DDD(DUK_DDDPRINT("range: start_i=%ld, start_o=%ld, count=%ld, skip=%ld",
                                 (long) start_i, (long) start_o, (long) count, (long) skip));
 
            /* A range covers the 'count' inputs start_i, start_i + skip,
             * start_i + 2 * skip, ...; a matching input maps to start_o
             * plus the same offset.
             */
            if (cp >= start_i) {
                tmp_cp = cp - start_i;  /* always >= 0 */
                if (tmp_cp < (duk_codepoint_t) count * (duk_codepoint_t) skip &&
                    (tmp_cp % (duk_codepoint_t) skip) == 0) {
                    DUK_DDD(DUK_DDDPRINT("range matches input codepoint"));
                    cp = start_o + tmp_cp;
                    goto single;
                }
            }
        }
    }
 
    /* 1:1 conversion */
    n = (duk_small_int_t) duk_bd_decode(bd_ctx, 6);
    DUK_DDD(DUK_DDDPRINT("checking 1:1 conversions (count %ld)", (long) n));
    while (n--) {
        start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
        start_o = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
        DUK_DDD(DUK_DDDPRINT("1:1 conversion %ld -> %ld", (long) start_i, (long) start_o));
        if (cp == start_i) {
            DUK_DDD(DUK_DDDPRINT("1:1 matches input codepoint"));
            cp = start_o;
            goto single;
        }
    }
 
    /* complex, multicharacter conversion */
    n = (duk_small_int_t) duk_bd_decode(bd_ctx, 7);
    DUK_DDD(DUK_DDDPRINT("checking 1:n conversions (count %ld)", (long) n));
    while (n--) {
        start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
        t = (duk_small_int_t) duk_bd_decode(bd_ctx, 2);
        DUK_DDD(DUK_DDDPRINT("1:n conversion %ld -> %ld chars", (long) start_i, (long) t));
        if (cp == start_i) {
            DUK_DDD(DUK_DDDPRINT("1:n matches input codepoint"));
            if (bw != NULL) {
                while (t--) {
                    tmp_cp = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
                    DUK_BW_WRITE_RAW_XUTF8(thr, bw, (duk_ucodepoint_t) tmp_cp);
                }
            }
            /* Multi-codepoint result: there is no single codepoint to return. */
            return -1;
        } else {
            /* Not a match: the output codepoints must still be consumed
             * so that the decoder is positioned at the next record.
             */
            while (t--) {
                (void) duk_bd_decode(bd_ctx, 16);
            }
        }
    }
 
    /* default: no change */
    DUK_DDD(DUK_DDDPRINT("no rule matches, output is same as input"));
    /* fall through */
 
 single:
    if (bw != NULL) {
        DUK_BW_WRITE_RAW_XUTF8(thr, bw, (duk_ucodepoint_t) cp);
    }
    return cp;
}

References DUK_ASSERT, duk_bd_decode(), DUK_BW_WRITE_RAW_XUTF8, DUK_DDD, DUK_DDDPRINT, DUK_UNREF, and NULL.

Referenced by duk__case_transform_helper().

◆ duk__uni_decode_value()

DUK_LOCAL duk_uint32_t duk__uni_decode_value ( duk_bitdecoder_ctx * bd_ctx )

Definition at line 400 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                         {
    /*
     *  Read one variable length value from the bit decoder.  Encoding as
     *  consumed here:
     *
     *    4-bit field 0x0..0xe            -> value is the field itself
     *    4-bit field 0xf, then 8 bits:
     *      0x00..0xfd                    -> field + 0x0f
     *      0xfe, then 12 bits            -> field + 0x0f + 0xfe
     *      0xff, then 24 bits            -> field + 0x0f + 0xfe + 0x1000
     */
    duk_uint32_t field;

    field = (duk_uint32_t) duk_bd_decode(bd_ctx, 4);
    if (field <= 0x0eU) {
        /* short form, value fits in the initial nibble */
        return field;
    }

    field = (duk_uint32_t) duk_bd_decode(bd_ctx, 8);
    if (field <= 0xfdU) {
        return field + 0x0f;
    }

    if (field != 0xfeU) {
        /* longest form: 24-bit payload */
        field = (duk_uint32_t) duk_bd_decode(bd_ctx, 24);
        return field + 0x0fU + 0xfeU + 0x1000UL;
    }

    /* 0xfe escape: 12-bit payload */
    field = (duk_uint32_t) duk_bd_decode(bd_ctx, 12);
    return field + 0x0fU + 0xfeU;
}

References duk_bd_decode().

Referenced by duk__uni_range_match().

◆ duk__uni_range_match()

DUK_LOCAL duk_small_int_t duk__uni_range_match	(	const duk_uint8_t *	unitab,
		duk_size_t	unilen,
		duk_codepoint_t	cp )

Definition at line 420 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                                                 {
    /*
     *  Check whether 'cp' falls inside any range of the packed range table
     *  'unitab' ('unilen' bytes).  The table is a sequence of value pairs
     *  read with duk__uni_decode_value(): the first value is the distance
     *  from the end of the previous range to the start of this one (a zero
     *  here terminates the table), the second is the distance from the
     *  range start to the range end.  Returns 1 on a match, 0 otherwise.
     */
    duk_bitdecoder_ctx bd_ctx;
    duk_codepoint_t last_end;
    duk_codepoint_t delta;
    duk_codepoint_t lo;
    duk_codepoint_t hi;

    DUK_MEMZERO(&bd_ctx, sizeof(bd_ctx));
    bd_ctx.data = (const duk_uint8_t *) unitab;
    bd_ctx.length = (duk_size_t) unilen;

    last_end = 0;
    while ((delta = (duk_codepoint_t) duk__uni_decode_value(&bd_ctx)) != 0) {
        lo = last_end + delta;
        hi = lo + (duk_codepoint_t) duk__uni_decode_value(&bd_ctx);
        last_end = hi;

        /* the range is [lo,hi], both ends inclusive */

        DUK_DDD(DUK_DDDPRINT("duk__uni_range_match: cp=%06lx range=[0x%06lx,0x%06lx]",
                             (unsigned long) cp, (unsigned long) lo, (unsigned long) hi));
        if (lo <= cp && cp <= hi) {
            return 1;
        }
    }

    /* terminator reached without a hit */
    return 0;
}

References duk_bitdecoder_ctx::data, duk__uni_decode_value(), DUK_DDD, DUK_DDDPRINT, DUK_MEMZERO, and duk_bitdecoder_ctx::length.

Referenced by duk_unicode_is_identifier_part(), duk_unicode_is_identifier_start(), and duk_unicode_is_letter().

◆ duk_unicode_case_convert_string()

DUK_INTERNAL void duk_unicode_case_convert_string	(	duk_hthread *	thr,
		duk_small_int_t	uppercase )

Definition at line 1003 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                               {
    /*
     *  Case-convert the string at the value stack top: to upper case when
     *  'uppercase' is non-zero, to lower case otherwise.  The converted
     *  bytes are collected into a pushed buffer which is finally coerced
     *  with duk_to_string(); the original input value is then removed so
     *  that the result takes its place on the stack.
     *
     *  The input is walked with a three codepoint window (prev, curr, next)
     *  because duk__case_transform_helper() needs the neighbours of the
     *  codepoint being converted.  A value of -1 means "no such codepoint".
     *  A decode failure is handled inside
     *  duk_unicode_decode_xutf8_checked().
     */
    duk_context *ctx = (duk_context *) thr;
    duk_hstring *h_input;
    duk_bufwriter_ctx bw_alloc;
    duk_bufwriter_ctx *bw;
    const duk_uint8_t *p, *p_start, *p_end;
    duk_codepoint_t prev, curr, next;

    h_input = duk_require_hstring(ctx, -1);
    DUK_ASSERT(h_input != NULL);

    bw = &bw_alloc;
    DUK_BW_INIT_PUSHBUF(thr, bw, DUK_HSTRING_GET_BYTELEN(h_input));

    /* [ ... input buffer ] */

    p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_input);
    p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
    p = p_start;

    prev = -1; DUK_UNREF(prev);
    curr = -1;
    next = -1;
    for (;;) {
        /* Slide the window forward by one codepoint. */
        prev = curr;
        curr = next;
        next = -1;
        if (p < p_end) {
            /* Cast straight to the codepoint type, consistent with the
             * other casts in this function, instead of going through
             * plain 'int'.
             */
            next = (duk_codepoint_t) duk_unicode_decode_xutf8_checked(thr, &p, p_start, p_end);
        } else {
            /* end of input and last char has been processed */
            if (curr < 0) {
                break;
            }
        }

        /* on first round, skip */
        if (curr >= 0) {
            /* XXX: could add a fast path to process chunks of input codepoints,
             * but relative benefit would be quite small.
             */

            /* Ensure space for maximum multi-character result; estimate is overkill. */
            DUK_BW_ENSURE(thr, bw, 8 * DUK_UNICODE_MAX_XUTF8_LENGTH);

            /* Return value is not needed, output goes to 'bw'. */
            duk__case_transform_helper(thr,
                                       bw,
                                       (duk_codepoint_t) curr,
                                       prev,
                                       next,
                                       uppercase);
        }
    }

    DUK_BW_COMPACT(thr, bw);
    duk_to_string(ctx, -1);  /* coerce the output buffer at stack top into the result string */
    duk_remove(ctx, -2);     /* drop the original input below the result */
}

References duk__case_transform_helper(), DUK_ASSERT, DUK_BW_COMPACT, DUK_BW_ENSURE, DUK_BW_INIT_PUSHBUF, DUK_HSTRING_GET_BYTELEN, DUK_HSTRING_GET_DATA, duk_remove(), duk_require_hstring(), duk_to_string(), duk_unicode_decode_xutf8_checked(), DUK_UNICODE_MAX_XUTF8_LENGTH, DUK_UNREF, next, and NULL.

◆ duk_unicode_decode_xutf8()

DUK_INTERNAL duk_small_int_t duk_unicode_decode_xutf8	(	duk_hthread *	thr,
		const duk_uint8_t **	ptr,
		const duk_uint8_t *	ptr_start,
		const duk_uint8_t *	ptr_end,
		duk_ucodepoint_t *	out_cp )

Definition at line 183 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                                                                                                                     {
    /*
     *  Decode one extended UTF-8 codepoint starting at '*ptr', which must
     *  lie within [ptr_start,ptr_end[.
     *
     *  On success the codepoint is stored to '*out_cp', '*ptr' is advanced
     *  past the consumed bytes and 1 is returned.  On failure 0 is
     *  returned and neither '*ptr' nor '*out_cp' is modified.  Failure
     *  cases: '*ptr' outside the given bounds, an initial byte which is a
     *  continuation byte (0x80...0xbf) or 0xff, or a sequence which would
     *  extend past 'ptr_end'.
     *
     *  Continuation bytes are not validated: only their low 6 bits are
     *  used, whatever the top bits are.
     */
    const duk_uint8_t *p;
    duk_uint32_t res;
    duk_uint_fast8_t ch;
    duk_small_int_t n;
 
    DUK_UNREF(thr);
 
    p = *ptr;
    if (p < ptr_start || p >= ptr_end) {
        goto fail;
    }
 
    /*
     *  UTF-8 decoder which accepts longer than standard byte sequences.
     *  This allows full 32-bit code points to be used.
     */
 
    /* 'res' gets the payload bits of the initial byte, 'n' the number of
     * continuation bytes that follow.
     */
    ch = (duk_uint_fast8_t) (*p++);
    if (ch < 0x80) {
        /* 0xxx xxxx   [7 bits] */
        res = (duk_uint32_t) (ch & 0x7f);
        n = 0;
    } else if (ch < 0xc0) {
        /* 10xx xxxx -> invalid */
        goto fail;
    } else if (ch < 0xe0) {
        /* 110x xxxx   10xx xxxx   [11 bits] */
        res = (duk_uint32_t) (ch & 0x1f);
        n = 1;
    } else if (ch < 0xf0) {
        /* 1110 xxxx   10xx xxxx   10xx xxxx   [16 bits] */
        res = (duk_uint32_t) (ch & 0x0f);
        n = 2;
    } else if (ch < 0xf8) {
        /* 1111 0xxx   10xx xxxx   10xx xxxx   10xx xxxx   [21 bits] */
        res = (duk_uint32_t) (ch & 0x07);
        n = 3;
    } else if (ch < 0xfc) {
        /* 1111 10xx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   [26 bits] */
        res = (duk_uint32_t) (ch & 0x03);
        n = 4;
    } else if (ch < 0xfe) {
        /* 1111 110x   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   [31 bits] */
        res = (duk_uint32_t) (ch & 0x01);
        n = 5;
    } else if (ch < 0xff) {
        /* 1111 1110   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   [36 bits] */
        /* NOTE(review): 36 payload bits do not fit into 'res' if
         * duk_uint32_t is exactly 32 bits wide; the topmost bits would
         * then be shifted out in the loop below -- confirm intended.
         */
        res = (duk_uint32_t) (0);
        n = 6;
    } else {
        /* 8-byte format could be:
         * 1111 1111   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   10xx xxxx   [41 bits]
         *
         * However, this format would not have a zero bit following the
         * leading one bits and would not allow 0xFF to be used as an
         * "invalid xutf-8" marker for internal keys.  Further, 8-byte
         * encodings (up to 41 bit code points) are not currently needed.
         */
        goto fail;
    }
 
    DUK_ASSERT(p >= ptr_start);  /* verified at beginning */
    if (p + n > ptr_end) {
        /* check pointer at end */
        goto fail;
    }
 
    /* Append 6 payload bits from each continuation byte. */
    while (n > 0) {
        DUK_ASSERT(p >= ptr_start && p < ptr_end);
        res = res << 6;
        res += (duk_uint32_t) ((*p++) & 0x3f);
        n--;
    }
 
    /* Outputs are only updated once decoding has succeeded. */
    *ptr = p;
    *out_cp = res;
    return 1;
 
 fail:
    return 0;
}

References DUK_ASSERT, and DUK_UNREF.

Referenced by duk_unicode_decode_xutf8_checked().

◆ duk_unicode_decode_xutf8_checked()

DUK_INTERNAL duk_ucodepoint_t duk_unicode_decode_xutf8_checked	(	duk_hthread *	thr,
		const duk_uint8_t **	ptr,
		const duk_uint8_t *	ptr_start,
		const duk_uint8_t *	ptr_end )

Definition at line 267 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                                                                                                    {
    /*
     *  Wrapper for duk_unicode_decode_xutf8() which returns the decoded
     *  codepoint directly and raises an internal error when decoding
     *  fails.
     */
    duk_ucodepoint_t decoded;

    if (!duk_unicode_decode_xutf8(thr, ptr, ptr_start, ptr_end, &decoded)) {
        DUK_ERROR_INTERNAL(thr, "utf-8 decode failed");  /* XXX: 'internal error' is a bit of a misnomer */
        DUK_UNREACHABLE();
        return 0;
    }

    return decoded;
}

References DUK_ERROR_INTERNAL, duk_unicode_decode_xutf8(), and DUK_UNREACHABLE.

Referenced by duk_unicode_case_convert_string().

◆ duk_unicode_encode_cesu8()

DUK_INTERNAL duk_small_int_t duk_unicode_encode_cesu8	(	duk_ucodepoint_t	cp,
		duk_uint8_t *	out )

Definition at line 123 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                             {
    /*
     *  Encode 'cp' as CESU-8 into 'out' and return the number of bytes
     *  written (1, 2, 3 or 6).  The caller must provide room for 6 bytes.
     */
    duk_uint_fast32_t v = (duk_uint_fast32_t) cp;

    if (v < 0x80UL) {
        /* single byte, ASCII */
        out[0] = (duk_uint8_t) v;
        return 1;
    }

    if (v < 0x800UL) {
        out[0] = (duk_uint8_t) (0xc0 | ((v >> 6) & 0x1f));
        out[1] = (duk_uint8_t) (0x80 | (v & 0x3f));
        return 2;
    }

    if (v < 0x10000UL) {
        /* 16-bit values, including surrogate codepoints given as such */
        out[0] = (duk_uint8_t) (0xe0 | ((v >> 12) & 0x0f));
        out[1] = (duk_uint8_t) (0x80 | ((v >> 6) & 0x3f));
        out[2] = (duk_uint8_t) (0x80 | (v & 0x3f));
        return 3;
    }

    /*
     *  Above U+FFFF: emit a surrogate pair, each half as a 3-byte
     *  sequence, so that every encoded unit is a 16-bit value.
     *
     *  After subtracting 0x10000 the value has 20 significant bits which
     *  are split into a high half A (bits 19..10) and a low half B
     *  (bits 9..0):
     *
     *    high surrogate = 0xd800 + A  ->  0xed, 0xa0 | A[9:6], 0x80 | A[5:0]
     *    low surrogate  = 0xdc00 + B  ->  0xed, 0xb0 | B[9:6], 0x80 | B[5:0]
     *
     *  The bytes are computed straight from the 20-bit value without
     *  materializing the two surrogates.
     */
    v -= 0x10000UL;

    out[0] = (duk_uint8_t) (0xed);
    out[1] = (duk_uint8_t) (0xa0 | ((v >> 16) & 0x0f));
    out[2] = (duk_uint8_t) (0x80 | ((v >> 10) & 0x3f));
    out[3] = (duk_uint8_t) (0xed);
    out[4] = (duk_uint8_t) (0xb0 | ((v >> 6) & 0x0f));
    out[5] = (duk_uint8_t) (0x80 | (v & 0x3f));
    return 6;
}

◆ duk_unicode_encode_xutf8()

DUK_INTERNAL duk_small_int_t duk_unicode_encode_xutf8	(	duk_ucodepoint_t	cp,
		duk_uint8_t *	out )

Definition at line 89 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                             {
    /*
     *  Encode 'cp' as extended UTF-8 into 'out' and return the number of
     *  bytes written, as given by duk_unicode_get_xutf8_length().
     */
    duk_uint_fast32_t bits = (duk_uint_fast32_t) cp;
    duk_small_int_t nbytes;
    duk_small_int_t idx;

    nbytes = duk_unicode_get_xutf8_length(cp);
    DUK_ASSERT(nbytes > 0);

    /* Continuation bytes are filled from the last byte backwards, each
     * taking the lowest 6 bits still left in 'bits'.
     */
    for (idx = nbytes - 1; idx > 0; idx--) {
        out[idx] = (duk_uint8_t) (0x80 + (bits & 0x3f));
        bits >>= 6;
    }

    /* Initial byte: length marker plus the remaining top bits.  No
     * masking is needed, the length lookup and the shifts above leave no
     * bits that would overlap the marker.  Index is always >= 0.
     */
    out[0] = (duk_uint8_t) (duk_unicode_xutf8_markers[nbytes - 1] + bits);

    return nbytes;
}

References DUK_ASSERT, duk_unicode_get_xutf8_length(), and duk_unicode_xutf8_markers.

◆ duk_unicode_get_xutf8_length()

DUK_INTERNAL duk_small_int_t duk_unicode_get_xutf8_length ( duk_ucodepoint_t cp )

Definition at line 33 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                               {
    /*
     *  Number of bytes (1...7) needed to encode 'cp' in extended UTF-8.
     *  Each limit below is the first value which no longer fits into the
     *  payload bits of the shorter encoding.
     */
    duk_uint_fast32_t v = (duk_uint_fast32_t) cp;

    if (v < 0x80UL) {
        return 1;  /* fits in 7 bits */
    }
    if (v < 0x800UL) {
        return 2;  /* fits in 11 bits */
    }
    if (v < 0x10000UL) {
        return 3;  /* fits in 16 bits */
    }
    if (v < 0x200000UL) {
        return 4;  /* fits in 21 bits */
    }
    if (v < 0x4000000UL) {
        return 5;  /* fits in 26 bits */
    }
    if (v < (duk_ucodepoint_t) 0x80000000UL) {
        return 6;  /* fits in 31 bits */
    }
    return 7;      /* 36 bit format */
}

Referenced by duk_unicode_encode_xutf8().

◆ duk_unicode_is_identifier_part()

DUK_INTERNAL duk_small_int_t duk_unicode_is_identifier_part ( duk_codepoint_t cp )

Definition at line 636 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                {
    /*
     *  Returns 1 if 'cp' is accepted as an IdentifierPart character and
     *  0 otherwise.  Negative 'cp' values are rejected by both variants
     *  of the fast path below.
     *
     *  E5 Section 7.6:
     *
     *    IdentifierPart:
     *      IdentifierStart
     *      UnicodeCombiningMark
     *      UnicodeDigit
     *      UnicodeConnectorPunctuation
     *      <ZWNJ>  [U+200C]
     *      <ZWJ>   [U+200D]
     *
     *  IdentifierPart production has one multi-character production
     *  as part of its IdentifierStart alternative.  The '\' character
     *  of an escape sequence is not matched here, see discussion in
     *  duk_unicode_is_identifier_start().
     *
     *  To match non-ASCII characters (codepoints >= 0x80), a very slow
     *  linear range-by-range scan is used.  The codepoint is first compared
     *  to the IdentifierStart ranges, and if it doesn't match, then to a
     *  set consisting of code points in IdentifierPart but not in
     *  IdentifierStart.  This is done to keep the unicode range data small,
     *  at the expense of speed.
     *
     *  The ASCII fast path consists of:
     *
     *    0x0030 ... 0x0039     ['0' ... '9', UnicodeDigit]
     *    0x0041 ... 0x005a     ['A' ... 'Z', IdentifierStart]
     *    0x0061 ... 0x007a     ['a' ... 'z', IdentifierStart]
     *    0x0024                ['$', IdentifierStart]
     *    0x005f                ['_', IdentifierStart and
     *                                UnicodeConnectorPunctuation]
     *
     *  UnicodeCombiningMark has no code points <= 0x7f.
     *
     *  The matching code reuses the "identifier start" tables, and then
     *  consults a separate range set for characters in "identifier part"
     *  but not in "identifier start".  These can be extracted with the
     *  "src/extract_chars.py" script.
     *
     *  UnicodeCombiningMark -> categories Mn, Mc
     *  UnicodeDigit -> categories Nd
     *  UnicodeConnectorPunctuation -> categories Pc
     */
 
    /* ASCII (and EOF) fast path -- quick accept and reject */
    if (cp <= 0x7fL) {
#if defined(DUK_USE_IDCHAR_FASTPATH)
        /* Any non-zero table entry is accepted here; the cp >= 0 check
         * keeps a negative value from being used as a table index.
         */
        return (cp >= 0) && (duk_is_idchar_tab[cp] != 0);
#else
        if ((cp >= 'a' && cp <= 'z') ||
            (cp >= 'A' && cp <= 'Z') ||
            (cp >= '0' && cp <= '9') ||
            cp == '_' || cp == '$') {
            return 1;
        }
        return 0;
#endif
    }
 
    /* Non-ASCII slow path (range-by-range linear comparison), very slow */
 
#ifdef DUK_USE_SOURCE_NONBMP
    /* Full tables: identifier start ranges first, then the ranges which
     * are in identifier part only.
     */
    if (duk__uni_range_match(duk_unicode_ids_noa,
                             sizeof(duk_unicode_ids_noa),
                             (duk_codepoint_t) cp) ||
        duk__uni_range_match(duk_unicode_idp_m_ids_noa,
                             sizeof(duk_unicode_idp_m_ids_noa),
                             (duk_codepoint_t) cp)) {
        return 1;
    }
    return 0;
#else
    /* Only codepoints below 0x10000 are looked up in this configuration. */
    if (cp < 0x10000L) {
        if (duk__uni_range_match(duk_unicode_ids_noabmp,
                                 sizeof(duk_unicode_ids_noabmp),
                                 (duk_codepoint_t) cp) ||
            duk__uni_range_match(duk_unicode_idp_m_ids_noabmp,
                                 sizeof(duk_unicode_idp_m_ids_noabmp),
                                 (duk_codepoint_t) cp)) {
            return 1;
        }
        return 0;
    } else {
        /* without explicit non-BMP support, assume non-BMP characters
         * are always accepted as identifier characters.
         */
        return 1;
    }
#endif
}

References duk__uni_range_match(), duk_is_idchar_tab, duk_unicode_idp_m_ids_noa, duk_unicode_idp_m_ids_noabmp, duk_unicode_ids_noa, and duk_unicode_ids_noabmp.

◆ duk_unicode_is_identifier_start()

DUK_INTERNAL duk_small_int_t duk_unicode_is_identifier_start ( duk_codepoint_t cp )

Definition at line 557 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                 {
    /*
     *  Returns 1 if 'cp' is accepted as an IdentifierStart character and
     *  0 otherwise.  Negative 'cp' values are rejected by both variants
     *  of the fast path below.
     *
     *  E5 Section 7.6:
     *
     *    IdentifierStart:
     *      UnicodeLetter
     *      $
     *      _
     *      \ UnicodeEscapeSequence
     *
     *  IdentifierStart production has one multi-character production:
     *
     *    \ UnicodeEscapeSequence
     *
     *  The '\' character is -not- matched by this function.  Rather, the caller
     *  should decode the escape and then call this function to check whether the
     *  decoded character is acceptable (see discussion in E5 Section 7.6).
     *
     *  The "UnicodeLetter" alternative of the production allows letters
     *  from various Unicode categories.  These can be extracted with the
     *  "src/extract_chars.py" script.
     *
     *  Because the result has hundreds of Unicode codepoint ranges, matching
     *  for any values >= 0x80 are done using a very slow range-by-range scan
     *  and a packed range format.
     *
     *  The ASCII portion (codepoints 0x00 ... 0x7f) is fast-pathed below because
     *  it matters the most.  The ASCII related ranges of IdentifierStart are:
     *
     *    0x0041 ... 0x005a     ['A' ... 'Z']
     *    0x0061 ... 0x007a     ['a' ... 'z']
     *    0x0024                ['$']
     *    0x005f                ['_']
     */
 
    /* ASCII (and EOF) fast path -- quick accept and reject */
    if (cp <= 0x7fL) {
#if defined(DUK_USE_IDCHAR_FASTPATH)
        /* Only strictly positive table entries are accepted here; the
         * cp >= 0 check keeps a negative value from being used as a
         * table index.
         */
        return (cp >= 0) && (duk_is_idchar_tab[cp] > 0);
#else
        if ((cp >= 'a' && cp <= 'z') ||
            (cp >= 'A' && cp <= 'Z') ||
            cp == '_' || cp == '$') {
            return 1;
        }
        return 0;
#endif
    }
 
    /* Non-ASCII slow path (range-by-range linear comparison), very slow */
 
#ifdef DUK_USE_SOURCE_NONBMP
    if (duk__uni_range_match(duk_unicode_ids_noa,
                             (duk_size_t) sizeof(duk_unicode_ids_noa),
                             (duk_codepoint_t) cp)) {
        return 1;
    }
    return 0;
#else
    /* Only codepoints below 0x10000 are looked up in this configuration. */
    if (cp < 0x10000L) {
        if (duk__uni_range_match(duk_unicode_ids_noabmp,
                                 sizeof(duk_unicode_ids_noabmp),
                                 (duk_codepoint_t) cp)) {
            return 1;
        }
        return 0;
    } else {
        /* without explicit non-BMP support, assume non-BMP characters
         * are always accepted as identifier characters.
         */
        return 1;
    }
#endif
}

References duk__uni_range_match(), duk_is_idchar_tab, duk_unicode_ids_noa, and duk_unicode_ids_noabmp.

◆ duk_unicode_is_letter()

DUK_INTERNAL duk_small_int_t duk_unicode_is_letter ( duk_codepoint_t cp )

Definition at line 732 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                       {
    /*
     *  Returns 1 if 'cp' is treated as a letter and 0 otherwise.
     *  Negative 'cp' values take the fast path below and yield 0.
     *
     *  Unicode letter is now taken to be the categories:
     *
     *    Lu, Ll, Lt, Lm, Lo
     *
     *  (Not sure if this is exactly correct.)
     *
     *  The ASCII fast path consists of:
     *
     *    0x0041 ... 0x005a     ['A' ... 'Z']
     *    0x0061 ... 0x007a     ['a' ... 'z']
     */
 
    /* ASCII (and EOF) fast path -- quick accept and reject */
    if (cp <= 0x7fL) {
        if ((cp >= 'a' && cp <= 'z') ||
            (cp >= 'A' && cp <= 'Z')) {
            return 1;
        }
        return 0;
    }
 
    /* Non-ASCII slow path (range-by-range linear comparison), very slow */
 
    /* A letter is a codepoint which matches the identifier start ranges
     * but does not match the second ("ids_m_let") range set.
     */
#ifdef DUK_USE_SOURCE_NONBMP
    if (duk__uni_range_match(duk_unicode_ids_noa,
                             sizeof(duk_unicode_ids_noa),
                             (duk_codepoint_t) cp) &&
        !duk__uni_range_match(duk_unicode_ids_m_let_noa,
                              sizeof(duk_unicode_ids_m_let_noa),
                              (duk_codepoint_t) cp)) {
        return 1;
    }
    return 0;
#else
    /* Only codepoints below 0x10000 are looked up in this configuration. */
    if (cp < 0x10000L) {
        if (duk__uni_range_match(duk_unicode_ids_noabmp,
                                 sizeof(duk_unicode_ids_noabmp),
                                 (duk_codepoint_t) cp) &&
            !duk__uni_range_match(duk_unicode_ids_m_let_noabmp,
                                  sizeof(duk_unicode_ids_m_let_noabmp),
                                  (duk_codepoint_t) cp)) {
            return 1;
        }
        return 0;
    } else {
        /* without explicit non-BMP support, assume non-BMP characters
         * are always accepted as letters.
         */
        return 1;
    }
#endif
}

References duk__uni_range_match(), duk_unicode_ids_m_let_noa, duk_unicode_ids_m_let_noabmp, duk_unicode_ids_noa, and duk_unicode_ids_noabmp.

Referenced by duk__case_transform_helper().

◆ duk_unicode_is_line_terminator()

DUK_INTERNAL duk_small_int_t duk_unicode_is_line_terminator ( duk_codepoint_t cp )

Definition at line 537 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                {
    /*
     *  Line terminator test, E5 Section 7.3.  Returns 1 when 'cp' is one
     *  of the four line terminator codepoints and 0 otherwise.
     *
     *  Only single codepoints are classified here.  Folding a <CR> <LF>
     *  pair into one LineTerminatorSequence is left to the caller.
     */

    switch (cp) {
    case 0x000a:  /* <LF> */
    case 0x000d:  /* <CR> */
    case 0x2028:
    case 0x2029:
        return 1;
    default:
        return 0;
    }
}

◆ duk_unicode_is_whitespace()

DUK_INTERNAL duk_small_int_t duk_unicode_is_whitespace ( duk_codepoint_t cp )

Definition at line 457 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                           {
    /*
     *  White space test, E5 Section 7.2.  The section names six
     *  characters explicitly:
     *
     *    0x0009  CHARACTER TABULATION
     *    0x000b  LINE TABULATION
     *    0x000c  FORM FEED (FF)
     *    0x0020  SPACE
     *    0x00a0  NO-BREAK SPACE
     *    0xfeff  ZERO WIDTH NO-BREAK SPACE (BYTE ORDER MARK)
     *
     *  and in addition every character of Unicode category 'Zs'.  The
     *  'Zs' characters can be extracted with the "src/extract_chars.py"
     *  script; the result this code is based on is:
     *
     *    0x0020             SPACE
     *    0x00a0             NO-BREAK SPACE
     *    0x1680             OGHAM SPACE MARK
     *    0x180e             MONGOLIAN VOWEL SEPARATOR
     *    0x2000 ... 0x200a  EN QUAD, EM QUAD, EN SPACE, EM SPACE,
     *                       THREE-PER-EM SPACE, FOUR-PER-EM SPACE,
     *                       SIX-PER-EM SPACE, FIGURE SPACE,
     *                       PUNCTUATION SPACE, THIN SPACE, HAIR SPACE
     *    0x202f             NARROW NO-BREAK SPACE
     *    0x205f             MEDIUM MATHEMATICAL SPACE
     *    0x3000             IDEOGRAPHIC SPACE
     *
     *  The check is decoded by hand instead of using a table: the
     *  codepoint is split into its low byte and the remaining high part,
     *  the 0x00xx and 0x20xx pages are tested byte-wise, and the few
     *  remaining characters are compared directly.
     *
     *  Returns 1 for white space and 0 otherwise.  cp == -1 (EOF) matches
     *  none of the tests and therefore yields 0.
     */

    duk_uint_fast32_t page;
    duk_uint_fast8_t low;

    low = (duk_uint_fast8_t) (cp & 0xff);
    page = (duk_uint_fast32_t) (cp >> 8);  /* too wide for an 8-bit type */

    if (page == 0x0000UL) {
        /* 0x00xx: TAB, VT, FF, SPACE, NO-BREAK SPACE */
        return (low == 0x09U || low == 0x0bU || low == 0x0cU ||
                low == 0x20U || low == 0xa0U) ? 1 : 0;
    }

    if (page == 0x0020UL) {
        /* 0x20xx: 0x2000 ... 0x200a, 0x202f, 0x205f */
        return (low <= 0x0aU || low == 0x2fU || low == 0x5fU) ? 1 : 0;
    }

    /* Isolated characters on other pages. */
    if (cp == 0x1680L || cp == 0x180eL || cp == 0x3000L ||
        cp == 0xfeffL) {
        return 1;
    }
    return 0;
}

◆ duk_unicode_re_canonicalize_char()

DUK_INTERNAL duk_codepoint_t duk_unicode_re_canonicalize_char	(	duk_hthread *	thr,
		duk_codepoint_t	cp )

Definition at line 1071 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                                    {
    /*
     *  Map 'cp' to its canonical form for regexp matching and return the
     *  result.  Two implementations exist, selected at compile time.
     */
#if defined(DUK_USE_REGEXP_CANON_WORKAROUND)
    /* Table driven variant: a direct lookup for codepoints below 0x10000
     * (fast, but the table costs 128kB of footprint).  Codepoints at or
     * above 0x10000 are returned unchanged.  'thr' is not needed here.
     */
    DUK_ASSERT(cp >= 0);
    DUK_UNREF(thr);
    return DUK_LIKELY(cp < 0x10000L) ?
           (duk_codepoint_t) duk_unicode_re_canon_lookup[cp] :
           cp;
#else  /* DUK_USE_REGEXP_CANON_WORKAROUND */
    duk_codepoint_t converted;

    /* Uppercase the single character through the shared case conversion
     * helper.  No output buffer is supplied (NULL is allowed) and there
     * is no previous or next character (-1 for both).
     */
    converted = duk__case_transform_helper(thr, NULL, cp, -1, -1, 1);

    if (converted < 0) {
        /* Conversion produced multiple codepoints: keep the input. */
        return cp;
    }
    if (cp >= 0x80 && converted < 0x80) {
        /* Non-ASCII input mapped to ASCII: keep the input. */
        return cp;
    }
    return converted;
#endif  /* DUK_USE_REGEXP_CANON_WORKAROUND */
}

References duk__case_transform_helper(), DUK_ASSERT, DUK_LIKELY, DUK_UNREF, and NULL.

◆ duk_unicode_re_is_wordchar()

DUK_INTERNAL duk_small_int_t duk_unicode_re_is_wordchar ( duk_codepoint_t x )

Definition at line 1106 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                           {
    /*
     *  Regexp word character test: returns 1 when 'x' is in the set
     *  [0-9a-zA-Z_] and 0 otherwise.
     *
     *  Note: E5 Section 15.10.2.6 contains a typo (it lists 'A' twice
     *  and omits 'a'); the set above is the intended one.
     */
    if (x == '_') {
        return 1;
    }
    if (x >= '0' && x <= '9') {
        return 1;
    }
    if (x >= 'A' && x <= 'Z') {
        return 1;
    }
    if (x >= 'a' && x <= 'z') {
        return 1;
    }
    return 0;
}

◆ duk_unicode_unvalidated_utf8_length()

DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length	(	const duk_uint8_t *	data,
		duk_size_t	blen )

Definition at line 319 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                                                      {
    /*
     *  Return 'blen' minus the number of bytes in the range [0x80,0xbf]
     *  found in data[0 ... blen-1].  For well-formed UTF-8 this equals
     *  the number of codepoints, because every codepoint contributes
     *  exactly one byte outside that range.  The input is not validated:
     *  bytes are only classified individually.
     *
     *    data: start of the byte string, any alignment
     *    blen: number of bytes to examine
     *
     *  Inputs of at least 16 bytes use a fast path which reads the data
     *  four bytes at a time from a 4-byte aligned address.  Shorter
     *  inputs, and the tail left over by the fast path, are handled one
     *  byte at a time.
     */
    const duk_uint8_t *p;
    const duk_uint8_t *p_end;
    const duk_uint32_t *p32_end;
    const duk_uint32_t *p32;
    duk_size_t ncont;
    duk_size_t clen;
 
    ncont = 0;  /* number of continuation (non-initial) bytes in [0x80,0xbf] */
    p = data;
    p_end = data + blen;
    if (blen < 16) {
        /* Short input: go straight to the byte-by-byte loop. */
        goto skip_fastpath;
    }
 
    /* Align 'p' to 4; the input data may have arbitrary alignment.
     * End of string check not needed because blen >= 16.
     */
    while (((duk_size_t) (const void *) p) & 0x03U) {
        duk_uint8_t x;
        x = *p++;
        if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
            ncont++;
        }
    }
 
    /* Full, aligned 4-byte reads.  The remaining byte count is rounded
     * down to a multiple of four, so 'p32_end' marks the end of the last
     * complete 32-bit word; 0 to 3 trailing bytes are left for the final
     * byte loop.
     */
    p32_end = (const duk_uint32_t *) (const void *) (p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03)));
    p32 = (const duk_uint32_t *) (const void *) p;
    while (p32 != (const duk_uint32_t *) p32_end) {
        duk_uint32_t x;
        x = *p32++;
        if (DUK_LIKELY((x & 0x80808080UL) == 0)) {
            ;  /* ASCII fast path */
        } else {
            /* Flip highest bit of each byte which changes
             * the bit pattern 10xxxxxx into 00xxxxxx which
             * allows an easy bit mask test.
             *
             * Each mask below selects the top two bits of one
             * byte of the word; both being zero after the flip
             * means that byte was in [0x80,0xbf].  Which byte of
             * the input a given mask covers depends on byte
             * order, but only the total count is used.
             */
            x ^= 0x80808080UL;
            if (DUK_UNLIKELY(!(x & 0xc0000000UL))) {
                ncont++;
            }
            if (DUK_UNLIKELY(!(x & 0x00c00000UL))) {
                ncont++;
            }
            if (DUK_UNLIKELY(!(x & 0x0000c000UL))) {
                ncont++;
            }
            if (DUK_UNLIKELY(!(x & 0x000000c0UL))) {
                ncont++;
            }
        }
    }
    p = (const duk_uint8_t *) p32;
    /* Fall through to handle the rest. */
 
 skip_fastpath:
    /* Byte-by-byte loop: the whole input when the fast path was skipped,
     * otherwise only the bytes after the last complete 32-bit word.
     */
    while (p != p_end) {
        duk_uint8_t x;
        x = *p++;
        if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
            ncont++;
        }
    }
 
    /* Every counted byte is one of the 'blen' input bytes, so the
     * subtraction cannot wrap.
     */
    DUK_ASSERT(ncont <= blen);
    clen = blen - ncont;
    DUK_ASSERT(clen <= blen);
    return clen;
}

References DUK_ASSERT, DUK_LIKELY, and DUK_UNLIKELY.

Variable Documentation

◆ duk_is_idchar_tab

DUK_INTERNAL const duk_int8_t duk_is_idchar_tab[128]

Initial value:

= {
    
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   
    0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  0,  0,  0,  0,  0,   
    0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  1,   
    0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0    
}

Definition at line 13 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                       {
    /* Identifier classification for the 128 ASCII codepoints, one entry
     * per codepoint; the comment on each row gives the codepoints the
     * row covers.
     *
     * 0: not IdentifierStart or IdentifierPart
     * 1: IdentifierStart and IdentifierPart
     * -1: IdentifierPart only
     *
     * Entries with value 1:  '$' (0x24), 'A' ... 'Z' (0x41 ... 0x5a),
     *                        '_' (0x5f), 'a' ... 'z' (0x61 ... 0x7a)
     * Entries with value -1: '0' ... '9' (0x30 ... 0x39)
     */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x00...0x0f */
    0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x10...0x1f */
    0,  0,  0,  0,  1,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,   /* 0x20...0x2f */
    -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0,  0,  0,  0,  0,  0,   /* 0x30...0x3f */
    0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   /* 0x40...0x4f */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  1,   /* 0x50...0x5f */
    0,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,   /* 0x60...0x6f */
    1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  1,  0,  0,  0,  0,  0    /* 0x70...0x7f */
};

Referenced by duk_unicode_is_identifier_part(), and duk_unicode_is_identifier_start().

◆ duk_unicode_re_ranges_digit

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_digit[2]

Initial value:

= {
    (duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,
}

Definition at line 1125 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                 {
    /* Digit character range stored as a (first, last) codepoint pair:
     * '0' (0x0030) ... '9' (0x0039).
     * NOTE(review): the "re_" prefix suggests this backs the regexp
     * digit class; the consumer is not visible here -- confirm.
     */
    (duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,
};

◆ duk_unicode_re_ranges_not_digit

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_digit[4]

Initial value:

= {
    (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,
    (duk_uint16_t) 0x003AUL, (duk_uint16_t) 0xFFFFUL,
}

Definition at line 1147 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                     {
    /* Two (first, last) codepoint pairs covering everything in
     * 0x0000 ... 0xFFFF except '0' ... '9' (0x0030 ... 0x0039), i.e. the
     * complement of duk_unicode_re_ranges_digit within that span.
     */
    (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,  /* below '0' */
    (duk_uint16_t) 0x003AUL, (duk_uint16_t) 0xFFFFUL,  /* above '9' */
};

◆ duk_unicode_re_ranges_not_white

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_white[24]

Initial value:

= {
    (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x0008UL,
    (duk_uint16_t) 0x000EUL, (duk_uint16_t) 0x001FUL,
    (duk_uint16_t) 0x0021UL, (duk_uint16_t) 0x009FUL,
    (duk_uint16_t) 0x00A1UL, (duk_uint16_t) 0x167FUL,
    (duk_uint16_t) 0x1681UL, (duk_uint16_t) 0x180DUL,
    (duk_uint16_t) 0x180FUL, (duk_uint16_t) 0x1FFFUL,
    (duk_uint16_t) 0x200BUL, (duk_uint16_t) 0x2027UL,
    (duk_uint16_t) 0x202AUL, (duk_uint16_t) 0x202EUL,
    (duk_uint16_t) 0x2030UL, (duk_uint16_t) 0x205EUL,
    (duk_uint16_t) 0x2060UL, (duk_uint16_t) 0x2FFFUL,
    (duk_uint16_t) 0x3001UL, (duk_uint16_t) 0xFEFEUL,
    (duk_uint16_t) 0xFF00UL, (duk_uint16_t) 0xFFFFUL,
}

Definition at line 1151 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                      {
    /* Twelve (first, last) codepoint pairs in ascending order.  Together
     * they cover exactly the codepoints in 0x0000 ... 0xFFFF that are not
     * listed in duk_unicode_re_ranges_white; each gap between two
     * consecutive pairs is one of the ranges of that table.
     */
    (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x0008UL,
    (duk_uint16_t) 0x000EUL, (duk_uint16_t) 0x001FUL,
    (duk_uint16_t) 0x0021UL, (duk_uint16_t) 0x009FUL,
    (duk_uint16_t) 0x00A1UL, (duk_uint16_t) 0x167FUL,
    (duk_uint16_t) 0x1681UL, (duk_uint16_t) 0x180DUL,
    (duk_uint16_t) 0x180FUL, (duk_uint16_t) 0x1FFFUL,
    (duk_uint16_t) 0x200BUL, (duk_uint16_t) 0x2027UL,
    (duk_uint16_t) 0x202AUL, (duk_uint16_t) 0x202EUL,
    (duk_uint16_t) 0x2030UL, (duk_uint16_t) 0x205EUL,
    (duk_uint16_t) 0x2060UL, (duk_uint16_t) 0x2FFFUL,
    (duk_uint16_t) 0x3001UL, (duk_uint16_t) 0xFEFEUL,
    (duk_uint16_t) 0xFF00UL, (duk_uint16_t) 0xFFFFUL,
};

◆ duk_unicode_re_ranges_not_wordchar

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_wordchar[10]

Initial value:

= {
    (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,
    (duk_uint16_t) 0x003AUL, (duk_uint16_t) 0x0040UL,
    (duk_uint16_t) 0x005BUL, (duk_uint16_t) 0x005EUL,
    (duk_uint16_t) 0x0060UL, (duk_uint16_t) 0x0060UL,
    (duk_uint16_t) 0x007BUL, (duk_uint16_t) 0xFFFFUL,
}

Definition at line 1165 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                         {
    /* Five (first, last) codepoint pairs covering everything in
     * 0x0000 ... 0xFFFF outside the set [0-9A-Z_a-z], i.e. the complement
     * of duk_unicode_re_ranges_wordchar within that span.
     */
    (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,  /* below '0' */
    (duk_uint16_t) 0x003AUL, (duk_uint16_t) 0x0040UL,  /* between '9' and 'A' */
    (duk_uint16_t) 0x005BUL, (duk_uint16_t) 0x005EUL,  /* between 'Z' and '_' */
    (duk_uint16_t) 0x0060UL, (duk_uint16_t) 0x0060UL,  /* between '_' and 'a' */
    (duk_uint16_t) 0x007BUL, (duk_uint16_t) 0xFFFFUL,  /* above 'z' */
};

◆ duk_unicode_re_ranges_white

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_white[22]

Initial value:

= {
    (duk_uint16_t) 0x0009UL, (duk_uint16_t) 0x000DUL,
    (duk_uint16_t) 0x0020UL, (duk_uint16_t) 0x0020UL,
    (duk_uint16_t) 0x00A0UL, (duk_uint16_t) 0x00A0UL,
    (duk_uint16_t) 0x1680UL, (duk_uint16_t) 0x1680UL,
    (duk_uint16_t) 0x180EUL, (duk_uint16_t) 0x180EUL,
    (duk_uint16_t) 0x2000UL, (duk_uint16_t) 0x200AUL,
    (duk_uint16_t) 0x2028UL, (duk_uint16_t) 0x2029UL,
    (duk_uint16_t) 0x202FUL, (duk_uint16_t) 0x202FUL,
    (duk_uint16_t) 0x205FUL, (duk_uint16_t) 0x205FUL,
    (duk_uint16_t) 0x3000UL, (duk_uint16_t) 0x3000UL,
    (duk_uint16_t) 0xFEFFUL, (duk_uint16_t) 0xFEFFUL,
}

Definition at line 1128 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                  {
    /* Eleven (first, last) codepoint pairs in ascending order; a single
     * character is stored as a pair with equal endpoints.  The set is
     * the union of the codepoints accepted by duk_unicode_is_whitespace()
     * and duk_unicode_is_line_terminator().
     */
    (duk_uint16_t) 0x0009UL, (duk_uint16_t) 0x000DUL,  /* TAB, LF, VT, FF, CR */
    (duk_uint16_t) 0x0020UL, (duk_uint16_t) 0x0020UL,  /* SPACE */
    (duk_uint16_t) 0x00A0UL, (duk_uint16_t) 0x00A0UL,  /* NO-BREAK SPACE */
    (duk_uint16_t) 0x1680UL, (duk_uint16_t) 0x1680UL,  /* OGHAM SPACE MARK */
    (duk_uint16_t) 0x180EUL, (duk_uint16_t) 0x180EUL,  /* MONGOLIAN VOWEL SEPARATOR */
    (duk_uint16_t) 0x2000UL, (duk_uint16_t) 0x200AUL,  /* EN QUAD ... HAIR SPACE */
    (duk_uint16_t) 0x2028UL, (duk_uint16_t) 0x2029UL,  /* line terminators 0x2028, 0x2029 */
    (duk_uint16_t) 0x202FUL, (duk_uint16_t) 0x202FUL,  /* NARROW NO-BREAK SPACE */
    (duk_uint16_t) 0x205FUL, (duk_uint16_t) 0x205FUL,  /* MEDIUM MATHEMATICAL SPACE */
    (duk_uint16_t) 0x3000UL, (duk_uint16_t) 0x3000UL,  /* IDEOGRAPHIC SPACE */
    (duk_uint16_t) 0xFEFFUL, (duk_uint16_t) 0xFEFFUL,  /* ZERO WIDTH NO-BREAK SPACE (BOM) */
};

◆ duk_unicode_re_ranges_wordchar

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_wordchar[8]

Initial value:

= {
    (duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,
    (duk_uint16_t) 0x0041UL, (duk_uint16_t) 0x005AUL,
    (duk_uint16_t) 0x005FUL, (duk_uint16_t) 0x005FUL,
    (duk_uint16_t) 0x0061UL, (duk_uint16_t) 0x007AUL,
}

Definition at line 1141 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                                    {
    /* Four (first, last) codepoint pairs in ascending order describing
     * the set [0-9A-Z_a-z], the same set that
     * duk_unicode_re_is_wordchar() accepts.
     */
    (duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,  /* '0' ... '9' */
    (duk_uint16_t) 0x0041UL, (duk_uint16_t) 0x005AUL,  /* 'A' ... 'Z' */
    (duk_uint16_t) 0x005FUL, (duk_uint16_t) 0x005FUL,  /* '_' */
    (duk_uint16_t) 0x0061UL, (duk_uint16_t) 0x007AUL,  /* 'a' ... 'z' */
};

◆ duk_unicode_xutf8_markers

DUK_INTERNAL const duk_uint8_t duk_unicode_xutf8_markers[7]

Initial value:

= {
    0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe
}

Definition at line 81 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

                                                              {
    /* Seven marker bytes whose leading one-bits grow by one per entry:
     * 0x00 (none), 0xc0 (110xxxxx), 0xe0 (1110xxxx), 0xf0 (11110xxx),
     * 0xf8 (111110xx), 0xfc (1111110x), 0xfe (11111110).
     * NOTE(review): presumably entry i is the initial byte marker for an
     * encoded sequence of i+1 bytes; confirm against
     * duk_unicode_encode_xutf8(), which is the only user of this table.
     */
    0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe
};

Referenced by duk_unicode_encode_xutf8().

Functions

Variables

Function Documentation

◆ duk__case_transform_helper()

◆ duk__slow_case_conversion()

◆ duk__uni_decode_value()

◆ duk__uni_range_match()

◆ duk_unicode_case_convert_string()

◆ duk_unicode_decode_xutf8()

◆ duk_unicode_decode_xutf8_checked()

◆ duk_unicode_encode_cesu8()

◆ duk_unicode_encode_xutf8()

◆ duk_unicode_get_xutf8_length()

◆ duk_unicode_is_identifier_part()

◆ duk_unicode_is_identifier_start()

◆ duk_unicode_is_letter()

◆ duk_unicode_is_line_terminator()

◆ duk_unicode_is_whitespace()

◆ duk_unicode_re_canonicalize_char()

◆ duk_unicode_re_is_wordchar()

◆ duk_unicode_unvalidated_utf8_length()

Variable Documentation

◆ duk_is_idchar_tab

◆ duk_unicode_re_ranges_digit

◆ duk_unicode_re_ranges_not_digit

◆ duk_unicode_re_ranges_not_white

◆ duk_unicode_re_ranges_not_wordchar

◆ duk_unicode_re_ranges_white

◆ duk_unicode_re_ranges_wordchar

◆ duk_unicode_xutf8_markers