Github User Fetcher 1.0.0
C Application with Server and GUI
Loading...
Searching...
No Matches
duk_unicode_support.c File Reference
#include "duk_internal.h"

Go to the source code of this file.

Functions

DUK_INTERNAL duk_small_int_t duk_unicode_get_xutf8_length (duk_ucodepoint_t cp)
 
DUK_INTERNAL duk_small_int_t duk_unicode_encode_xutf8 (duk_ucodepoint_t cp, duk_uint8_t *out)
 
DUK_INTERNAL duk_small_int_t duk_unicode_encode_cesu8 (duk_ucodepoint_t cp, duk_uint8_t *out)
 
DUK_INTERNAL duk_small_int_t duk_unicode_decode_xutf8 (duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end, duk_ucodepoint_t *out_cp)
 
DUK_INTERNAL duk_ucodepoint_t duk_unicode_decode_xutf8_checked (duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end)
 
DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length (const duk_uint8_t *data, duk_size_t blen)
 
DUK_LOCAL duk_uint32_t duk__uni_decode_value (duk_bitdecoder_ctx *bd_ctx)
 
DUK_LOCAL duk_small_int_t duk__uni_range_match (const duk_uint8_t *unitab, duk_size_t unilen, duk_codepoint_t cp)
 
DUK_INTERNAL duk_small_int_t duk_unicode_is_whitespace (duk_codepoint_t cp)
 
DUK_INTERNAL duk_small_int_t duk_unicode_is_line_terminator (duk_codepoint_t cp)
 
DUK_INTERNAL duk_small_int_t duk_unicode_is_identifier_start (duk_codepoint_t cp)
 
DUK_INTERNAL duk_small_int_t duk_unicode_is_identifier_part (duk_codepoint_t cp)
 
DUK_INTERNAL duk_small_int_t duk_unicode_is_letter (duk_codepoint_t cp)
 
DUK_LOCAL duk_codepoint_t duk__slow_case_conversion (duk_hthread *thr, duk_bufwriter_ctx *bw, duk_codepoint_t cp, duk_bitdecoder_ctx *bd_ctx)
 
DUK_LOCAL duk_codepoint_t duk__case_transform_helper (duk_hthread *thr, duk_bufwriter_ctx *bw, duk_codepoint_t cp, duk_codepoint_t prev, duk_codepoint_t next, duk_bool_t uppercase)
 
DUK_INTERNAL void duk_unicode_case_convert_string (duk_hthread *thr, duk_small_int_t uppercase)
 
DUK_INTERNAL duk_codepoint_t duk_unicode_re_canonicalize_char (duk_hthread *thr, duk_codepoint_t cp)
 
DUK_INTERNAL duk_small_int_t duk_unicode_re_is_wordchar (duk_codepoint_t x)
 

Variables

DUK_INTERNAL const duk_int8_t duk_is_idchar_tab [128]
 
DUK_INTERNAL const duk_uint8_t duk_unicode_xutf8_markers [7]
 
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_digit [2]
 
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_white [22]
 
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_wordchar [8]
 
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_digit [4]
 
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_white [24]
 
DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_wordchar [10]
 

Function Documentation

◆ duk__case_transform_helper()

DUK_LOCAL duk_codepoint_t duk__case_transform_helper ( duk_hthread * thr,
duk_bufwriter_ctx * bw,
duk_codepoint_t cp,
duk_codepoint_t prev,
duk_codepoint_t next,
duk_bool_t uppercase )

Definition at line 914 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

919 {
920 duk_bitdecoder_ctx bd_ctx;
921
922 /* fast path for ASCII */
923 if (cp < 0x80L) {
924 /* XXX: there are language sensitive rules for the ASCII range.
925 * If/when language/locale support is implemented, they need to
926 * be implemented here for the fast path. There are no context
927 * sensitive rules for ASCII range.
928 */
929
930 if (uppercase) {
931 if (cp >= 'a' && cp <= 'z') {
932 cp = cp - 'a' + 'A';
933 }
934 } else {
935 if (cp >= 'A' && cp <= 'Z') {
936 cp = cp - 'A' + 'a';
937 }
938 }
939
940 if (bw != NULL) {
941 DUK_BW_WRITE_RAW_U8(thr, bw, (duk_uint8_t) cp);
942 }
943 return cp;
944 }
945
946 /* context and locale specific rules which cannot currently be represented
947 * in the caseconv bitstream: hardcoded rules in C
948 */
949 if (uppercase) {
950 /* XXX: turkish / azeri */
951 } else {
952 /*
953 * Final sigma context specific rule. This is a rather tricky
954 * rule and this handling is probably not 100% correct now.
955 * The rule is not locale/language specific so it is supported.
956 */
957
958 if (cp == 0x03a3L && /* U+03A3 = GREEK CAPITAL LETTER SIGMA */
959 duk_unicode_is_letter(prev) && /* prev exists and is a letter */
960 !duk_unicode_is_letter(next)) { /* next does not exist or next is not a letter */
961 /* Capital sigma occurred at "end of word", lowercase to
962 * U+03C2 = GREEK SMALL LETTER FINAL SIGMA. Otherwise
963 * fall through and let the normal rules lowercase it to
964 * U+03C3 = GREEK SMALL LETTER SIGMA.
965 */
966 cp = 0x03c2L;
967 goto singlechar;
968 }
969
970 /* XXX: lithuanian not implemented */
971 /* XXX: lithuanian, explicit dot rules */
972 /* XXX: turkish / azeri, lowercase rules */
973 }
974
975 /* 1:1 or special conversions, but not locale/context specific: script generated rules */
976 DUK_MEMZERO(&bd_ctx, sizeof(bd_ctx));
977 if (uppercase) {
978 bd_ctx.data = (const duk_uint8_t *) duk_unicode_caseconv_uc;
979 bd_ctx.length = (duk_size_t) sizeof(duk_unicode_caseconv_uc);
980 } else {
981 bd_ctx.data = (const duk_uint8_t *) duk_unicode_caseconv_lc;
982 bd_ctx.length = (duk_size_t) sizeof(duk_unicode_caseconv_lc);
983 }
984 return duk__slow_case_conversion(thr, bw, cp, &bd_ctx);
985
986 singlechar:
987 if (bw != NULL) {
989 }
990 return cp;
991
992 /* unused now, not needed until Turkish/Azeri */
993#if 0
994 nochar:
995 return -1;
996#endif
997}
#define DUK_MEMZERO(p, n)
const duk_uint8_t duk_unicode_caseconv_uc[1288]
#define DUK_BW_WRITE_RAW_U8(thr, bw_ctx, val)
const duk_uint8_t duk_unicode_caseconv_lc[616]
#define DUK_BW_WRITE_RAW_XUTF8(thr, bw_ctx, cp)
DUK_LOCAL duk_codepoint_t duk__slow_case_conversion(duk_hthread *thr, duk_bufwriter_ctx *bw, duk_codepoint_t cp, duk_bitdecoder_ctx *bd_ctx)
DUK_INTERNAL duk_small_int_t duk_unicode_is_letter(duk_codepoint_t cp)
#define NULL
Definition gmacros.h:924
#define next(ls)

References duk_bitdecoder_ctx::data, duk__slow_case_conversion(), DUK_BW_WRITE_RAW_U8, DUK_BW_WRITE_RAW_XUTF8, DUK_MEMZERO, duk_unicode_caseconv_lc, duk_unicode_caseconv_uc, duk_unicode_is_letter(), duk_bitdecoder_ctx::length, next, and NULL.

Referenced by duk_unicode_case_convert_string(), and duk_unicode_re_canonicalize_char().

◆ duk__slow_case_conversion()

DUK_LOCAL duk_codepoint_t duk__slow_case_conversion ( duk_hthread * thr,
duk_bufwriter_ctx * bw,
duk_codepoint_t cp,
duk_bitdecoder_ctx * bd_ctx )

Definition at line 808 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

811 {
812 duk_small_int_t skip = 0;
815 duk_small_int_t count;
816 duk_codepoint_t tmp_cp;
817 duk_codepoint_t start_i;
818 duk_codepoint_t start_o;
819
820 DUK_UNREF(thr);
821 DUK_ASSERT(bd_ctx != NULL);
822
823 DUK_DDD(DUK_DDDPRINT("slow case conversion for codepoint: %ld", (long) cp));
824
825 /* range conversion with a "skip" */
826 DUK_DDD(DUK_DDDPRINT("checking ranges"));
827 for (;;) {
828 skip++;
829 n = (duk_small_int_t) duk_bd_decode(bd_ctx, 6);
830 if (n == 0x3f) {
831 /* end marker */
832 break;
833 }
834 DUK_DDD(DUK_DDDPRINT("skip=%ld, n=%ld", (long) skip, (long) n));
835
836 while (n--) {
837 start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
838 start_o = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
839 count = (duk_small_int_t) duk_bd_decode(bd_ctx, 7);
840 DUK_DDD(DUK_DDDPRINT("range: start_i=%ld, start_o=%ld, count=%ld, skip=%ld",
841 (long) start_i, (long) start_o, (long) count, (long) skip));
842
843 if (cp >= start_i) {
844 tmp_cp = cp - start_i; /* always >= 0 */
845 if (tmp_cp < (duk_codepoint_t) count * (duk_codepoint_t) skip &&
846 (tmp_cp % (duk_codepoint_t) skip) == 0) {
847 DUK_DDD(DUK_DDDPRINT("range matches input codepoint"));
848 cp = start_o + tmp_cp;
849 goto single;
850 }
851 }
852 }
853 }
854
855 /* 1:1 conversion */
856 n = (duk_small_int_t) duk_bd_decode(bd_ctx, 6);
857 DUK_DDD(DUK_DDDPRINT("checking 1:1 conversions (count %ld)", (long) n));
858 while (n--) {
859 start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
860 start_o = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
861 DUK_DDD(DUK_DDDPRINT("1:1 conversion %ld -> %ld", (long) start_i, (long) start_o));
862 if (cp == start_i) {
863 DUK_DDD(DUK_DDDPRINT("1:1 matches input codepoint"));
864 cp = start_o;
865 goto single;
866 }
867 }
868
869 /* complex, multicharacter conversion */
870 n = (duk_small_int_t) duk_bd_decode(bd_ctx, 7);
871 DUK_DDD(DUK_DDDPRINT("checking 1:n conversions (count %ld)", (long) n));
872 while (n--) {
873 start_i = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
874 t = (duk_small_int_t) duk_bd_decode(bd_ctx, 2);
875 DUK_DDD(DUK_DDDPRINT("1:n conversion %ld -> %ld chars", (long) start_i, (long) t));
876 if (cp == start_i) {
877 DUK_DDD(DUK_DDDPRINT("1:n matches input codepoint"));
878 if (bw != NULL) {
879 while (t--) {
880 tmp_cp = (duk_codepoint_t) duk_bd_decode(bd_ctx, 16);
881 DUK_BW_WRITE_RAW_XUTF8(thr, bw, (duk_ucodepoint_t) tmp_cp);
882 }
883 }
884 return -1;
885 } else {
886 while (t--) {
887 (void) duk_bd_decode(bd_ctx, 16);
888 }
889 }
890 }
891
892 /* default: no change */
893 DUK_DDD(DUK_DDDPRINT("no rule matches, output is same as input"));
894 /* fall through */
895
896 single:
897 if (bw != NULL) {
899 }
900 return cp;
901}
DUK_INTERNAL_DECL duk_int32_t duk_bd_decode(duk_bitdecoder_ctx *ctx, duk_small_int_t bits)

References DUK_ASSERT, duk_bd_decode(), DUK_BW_WRITE_RAW_XUTF8, DUK_DDD, DUK_DDDPRINT, DUK_UNREF, and NULL.

Referenced by duk__case_transform_helper().

◆ duk__uni_decode_value()

DUK_LOCAL duk_uint32_t duk__uni_decode_value ( duk_bitdecoder_ctx * bd_ctx)

Definition at line 400 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

400 {
401 duk_uint32_t t;
402
403 t = (duk_uint32_t) duk_bd_decode(bd_ctx, 4);
404 if (t <= 0x0eU) {
405 return t;
406 }
407 t = (duk_uint32_t) duk_bd_decode(bd_ctx, 8);
408 if (t <= 0xfdU) {
409 return t + 0x0f;
410 }
411 if (t == 0xfeU) {
412 t = (duk_uint32_t) duk_bd_decode(bd_ctx, 12);
413 return t + 0x0fU + 0xfeU;
414 } else {
415 t = (duk_uint32_t) duk_bd_decode(bd_ctx, 24);
416 return t + 0x0fU + 0xfeU + 0x1000UL;
417 }
418}

References duk_bd_decode().

Referenced by duk__uni_range_match().

◆ duk__uni_range_match()

DUK_LOCAL duk_small_int_t duk__uni_range_match ( const duk_uint8_t * unitab,
duk_size_t unilen,
duk_codepoint_t cp )

Definition at line 420 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

420 {
421 duk_bitdecoder_ctx bd_ctx;
422 duk_codepoint_t prev_re;
423
424 DUK_MEMZERO(&bd_ctx, sizeof(bd_ctx));
425 bd_ctx.data = (const duk_uint8_t *) unitab;
426 bd_ctx.length = (duk_size_t) unilen;
427
428 prev_re = 0;
429 for (;;) {
430 duk_codepoint_t r1, r2;
432 if (r1 == 0) {
433 break;
434 }
436
437 r1 = prev_re + r1;
438 r2 = r1 + r2;
439 prev_re = r2;
440
441 /* [r1,r2] is the range */
442
443 DUK_DDD(DUK_DDDPRINT("duk__uni_range_match: cp=%06lx range=[0x%06lx,0x%06lx]",
444 (unsigned long) cp, (unsigned long) r1, (unsigned long) r2));
445 if (cp >= r1 && cp <= r2) {
446 return 1;
447 }
448 }
449
450 return 0;
451}
DUK_LOCAL duk_uint32_t duk__uni_decode_value(duk_bitdecoder_ctx *bd_ctx)

References duk_bitdecoder_ctx::data, duk__uni_decode_value(), DUK_DDD, DUK_DDDPRINT, DUK_MEMZERO, and duk_bitdecoder_ctx::length.

Referenced by duk_unicode_is_identifier_part(), duk_unicode_is_identifier_start(), and duk_unicode_is_letter().

◆ duk_unicode_case_convert_string()

DUK_INTERNAL void duk_unicode_case_convert_string ( duk_hthread * thr,
duk_small_int_t uppercase )

Definition at line 1003 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1003 {
1004 duk_context *ctx = (duk_context *) thr;
1005 duk_hstring *h_input;
1006 duk_bufwriter_ctx bw_alloc;
1008 const duk_uint8_t *p, *p_start, *p_end;
1009 duk_codepoint_t prev, curr, next;
1010
1011 h_input = duk_require_hstring(ctx, -1);
1012 DUK_ASSERT(h_input != NULL);
1013
1014 bw = &bw_alloc;
1016
1017 /* [ ... input buffer ] */
1018
1019 p_start = (const duk_uint8_t *) DUK_HSTRING_GET_DATA(h_input);
1020 p_end = p_start + DUK_HSTRING_GET_BYTELEN(h_input);
1021 p = p_start;
1022
1023 prev = -1; DUK_UNREF(prev);
1024 curr = -1;
1025 next = -1;
1026 for (;;) {
1027 prev = curr;
1028 curr = next;
1029 next = -1;
1030 if (p < p_end) {
1031 next = (int) duk_unicode_decode_xutf8_checked(thr, &p, p_start, p_end);
1032 } else {
1033 /* end of input and last char has been processed */
1034 if (curr < 0) {
1035 break;
1036 }
1037 }
1038
1039 /* on first round, skip */
1040 if (curr >= 0) {
1041 /* XXX: could add a fast path to process chunks of input codepoints,
1042 * but relative benefit would be quite small.
1043 */
1044
1045 /* Ensure space for maximum multi-character result; estimate is overkill. */
1047
1049 bw,
1050 (duk_codepoint_t) curr,
1051 prev,
1052 next,
1053 uppercase);
1054 }
1055 }
1056
1057 DUK_BW_COMPACT(thr, bw);
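1058 duk_to_string(ctx, -1); /* invalidates h_buf pointer (NOTE(review): no 'h_buf' is declared in this function; presumably this means the buffer written through 'bw' -- confirm) */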
1059 duk_remove(ctx, -2);
1060}
#define DUK_HSTRING_GET_DATA(x)
#define DUK_BW_COMPACT(thr, bw_ctx)
#define DUK_BW_ENSURE(thr, bw_ctx, sz)
DUK_EXTERNAL void duk_remove(duk_context *ctx, duk_idx_t index)
#define DUK_HSTRING_GET_BYTELEN(x)
#define DUK_BW_INIT_PUSHBUF(thr, bw_ctx, sz)
DUK_EXTERNAL const char * duk_to_string(duk_context *ctx, duk_idx_t index)
DUK_INTERNAL_DECL duk_hstring * duk_require_hstring(duk_context *ctx, duk_idx_t index)
#define DUK_UNICODE_MAX_XUTF8_LENGTH
DUK_LOCAL duk_codepoint_t duk__case_transform_helper(duk_hthread *thr, duk_bufwriter_ctx *bw, duk_codepoint_t cp, duk_codepoint_t prev, duk_codepoint_t next, duk_bool_t uppercase)
DUK_INTERNAL duk_ucodepoint_t duk_unicode_decode_xutf8_checked(duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end)

References duk__case_transform_helper(), DUK_ASSERT, DUK_BW_COMPACT, DUK_BW_ENSURE, DUK_BW_INIT_PUSHBUF, DUK_HSTRING_GET_BYTELEN, DUK_HSTRING_GET_DATA, duk_remove(), duk_require_hstring(), duk_to_string(), duk_unicode_decode_xutf8_checked(), DUK_UNICODE_MAX_XUTF8_LENGTH, DUK_UNREF, next, and NULL.

◆ duk_unicode_decode_xutf8()

DUK_INTERNAL duk_small_int_t duk_unicode_decode_xutf8 ( duk_hthread * thr,
const duk_uint8_t ** ptr,
const duk_uint8_t * ptr_start,
const duk_uint8_t * ptr_end,
duk_ucodepoint_t * out_cp )

Definition at line 183 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

183 {
184 const duk_uint8_t *p;
185 duk_uint32_t res;
188
189 DUK_UNREF(thr);
190
191 p = *ptr;
192 if (p < ptr_start || p >= ptr_end) {
193 goto fail;
194 }
195
196 /*
197 * UTF-8 decoder which accepts longer than standard byte sequences.
198 * This allows full 32-bit code points to be used.
199 */
200
201 ch = (duk_uint_fast8_t) (*p++);
202 if (ch < 0x80) {
203 /* 0xxx xxxx [7 bits] */
204 res = (duk_uint32_t) (ch & 0x7f);
205 n = 0;
206 } else if (ch < 0xc0) {
207 /* 10xx xxxx -> invalid */
208 goto fail;
209 } else if (ch < 0xe0) {
210 /* 110x xxxx 10xx xxxx [11 bits] */
211 res = (duk_uint32_t) (ch & 0x1f);
212 n = 1;
213 } else if (ch < 0xf0) {
214 /* 1110 xxxx 10xx xxxx 10xx xxxx [16 bits] */
215 res = (duk_uint32_t) (ch & 0x0f);
216 n = 2;
217 } else if (ch < 0xf8) {
218 /* 1111 0xxx 10xx xxxx 10xx xxxx 10xx xxxx [21 bits] */
219 res = (duk_uint32_t) (ch & 0x07);
220 n = 3;
221 } else if (ch < 0xfc) {
222 /* 1111 10xx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx [26 bits] */
223 res = (duk_uint32_t) (ch & 0x03);
224 n = 4;
225 } else if (ch < 0xfe) {
226 /* 1111 110x 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx [31 bits] */
227 res = (duk_uint32_t) (ch & 0x01);
228 n = 5;
229 } else if (ch < 0xff) {
230 /* 1111 1110 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx [36 bits] */
231 res = (duk_uint32_t) (0);
232 n = 6;
233 } else {
234 /* 8-byte format could be:
235 * 1111 1111 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx 10xx xxxx [41 bits]
236 *
237 * However, this format would not have a zero bit following the
238 * leading one bits and would not allow 0xFF to be used as an
239 * "invalid xutf-8" marker for internal keys. Further, 8-byte
240 * encodings (up to 41 bit code points) are not currently needed.
241 */
242 goto fail;
243 }
244
245 DUK_ASSERT(p >= ptr_start); /* verified at beginning */
246 if (p + n > ptr_end) {
247 /* check pointer at end */
248 goto fail;
249 }
250
251 while (n > 0) {
252 DUK_ASSERT(p >= ptr_start && p < ptr_end);
253 res = res << 6;
254 res += (duk_uint32_t) ((*p++) & 0x3f);
255 n--;
256 }
257
258 *ptr = p;
259 *out_cp = res;
260 return 1;
261
262 fail:
263 return 0;
264}
duk_uint8_t duk_uint_fast8_t

References DUK_ASSERT, and DUK_UNREF.

Referenced by duk_unicode_decode_xutf8_checked().

◆ duk_unicode_decode_xutf8_checked()

DUK_INTERNAL duk_ucodepoint_t duk_unicode_decode_xutf8_checked ( duk_hthread * thr,
const duk_uint8_t ** ptr,
const duk_uint8_t * ptr_start,
const duk_uint8_t * ptr_end )

Definition at line 267 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

267 {
269
270 if (duk_unicode_decode_xutf8(thr, ptr, ptr_start, ptr_end, &cp)) {
271 return cp;
272 }
273 DUK_ERROR_INTERNAL(thr, "utf-8 decode failed"); /* XXX: 'internal error' is a bit of a misnomer */
275 return 0;
276}
#define DUK_ERROR_INTERNAL(thr, msg)
DUK_INTERNAL duk_small_int_t duk_unicode_decode_xutf8(duk_hthread *thr, const duk_uint8_t **ptr, const duk_uint8_t *ptr_start, const duk_uint8_t *ptr_end, duk_ucodepoint_t *out_cp)

References DUK_ERROR_INTERNAL, duk_unicode_decode_xutf8(), and DUK_UNREACHABLE.

Referenced by duk_unicode_case_convert_string().

◆ duk_unicode_encode_cesu8()

DUK_INTERNAL duk_small_int_t duk_unicode_encode_cesu8 ( duk_ucodepoint_t cp,
duk_uint8_t * out )

Definition at line 123 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

123 {
125 duk_small_int_t len;
126
127 if (x < 0x80UL) {
128 out[0] = (duk_uint8_t) x;
129 len = 1;
130 } else if (x < 0x800UL) {
131 out[0] = (duk_uint8_t) (0xc0 + ((x >> 6) & 0x1f));
132 out[1] = (duk_uint8_t) (0x80 + (x & 0x3f));
133 len = 2;
134 } else if (x < 0x10000UL) {
135 /* surrogate pairs get encoded here */
136 out[0] = (duk_uint8_t) (0xe0 + ((x >> 12) & 0x0f));
137 out[1] = (duk_uint8_t) (0x80 + ((x >> 6) & 0x3f));
138 out[2] = (duk_uint8_t) (0x80 + (x & 0x3f));
139 len = 3;
140 } else {
141 /*
142 * Unicode codepoints above U+FFFF are encoded as surrogate
143 * pairs here. This ensures that all CESU-8 codepoints are
144 * 16-bit values as expected in Ecmascript. The surrogate
145 * pairs always get a 3-byte encoding (each) in CESU-8.
146 * See: http://en.wikipedia.org/wiki/Surrogate_pair
147 *
148 * 20-bit codepoint, split into two 10-bit halves (A and B), one per surrogate:
149 *
150 * x = 0b00000000 0000AAAA AAAAAABB BBBBBBBB
151 * sp1 = 0b110110AA AAAAAAAA (0xd800 + ((x >> 10) & 0x3ff))
152 * sp2 = 0b110111BB BBBBBBBB (0xdc00 + (x & 0x3ff))
153 *
154 * Encoded into CESU-8:
155 *
156 * sp1 -> 0b11101101 (0xe0 + ((sp1 >> 12) & 0x0f))
157 * -> 0b1010AAAA (0x80 + ((sp1 >> 6) & 0x3f))
158 * -> 0b10AAAAAA (0x80 + (sp1 & 0x3f))
159 * sp2 -> 0b11101101 (0xe0 + ((sp2 >> 12) & 0x0f))
160 * -> 0b1011BBBB (0x80 + ((sp2 >> 6) & 0x3f))
161 * -> 0b10BBBBBB (0x80 + (sp2 & 0x3f))
162 *
163 * Note that 0x10000 must be subtracted first. The code below
164 * avoids the sp1, sp2 temporaries which saves around 20 bytes
165 * of code.
166 */
167
168 x -= 0x10000UL;
169
170 out[0] = (duk_uint8_t) (0xed);
171 out[1] = (duk_uint8_t) (0xa0 + ((x >> 16) & 0x0f));
172 out[2] = (duk_uint8_t) (0x80 + ((x >> 10) & 0x3f));
173 out[3] = (duk_uint8_t) (0xed);
174 out[4] = (duk_uint8_t) (0xb0 + ((x >> 6) & 0x0f));
175 out[5] = (duk_uint8_t) (0x80 + (x & 0x3f));
176 len = 6;
177 }
178
179 return len;
180}
duk_uint32_t duk_uint_fast32_t

◆ duk_unicode_encode_xutf8()

DUK_INTERNAL duk_small_int_t duk_unicode_encode_xutf8 ( duk_ucodepoint_t cp,
duk_uint8_t * out )

Definition at line 89 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

89 {
92 duk_uint8_t marker;
94
96 DUK_ASSERT(len > 0);
97
98 marker = duk_unicode_xutf8_markers[len - 1]; /* 64-bit OK because always >= 0 */
99
100 i = len;
101 DUK_ASSERT(i > 0);
102 do {
103 i--;
104 if (i > 0) {
105 out[i] = (duk_uint8_t) (0x80 + (x & 0x3f));
106 x >>= 6;
107 } else {
108 /* Note: masking of 'x' is not necessary because of
109 * range check and shifting -> no bits overlapping
110 * the marker should be set.
111 */
112 out[0] = (duk_uint8_t) (marker + x);
113 }
114 } while (i > 0);
115
116 return len;
117}
DUK_INTERNAL duk_small_int_t duk_unicode_get_xutf8_length(duk_ucodepoint_t cp)
DUK_INTERNAL const duk_uint8_t duk_unicode_xutf8_markers[7]

References DUK_ASSERT, duk_unicode_get_xutf8_length(), and duk_unicode_xutf8_markers.

◆ duk_unicode_get_xutf8_length()

DUK_INTERNAL duk_small_int_t duk_unicode_get_xutf8_length ( duk_ucodepoint_t cp)

Definition at line 33 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

33 {
35 if (x < 0x80UL) {
36 /* 7 bits */
37 return 1;
38 } else if (x < 0x800UL) {
39 /* 11 bits */
40 return 2;
41 } else if (x < 0x10000UL) {
42 /* 16 bits */
43 return 3;
44 } else if (x < 0x200000UL) {
45 /* 21 bits */
46 return 4;
47 } else if (x < 0x4000000UL) {
48 /* 26 bits */
49 return 5;
50 } else if (x < (duk_ucodepoint_t) 0x80000000UL) {
51 /* 31 bits */
52 return 6;
53 } else {
54 /* 36 bits */
55 return 7;
56 }
57}

Referenced by duk_unicode_encode_xutf8().

◆ duk_unicode_is_identifier_part()

DUK_INTERNAL duk_small_int_t duk_unicode_is_identifier_part ( duk_codepoint_t cp)

Definition at line 636 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

636 {
637 /*
638 * E5 Section 7.6:
639 *
640 * IdentifierPart:
641 * IdentifierStart
642 * UnicodeCombiningMark
643 * UnicodeDigit
644 * UnicodeConnectorPunctuation
645 * <ZWNJ> [U+200C]
646 * <ZWJ> [U+200D]
647 *
648 * IdentifierPart production has one multi-character production
649 * as part of its IdentifierStart alternative. The '\' character
650 * of an escape sequence is not matched here, see discussion in
651 * duk_unicode_is_identifier_start().
652 *
653 * To match non-ASCII characters (codepoints >= 0x80), a very slow
654 * linear range-by-range scan is used. The codepoint is first compared
655 * to the IdentifierStart ranges, and if it doesn't match, then to a
656 * set consisting of code points in IdentifierPart but not in
657 * IdentifierStart. This is done to keep the unicode range data small,
658 * at the expense of speed.
659 *
660 * The ASCII fast path consists of:
661 *
662 * 0x0030 ... 0x0039 ['0' ... '9', UnicodeDigit]
663 * 0x0041 ... 0x005a ['A' ... 'Z', IdentifierStart]
664 * 0x0061 ... 0x007a ['a' ... 'z', IdentifierStart]
665 * 0x0024 ['$', IdentifierStart]
666 * 0x005f ['_', IdentifierStart and
667 * UnicodeConnectorPunctuation]
668 *
669 * UnicodeCombiningMark has no code points <= 0x7f.
670 *
671 * The matching code reuses the "identifier start" tables, and then
672 * consults a separate range set for characters in "identifier part"
673 * but not in "identifier start". These can be extracted with the
674 * "src/extract_chars.py" script.
675 *
676 * UnicodeCombiningMark -> categories Mn, Mc
677 * UnicodeDigit -> categories Nd
678 * UnicodeConnectorPunctuation -> categories Pc
679 */
680
681 /* ASCII (and EOF) fast path -- quick accept and reject */
682 if (cp <= 0x7fL) {
683#if defined(DUK_USE_IDCHAR_FASTPATH)
684 return (cp >= 0) && (duk_is_idchar_tab[cp] != 0);
685#else
686 if ((cp >= 'a' && cp <= 'z') ||
687 (cp >= 'A' && cp <= 'Z') ||
688 (cp >= '0' && cp <= '9') ||
689 cp == '_' || cp == '$') {
690 return 1;
691 }
692 return 0;
693#endif
694 }
695
696 /* Non-ASCII slow path (range-by-range linear comparison), very slow */
697
698#ifdef DUK_USE_SOURCE_NONBMP
700 sizeof(duk_unicode_ids_noa),
701 (duk_codepoint_t) cp) ||
704 (duk_codepoint_t) cp)) {
705 return 1;
706 }
707 return 0;
708#else
709 if (cp < 0x10000L) {
712 (duk_codepoint_t) cp) ||
715 (duk_codepoint_t) cp)) {
716 return 1;
717 }
718 return 0;
719 } else {
720 /* without explicit non-BMP support, assume non-BMP characters
721 * are always accepted as identifier characters.
722 */
723 return 1;
724 }
725#endif
726}
const duk_uint8_t duk_unicode_ids_noa[791]
const duk_uint8_t duk_unicode_idp_m_ids_noa[397]
const duk_uint8_t duk_unicode_idp_m_ids_noabmp[348]
const duk_uint8_t duk_unicode_ids_noabmp[611]
DUK_LOCAL duk_small_int_t duk__uni_range_match(const duk_uint8_t *unitab, duk_size_t unilen, duk_codepoint_t cp)
DUK_INTERNAL const duk_int8_t duk_is_idchar_tab[128]

References duk__uni_range_match(), duk_is_idchar_tab, duk_unicode_idp_m_ids_noa, duk_unicode_idp_m_ids_noabmp, duk_unicode_ids_noa, and duk_unicode_ids_noabmp.

◆ duk_unicode_is_identifier_start()

DUK_INTERNAL duk_small_int_t duk_unicode_is_identifier_start ( duk_codepoint_t cp)

Definition at line 557 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

557 {
558 /*
559 * E5 Section 7.6:
560 *
561 * IdentifierStart:
562 * UnicodeLetter
563 * $
564 * _
565 * \ UnicodeEscapeSequence
566 *
567 * IdentifierStart production has one multi-character production:
568 *
569 * \ UnicodeEscapeSequence
570 *
571 * The '\' character is -not- matched by this function. Rather, the caller
572 * should decode the escape and then call this function to check whether the
573 * decoded character is acceptable (see discussion in E5 Section 7.6).
574 *
575 * The "UnicodeLetter" alternative of the production allows letters
576 * from various Unicode categories. These can be extracted with the
577 * "src/extract_chars.py" script.
578 *
579 * Because the result has hundreds of Unicode codepoint ranges, matching
580 * for any values >= 0x80 is done using a very slow range-by-range scan
581 * and a packed range format.
582 *
583 * The ASCII portion (codepoints 0x00 ... 0x7f) is fast-pathed below because
584 * it matters the most. The ASCII related ranges of IdentifierStart are:
585 *
586 * 0x0041 ... 0x005a ['A' ... 'Z']
587 * 0x0061 ... 0x007a ['a' ... 'z']
588 * 0x0024 ['$']
589 * 0x005f ['_']
590 */
591
592 /* ASCII (and EOF) fast path -- quick accept and reject */
593 if (cp <= 0x7fL) {
594#if defined(DUK_USE_IDCHAR_FASTPATH)
595 return (cp >= 0) && (duk_is_idchar_tab[cp] > 0);
596#else
597 if ((cp >= 'a' && cp <= 'z') ||
598 (cp >= 'A' && cp <= 'Z') ||
599 cp == '_' || cp == '$') {
600 return 1;
601 }
602 return 0;
603#endif
604 }
605
606 /* Non-ASCII slow path (range-by-range linear comparison), very slow */
607
608#ifdef DUK_USE_SOURCE_NONBMP
611 (duk_codepoint_t) cp)) {
612 return 1;
613 }
614 return 0;
615#else
616 if (cp < 0x10000L) {
619 (duk_codepoint_t) cp)) {
620 return 1;
621 }
622 return 0;
623 } else {
624 /* without explicit non-BMP support, assume non-BMP characters
625 * are always accepted as identifier characters.
626 */
627 return 1;
628 }
629#endif
630}

References duk__uni_range_match(), duk_is_idchar_tab, duk_unicode_ids_noa, and duk_unicode_ids_noabmp.

◆ duk_unicode_is_letter()

DUK_INTERNAL duk_small_int_t duk_unicode_is_letter ( duk_codepoint_t cp)

Definition at line 732 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

732 {
733 /*
734 * Unicode letter is now taken to be the categories:
735 *
736 * Lu, Ll, Lt, Lm, Lo
737 *
738 * (Not sure if this is exactly correct.)
739 *
740 * The ASCII fast path consists of:
741 *
742 * 0x0041 ... 0x005a ['A' ... 'Z']
743 * 0x0061 ... 0x007a ['a' ... 'z']
744 */
745
746 /* ASCII (and EOF) fast path -- quick accept and reject */
747 if (cp <= 0x7fL) {
748 if ((cp >= 'a' && cp <= 'z') ||
749 (cp >= 'A' && cp <= 'Z')) {
750 return 1;
751 }
752 return 0;
753 }
754
755 /* Non-ASCII slow path (range-by-range linear comparison), very slow */
756
757#ifdef DUK_USE_SOURCE_NONBMP
759 sizeof(duk_unicode_ids_noa),
760 (duk_codepoint_t) cp) &&
763 (duk_codepoint_t) cp)) {
764 return 1;
765 }
766 return 0;
767#else
768 if (cp < 0x10000L) {
771 (duk_codepoint_t) cp) &&
774 (duk_codepoint_t) cp)) {
775 return 1;
776 }
777 return 0;
778 } else {
779 /* without explicit non-BMP support, assume non-BMP characters
780 * are always accepted as letters.
781 */
782 return 1;
783 }
784#endif
785}
const duk_uint8_t duk_unicode_ids_m_let_noa[42]
const duk_uint8_t duk_unicode_ids_m_let_noabmp[24]

References duk__uni_range_match(), duk_unicode_ids_m_let_noa, duk_unicode_ids_m_let_noabmp, duk_unicode_ids_noa, and duk_unicode_ids_noabmp.

Referenced by duk__case_transform_helper().

◆ duk_unicode_is_line_terminator()

DUK_INTERNAL duk_small_int_t duk_unicode_is_line_terminator ( duk_codepoint_t cp)

Definition at line 537 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

537 {
538 /*
539 * E5 Section 7.3
540 *
541 * A LineTerminatorSequence essentially merges <CR> <LF> sequences
542 * into a single line terminator. This must be handled by the caller.
543 */
544
545 if (cp == 0x000aL || cp == 0x000dL || cp == 0x2028L ||
546 cp == 0x2029L) {
547 return 1;
548 }
549
550 return 0;
551}

◆ duk_unicode_is_whitespace()

DUK_INTERNAL duk_small_int_t duk_unicode_is_whitespace ( duk_codepoint_t cp)

Definition at line 457 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

457 {
458 /*
459 * E5 Section 7.2 specifies six characters specifically as
460 * white space:
461 *
462 * 0009;<control>;Cc;0;S;;;;;N;CHARACTER TABULATION;;;;
463 * 000B;<control>;Cc;0;S;;;;;N;LINE TABULATION;;;;
464 * 000C;<control>;Cc;0;WS;;;;;N;FORM FEED (FF);;;;
465 * 0020;SPACE;Zs;0;WS;;;;;N;;;;;
466 * 00A0;NO-BREAK SPACE;Zs;0;CS;<noBreak> 0020;;;;N;NON-BREAKING SPACE;;;;
467 * FEFF;ZERO WIDTH NO-BREAK SPACE;Cf;0;BN;;;;;N;BYTE ORDER MARK;;;;
468 *
469 * It also specifies any Unicode category 'Zs' characters as white
470 * space. These can be extracted with the "src/extract_chars.py" script.
471 * Current result:
472 *
473 * RAW OUTPUT:
474 * ===========
475 * 0020;SPACE;Zs;0;WS;;;;;N;;;;;
476 * 00A0;NO-BREAK SPACE;Zs;0;CS;<noBreak> 0020;;;;N;NON-BREAKING SPACE;;;;
477 * 1680;OGHAM SPACE MARK;Zs;0;WS;;;;;N;;;;;
478 * 180E;MONGOLIAN VOWEL SEPARATOR;Zs;0;WS;;;;;N;;;;;
479 * 2000;EN QUAD;Zs;0;WS;2002;;;;N;;;;;
480 * 2001;EM QUAD;Zs;0;WS;2003;;;;N;;;;;
481 * 2002;EN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
482 * 2003;EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
483 * 2004;THREE-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
484 * 2005;FOUR-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
485 * 2006;SIX-PER-EM SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
486 * 2007;FIGURE SPACE;Zs;0;WS;<noBreak> 0020;;;;N;;;;;
487 * 2008;PUNCTUATION SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
488 * 2009;THIN SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
489 * 200A;HAIR SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
490 * 202F;NARROW NO-BREAK SPACE;Zs;0;CS;<noBreak> 0020;;;;N;;;;;
491 * 205F;MEDIUM MATHEMATICAL SPACE;Zs;0;WS;<compat> 0020;;;;N;;;;;
492 * 3000;IDEOGRAPHIC SPACE;Zs;0;WS;<wide> 0020;;;;N;;;;;
493 *
494 * RANGES:
495 * =======
496 * 0x0020
497 * 0x00a0
498 * 0x1680
499 * 0x180e
500 * 0x2000 ... 0x200a
501 * 0x202f
502 * 0x205f
503 * 0x3000
504 *
505 * A manual decoder (below) is probably most compact for this.
506 */
507
508 duk_uint_fast8_t lo;
509 duk_uint_fast32_t hi;
510
511 /* cp == -1 (EOF) never matches and causes return value 0 */
512
513 lo = (duk_uint_fast8_t) (cp & 0xff);
514 hi = (duk_uint_fast32_t) (cp >> 8); /* does not fit into an uchar */
515
516 if (hi == 0x0000UL) {
517 if (lo == 0x09U || lo == 0x0bU || lo == 0x0cU ||
518 lo == 0x20U || lo == 0xa0U) {
519 return 1;
520 }
521 } else if (hi == 0x0020UL) {
522 if (lo <= 0x0aU || lo == 0x2fU || lo == 0x5fU) {
523 return 1;
524 }
525 } else if (cp == 0x1680L || cp == 0x180eL || cp == 0x3000L ||
526 cp == 0xfeffL) {
527 return 1;
528 }
529
530 return 0;
531}

◆ duk_unicode_re_canonicalize_char()

DUK_INTERNAL duk_codepoint_t duk_unicode_re_canonicalize_char ( duk_hthread * thr,
duk_codepoint_t cp )

Definition at line 1071 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1071 {
1072#if defined(DUK_USE_REGEXP_CANON_WORKAROUND)
1073 /* Fast canonicalization lookup at the cost of 128kB footprint. */
1074 DUK_ASSERT(cp >= 0);
1075 DUK_UNREF(thr);
1076 if (DUK_LIKELY(cp < 0x10000L)) {
1077 return (duk_codepoint_t) duk_unicode_re_canon_lookup[cp];
1078 }
1079 return cp;
1080#else /* DUK_USE_REGEXP_CANON_WORKAROUND */
1081 duk_codepoint_t y;
1082
1083 y = duk__case_transform_helper(thr,
1084 NULL, /* NULL is allowed, no output */
1085 cp, /* curr char */
1086 -1, /* prev char */
1087 -1, /* next char */
1088 1); /* uppercase */
1089
1090 if ((y < 0) || (cp >= 0x80 && y < 0x80)) {
1091 /* multiple codepoint conversion or non-ASCII mapped to ASCII
1092 * --> leave as is.
1093 */
1094 return cp;
1095 }
1096
1097 return y;
1098#endif /* DUK_USE_REGEXP_CANON_WORKAROUND */
1099}

References duk__case_transform_helper(), DUK_ASSERT, DUK_LIKELY, DUK_UNREF, and NULL.

◆ duk_unicode_re_is_wordchar()

DUK_INTERNAL duk_small_int_t duk_unicode_re_is_wordchar ( duk_codepoint_t x)

Definition at line 1106 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1106 {
1107 /*
1108 * Note: the description in E5 Section 15.10.2.6 has a typo, it
1109 * contains 'A' twice and lacks 'a'; the intent is [0-9a-zA-Z_].
1110 */
1111 if ((x >= '0' && x <= '9') ||
1112 (x >= 'a' && x <= 'z') ||
1113 (x >= 'A' && x <= 'Z') ||
1114 (x == '_')) {
1115 return 1;
1116 }
1117 return 0;
1118}

◆ duk_unicode_unvalidated_utf8_length()

DUK_INTERNAL duk_size_t duk_unicode_unvalidated_utf8_length ( const duk_uint8_t * data,
duk_size_t blen )

Definition at line 319 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

319 {
320 const duk_uint8_t *p;
321 const duk_uint8_t *p_end;
322 const duk_uint32_t *p32_end;
323 const duk_uint32_t *p32;
324 duk_size_t ncont;
325 duk_size_t clen;
326
327 ncont = 0; /* number of continuation (non-initial) bytes in [0x80,0xbf] */
328 p = data;
329 p_end = data + blen;
330 if (blen < 16) {
331 goto skip_fastpath;
332 }
333
334 /* Align 'p' to 4; the input data may have arbitrary alignment.
335 * End of string check not needed because blen >= 16.
336 */
337 while (((duk_size_t) (const void *) p) & 0x03U) {
338 duk_uint8_t x;
339 x = *p++;
340 if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
341 ncont++;
342 }
343 }
344
345 /* Full, aligned 4-byte reads. */
346 p32_end = (const duk_uint32_t *) (const void *) (p + ((duk_size_t) (p_end - p) & (duk_size_t) (~0x03)));
347 p32 = (const duk_uint32_t *) (const void *) p;
348 while (p32 != (const duk_uint32_t *) p32_end) {
349 duk_uint32_t x;
350 x = *p32++;
351 if (DUK_LIKELY((x & 0x80808080UL) == 0)) {
352 ; /* ASCII fast path */
353 } else {
354 /* Flip highest bit of each byte which changes
355 * the bit pattern 10xxxxxx into 00xxxxxx which
356 * allows an easy bit mask test.
357 */
358 x ^= 0x80808080UL;
359 if (DUK_UNLIKELY(!(x & 0xc0000000UL))) {
360 ncont++;
361 }
362 if (DUK_UNLIKELY(!(x & 0x00c00000UL))) {
363 ncont++;
364 }
365 if (DUK_UNLIKELY(!(x & 0x0000c000UL))) {
366 ncont++;
367 }
368 if (DUK_UNLIKELY(!(x & 0x000000c0UL))) {
369 ncont++;
370 }
371 }
372 }
373 p = (const duk_uint8_t *) p32;
374 /* Fall through to handle the rest. */
375
376 skip_fastpath:
377 while (p != p_end) {
378 duk_uint8_t x;
379 x = *p++;
380 if (DUK_UNLIKELY(x >= 0x80 && x <= 0xbf)) {
381 ncont++;
382 }
383 }
384
385 DUK_ASSERT(ncont <= blen);
386 clen = blen - ncont;
387 DUK_ASSERT(clen <= blen);
388 return clen;
389}

References DUK_ASSERT, DUK_LIKELY, and DUK_UNLIKELY.

Variable Documentation

◆ duk_is_idchar_tab

DUK_INTERNAL const duk_int8_t duk_is_idchar_tab[128]
Initial value:
= {
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1,
0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0
}

Definition at line 13 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

13 {
14 /* 0: not IdentifierStart or IdentifierPart
15 * 1: IdentifierStart and IdentifierPart
16 * -1: IdentifierPart only
17 */
18 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x00...0x0f */
19 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x10...0x1f */
20 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* 0x20...0x2f */
21 -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, 0, 0, 0, 0, 0, /* 0x30...0x3f */
22 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x40...0x4f */
23 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 1, /* 0x50...0x5f */
24 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* 0x60...0x6f */
25 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 0, 0 /* 0x70...0x7f */
26};

Referenced by duk_unicode_is_identifier_part(), and duk_unicode_is_identifier_start().

◆ duk_unicode_re_ranges_digit

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_digit[2]
Initial value:
= {
(duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,
}

Definition at line 1125 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1125 {
1126 (duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,
1127};

◆ duk_unicode_re_ranges_not_digit

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_digit[4]
Initial value:
= {
(duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,
(duk_uint16_t) 0x003AUL, (duk_uint16_t) 0xFFFFUL,
}

Definition at line 1147 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1147 {
1148 (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,
1149 (duk_uint16_t) 0x003AUL, (duk_uint16_t) 0xFFFFUL,
1150};

◆ duk_unicode_re_ranges_not_white

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_white[24]
Initial value:
= {
(duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x0008UL,
(duk_uint16_t) 0x000EUL, (duk_uint16_t) 0x001FUL,
(duk_uint16_t) 0x0021UL, (duk_uint16_t) 0x009FUL,
(duk_uint16_t) 0x00A1UL, (duk_uint16_t) 0x167FUL,
(duk_uint16_t) 0x1681UL, (duk_uint16_t) 0x180DUL,
(duk_uint16_t) 0x180FUL, (duk_uint16_t) 0x1FFFUL,
(duk_uint16_t) 0x200BUL, (duk_uint16_t) 0x2027UL,
(duk_uint16_t) 0x202AUL, (duk_uint16_t) 0x202EUL,
(duk_uint16_t) 0x2030UL, (duk_uint16_t) 0x205EUL,
(duk_uint16_t) 0x2060UL, (duk_uint16_t) 0x2FFFUL,
(duk_uint16_t) 0x3001UL, (duk_uint16_t) 0xFEFEUL,
(duk_uint16_t) 0xFF00UL, (duk_uint16_t) 0xFFFFUL,
}

Definition at line 1151 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1151 {
1152 (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x0008UL,
1153 (duk_uint16_t) 0x000EUL, (duk_uint16_t) 0x001FUL,
1154 (duk_uint16_t) 0x0021UL, (duk_uint16_t) 0x009FUL,
1155 (duk_uint16_t) 0x00A1UL, (duk_uint16_t) 0x167FUL,
1156 (duk_uint16_t) 0x1681UL, (duk_uint16_t) 0x180DUL,
1157 (duk_uint16_t) 0x180FUL, (duk_uint16_t) 0x1FFFUL,
1158 (duk_uint16_t) 0x200BUL, (duk_uint16_t) 0x2027UL,
1159 (duk_uint16_t) 0x202AUL, (duk_uint16_t) 0x202EUL,
1160 (duk_uint16_t) 0x2030UL, (duk_uint16_t) 0x205EUL,
1161 (duk_uint16_t) 0x2060UL, (duk_uint16_t) 0x2FFFUL,
1162 (duk_uint16_t) 0x3001UL, (duk_uint16_t) 0xFEFEUL,
1163 (duk_uint16_t) 0xFF00UL, (duk_uint16_t) 0xFFFFUL,
1164};

◆ duk_unicode_re_ranges_not_wordchar

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_not_wordchar[10]
Initial value:
= {
(duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,
(duk_uint16_t) 0x003AUL, (duk_uint16_t) 0x0040UL,
(duk_uint16_t) 0x005BUL, (duk_uint16_t) 0x005EUL,
(duk_uint16_t) 0x0060UL, (duk_uint16_t) 0x0060UL,
(duk_uint16_t) 0x007BUL, (duk_uint16_t) 0xFFFFUL,
}

Definition at line 1165 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1165 {
1166 (duk_uint16_t) 0x0000UL, (duk_uint16_t) 0x002FUL,
1167 (duk_uint16_t) 0x003AUL, (duk_uint16_t) 0x0040UL,
1168 (duk_uint16_t) 0x005BUL, (duk_uint16_t) 0x005EUL,
1169 (duk_uint16_t) 0x0060UL, (duk_uint16_t) 0x0060UL,
1170 (duk_uint16_t) 0x007BUL, (duk_uint16_t) 0xFFFFUL,
1171};

◆ duk_unicode_re_ranges_white

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_white[22]
Initial value:
= {
(duk_uint16_t) 0x0009UL, (duk_uint16_t) 0x000DUL,
(duk_uint16_t) 0x0020UL, (duk_uint16_t) 0x0020UL,
(duk_uint16_t) 0x00A0UL, (duk_uint16_t) 0x00A0UL,
(duk_uint16_t) 0x1680UL, (duk_uint16_t) 0x1680UL,
(duk_uint16_t) 0x180EUL, (duk_uint16_t) 0x180EUL,
(duk_uint16_t) 0x2000UL, (duk_uint16_t) 0x200AUL,
(duk_uint16_t) 0x2028UL, (duk_uint16_t) 0x2029UL,
(duk_uint16_t) 0x202FUL, (duk_uint16_t) 0x202FUL,
(duk_uint16_t) 0x205FUL, (duk_uint16_t) 0x205FUL,
(duk_uint16_t) 0x3000UL, (duk_uint16_t) 0x3000UL,
(duk_uint16_t) 0xFEFFUL, (duk_uint16_t) 0xFEFFUL,
}

Definition at line 1128 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1128 {
1129 (duk_uint16_t) 0x0009UL, (duk_uint16_t) 0x000DUL,
1130 (duk_uint16_t) 0x0020UL, (duk_uint16_t) 0x0020UL,
1131 (duk_uint16_t) 0x00A0UL, (duk_uint16_t) 0x00A0UL,
1132 (duk_uint16_t) 0x1680UL, (duk_uint16_t) 0x1680UL,
1133 (duk_uint16_t) 0x180EUL, (duk_uint16_t) 0x180EUL,
1134 (duk_uint16_t) 0x2000UL, (duk_uint16_t) 0x200AUL,
1135 (duk_uint16_t) 0x2028UL, (duk_uint16_t) 0x2029UL,
1136 (duk_uint16_t) 0x202FUL, (duk_uint16_t) 0x202FUL,
1137 (duk_uint16_t) 0x205FUL, (duk_uint16_t) 0x205FUL,
1138 (duk_uint16_t) 0x3000UL, (duk_uint16_t) 0x3000UL,
1139 (duk_uint16_t) 0xFEFFUL, (duk_uint16_t) 0xFEFFUL,
1140};

◆ duk_unicode_re_ranges_wordchar

DUK_INTERNAL const duk_uint16_t duk_unicode_re_ranges_wordchar[8]
Initial value:
= {
(duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,
(duk_uint16_t) 0x0041UL, (duk_uint16_t) 0x005AUL,
(duk_uint16_t) 0x005FUL, (duk_uint16_t) 0x005FUL,
(duk_uint16_t) 0x0061UL, (duk_uint16_t) 0x007AUL,
}

Definition at line 1141 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

1141 {
1142 (duk_uint16_t) 0x0030UL, (duk_uint16_t) 0x0039UL,
1143 (duk_uint16_t) 0x0041UL, (duk_uint16_t) 0x005AUL,
1144 (duk_uint16_t) 0x005FUL, (duk_uint16_t) 0x005FUL,
1145 (duk_uint16_t) 0x0061UL, (duk_uint16_t) 0x007AUL,
1146};

◆ duk_unicode_xutf8_markers

DUK_INTERNAL const duk_uint8_t duk_unicode_xutf8_markers[7]
Initial value:
= {
0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe
}

Definition at line 81 of file duktape-1.5.2/src-separate/duk_unicode_support.c.

81 {
82 0x00, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc, 0xfe
83};

Referenced by duk_unicode_encode_xutf8().