1 | |
2 | |
3 | |
4 | |
5 | |
6 | |
7 | |
8 | |
9 | |
10 | |
11 | |
12 | |
13 | |
14 | |
15 | |
16 | |
17 | |
18 | |
19 | |
20 | |
21 | |
22 | |
23 | |
24 | |
25 | |
26 | |
27 | |
28 | |
29 | |
30 | |
31 | |
32 | |
33 | |
34 | |
35 | |
36 | |
37 | |
38 | |
39 | |
40 | #include "avmplus.h" |
41 | |
42 | #ifdef VMCFG_EVAL |
43 | |
44 | #include "eval.h" |
45 | |
46 | namespace avmplus |
47 | { |
48 | namespace RTC |
49 | { |
50 | #define OCT CHAR_ATTR_OCTAL | CHAR_ATTR_DECIMAL | CHAR_ATTR_HEX |
51 | #define DEC CHAR_ATTR_DECIMAL | CHAR_ATTR_HEX |
52 | #define HEX CHAR_ATTR_HEX | CHAR_ATTR_LETTER |
53 | #define LTR CHAR_ATTR_LETTER |
54 | #define DLR CHAR_ATTR_DOLLAR |
55 | #define UBR CHAR_ATTR_UNDERBAR |
56 | |
57 | const uint8_t Lexer::char_attrs[128] = { |
58 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
59 | 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
60 | 0, 0, 0, 0, DLR, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, |
61 | OCT, OCT, OCT, OCT, OCT, OCT, OCT, OCT, DEC, DEC, 0, 0, 0, 0, 0, 0, |
62 | 0, HEX, HEX, HEX, HEX, HEX, HEX, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, |
63 | LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, 0, 0, 0, 0, UBR, |
64 | 0, HEX, HEX, HEX, HEX, HEX, HEX, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, |
65 | LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, LTR, 0, 0, 0, 0, 0 |
66 | }; |
67 | |
68 | #undef OCT |
69 | #undef DEC |
70 | #undef HEX |
71 | #undef LTR |
72 | #undef DLR |
73 | #undef UBR |
74 | |
75 | |
76 | |
77 | |
78 | |
79 | Lexer::Lexer(Compiler* compiler, const wchar* src, uint32_t srclen, boolbool keyword_or_ident) |
80 | : compiler(compiler) |
81 | , src(src) |
82 | , limit(src + srclen - 1) |
83 | , idx(src) |
84 | , mark(NULL__null) |
85 | , lineno(1) |
86 | , keyword_or_ident(keyword_or_ident) |
87 | #ifdef DEBUG |
88 | , last_token(T_LAST) |
89 | , traceflag(falsefalse) |
90 | #endif |
91 | { |
92 | AvmAssert(*limit == 0)do { } while (0); |
93 | } |
94 | |
95 | |
96 | |
97 | |
98 | |
99 | |
100 | |
101 | |
102 | |
103 | |
104 | Token Lexer::regexpImpl() |
105 | { |
106 | AvmAssert(last_token == T_BreakSlash)do { } while (0); |
107 | |
108 | StringBuilder s(compiler); |
109 | boolbool in_charset = falsefalse; |
110 | int c; |
111 | |
112 | s.append('/'); |
113 | for (;;) { |
114 | c = *idx++; |
115 | switch (c) { |
116 | case 0: |
117 | if (idx == limit) |
118 | compiler->syntaxError(lineno, SYNTAXERR_EOT_IN_REGEXP); |
119 | break; |
120 | case '/': |
121 | if (!in_charset) |
122 | goto end_loop; |
123 | break; |
124 | case '[': |
125 | in_charset = truetrue; |
126 | break; |
127 | case ']': |
128 | in_charset = falsefalse; |
129 | break; |
130 | case '\\': |
131 | c = *idx++; |
132 | switch (c) { |
133 | case 0: |
134 | if (idx == limit) |
135 | compiler->syntaxError(lineno, SYNTAXERR_EOT_IN_REGEXP); |
136 | break; |
137 | case '\r': |
138 | if (*idx == '\n') |
139 | idx++; |
140 | if (compiler->standard_regex) |
141 | compiler->syntaxError(lineno, SYNTAXERR_NEWLINE_IN_REGEXP); |
142 | continue; |
143 | case '\n': |
144 | case UNICHAR_LS: |
145 | case UNICHAR_PS: |
146 | if (compiler->standard_regex) |
147 | compiler->syntaxError(lineno, SYNTAXERR_NEWLINE_IN_REGEXP); |
148 | continue; |
149 | case 'u': |
150 | |
151 | mark = idx; |
152 | if (hexDigits(4)) { |
153 | s.append((wchar)parseInt(16)); |
154 | continue; |
155 | } |
156 | idx = mark; |
157 | s.append('u'); |
158 | continue; |
159 | } |
160 | s.append('\\'); |
161 | break; |
162 | case '\n': |
163 | case '\r': |
164 | case UNICHAR_LS: |
165 | case UNICHAR_PS: |
166 | compiler->syntaxError(lineno, SYNTAXERR_NEWLINE_IN_REGEXP); |
167 | } |
168 | s.append(c); |
169 | } |
170 | end_loop: |
171 | s.append('/'); |
172 | |
173 | |
174 | while (isUnicodeIdentifierPart(c = *idx)) { |
175 | idx++; |
176 | s.append(c); |
177 | } |
178 | |
179 | val.s = s.str(); |
180 | DEBUG_ONLY(last_token = T_RegexpLiteral); |
181 | return T_RegexpLiteral; |
182 | } |
183 | |
184 | Token Lexer::divideOperatorImpl() |
185 | { |
186 | AvmAssert(last_token == T_BreakSlash)do { } while (0); |
187 | switch (*idx) { |
188 | case '=': |
189 | idx++; |
190 | return T_DivideAssign; |
191 | |
192 | default : |
193 | return T_Divide; |
194 | } |
195 | } |
196 | |
197 | Token Lexer::rightAngleImpl() |
198 | { |
199 | AvmAssert(last_token == T_BreakRightAngle)do { } while (0); |
200 | return T_GreaterThan; |
201 | } |
202 | |
203 | Token Lexer::rightShiftOrRelationalOperatorImpl() |
204 | { |
205 | AvmAssert(last_token == T_BreakRightAngle)do { } while (0); |
206 | switch (*idx) { |
207 | case '=': |
208 | idx++; |
209 | return T_GreaterThanOrEqual; |
210 | |
211 | case '>': |
212 | idx++; |
213 | switch (*idx) { |
214 | case '=': |
215 | idx++; |
216 | return T_RightShiftAssign; |
217 | |
218 | case '>': |
219 | idx++; |
220 | if (*idx == '=') { |
221 | idx++; |
222 | return T_UnsignedRightShiftAssign; |
223 | } |
224 | return T_UnsignedRightShift; |
225 | |
226 | default: |
227 | return T_RightShift; |
228 | } |
229 | |
230 | default: |
231 | return T_GreaterThan; |
232 | } |
233 | } |
234 | |
235 | Token Lexer::leftShiftOrRelationalOperatorImpl() |
236 | { |
237 | AvmAssert(last_token == T_BreakLeftAngle)do { } while (0); |
238 | switch (*idx) { |
239 | case '=': |
240 | idx++; |
241 | return T_LessThanOrEqual; |
242 | |
243 | case '<': |
244 | idx++; |
245 | if (*idx == '=') { |
246 | idx++; |
247 | return T_LeftShiftAssign; |
248 | } |
249 | return T_LeftShift; |
250 | |
251 | default: |
252 | return T_LessThan; |
253 | } |
254 | } |
255 | |
256 | |
257 | |
258 | |
259 | |
260 | |
261 | |
262 | |
263 | |
264 | |
265 | |
266 | |
267 | Token Lexer::lexImpl() |
268 | { |
269 | DEBUG_ONLY(last_token = T_LAST); |
270 | for (;;) { |
271 | switch (*idx++) { |
272 | case 0: |
273 | if (idx >= limit) { |
274 | idx = limit; |
275 | return compiler->parser.onEOS(&lineno, &val); |
276 | } |
277 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGALCHAR_NUL); |
278 | |
279 | case '\n' : |
280 | lineno++; |
281 | continue; |
282 | |
283 | case '\r' : |
284 | if (*idx == '\n') |
285 | idx++; |
286 | lineno++; |
287 | continue; |
288 | |
289 | case ' ' : |
290 | case '\t' : |
291 | case '\v' : |
292 | case '\f' : |
293 | continue; |
294 | |
295 | case '(': |
296 | return T_LeftParen; |
297 | |
298 | case ')': |
299 | return T_RightParen; |
300 | |
301 | case ',': |
302 | return T_Comma; |
303 | |
304 | case ';': |
305 | return T_Semicolon; |
306 | |
307 | case '?': |
308 | return T_Question; |
309 | |
310 | case '[': |
311 | return T_LeftBracket; |
312 | |
313 | case ']': |
314 | return T_RightBracket; |
315 | |
316 | case '{': |
317 | return T_LeftBrace; |
318 | |
319 | case '}': |
320 | return T_RightBrace; |
321 | |
322 | case '~': |
323 | return T_BitwiseNot; |
324 | |
325 | case '/': |
326 | switch (*idx) { |
327 | case '/': |
328 | idx++; |
329 | lineComment(); |
330 | continue; |
331 | |
332 | case '*': |
333 | idx++; |
334 | blockComment(); |
335 | continue; |
336 | |
337 | default: |
338 | DEBUG_ONLY(last_token = T_BreakSlash); |
339 | return T_BreakSlash; |
340 | } |
341 | |
342 | case '\'': |
343 | case '"': |
344 | return stringLiteral(idx[-1]); |
345 | |
346 | case '.': |
347 | switch (*idx) { |
348 | case '.': |
349 | if (idx[1] == '.') { |
350 | idx += 2; |
351 | return T_TripleDot; |
352 | } |
353 | idx++; |
354 | return T_DoubleDot; |
355 | |
356 | case '<': |
357 | idx++; |
358 | return T_LeftDotAngle; |
359 | |
360 | case '0': |
361 | case '1': |
362 | case '2': |
363 | case '3': |
364 | case '4': |
365 | case '5': |
366 | case '6': |
367 | case '7': |
368 | case '8': |
369 | case '9': |
370 | mark = --idx; |
371 | return numberLiteral(); |
372 | |
373 | default: |
374 | return T_Dot; |
375 | } |
376 | |
377 | case '-': |
378 | switch (*idx) { |
379 | case '-': |
380 | idx++; |
381 | return T_MinusMinus; |
382 | |
383 | case '=': |
384 | idx++; |
385 | return T_MinusAssign; |
386 | |
387 | default: |
388 | return T_Minus; |
389 | } |
390 | |
391 | case '!': |
392 | if (*idx == '=') { |
393 | idx++; |
394 | if (*idx == '=') { |
395 | idx++; |
396 | return T_StrictNotEqual; |
397 | } |
398 | return T_NotEqual; |
399 | } |
400 | return T_Not; |
401 | |
402 | case '%': |
403 | if (*idx == '=') { |
404 | idx++; |
405 | return T_RemainderAssign; |
406 | } |
407 | return T_Remainder; |
408 | |
409 | case '&': |
410 | switch (*idx) { |
411 | case '=': |
412 | idx++; |
413 | return T_BitwiseAndAssign; |
414 | |
415 | case '&': |
416 | idx++; |
417 | if (*idx == '=') { |
418 | idx++; |
419 | return T_LogicalAndAssign; |
420 | } |
421 | return T_LogicalAnd; |
422 | |
423 | default: |
424 | return T_BitwiseAnd; |
425 | } |
426 | |
427 | case '*': |
428 | if (*idx == '=') { |
429 | idx++; |
430 | return T_MultiplyAssign; |
431 | } |
432 | return T_Multiply; |
433 | |
434 | case ':': |
435 | if (*idx == ':') { |
436 | idx++; |
437 | return T_DoubleColon; |
438 | } |
439 | return T_Colon; |
440 | |
441 | case '^': |
442 | if (*idx == '=') { |
443 | idx++; |
444 | return T_BitwiseXorAssign; |
445 | } |
446 | return T_BitwiseXor; |
447 | |
448 | case '|': |
449 | switch (*idx) { |
450 | case '=': |
451 | idx++; |
452 | return T_BitwiseOrAssign; |
453 | |
454 | case '|': |
455 | idx++; |
456 | if (*idx == '=') { |
457 | idx++; |
458 | return T_LogicalOrAssign; |
459 | } |
460 | return T_LogicalOr; |
461 | |
462 | default: |
463 | return T_BitwiseOr; |
464 | } |
465 | |
466 | case '+': |
467 | switch (*idx) { |
468 | case '+': |
469 | idx++; |
470 | return T_PlusPlus; |
471 | |
472 | case '=': |
473 | idx++; |
474 | return T_PlusAssign; |
475 | |
476 | default: |
477 | return T_Plus; |
478 | } |
479 | |
480 | case '<': |
481 | DEBUG_ONLY(last_token = T_BreakLeftAngle); |
482 | return T_BreakLeftAngle; |
483 | |
484 | case '=': |
485 | if (*idx == '=') { |
486 | idx++; |
487 | if (*idx == '=') { |
488 | idx++; |
489 | return T_StrictEqual; |
490 | } |
491 | return T_Equal; |
492 | } |
493 | return T_Assign; |
494 | |
495 | case '>': |
496 | DEBUG_ONLY(last_token = T_BreakRightAngle); |
497 | return T_BreakRightAngle; |
498 | |
499 | case '@': |
500 | return T_AtSign; |
501 | |
502 | case '`': |
503 | return identifier(); |
504 | |
505 | |
506 | case 'a': |
507 | if (idx[0] == 's' && |
508 | !compiler->es3_keywords && |
509 | notPartOfIdent(idx[1])) { |
510 | idx += 1; |
511 | return T_As; |
512 | } |
513 | goto bigswitch_end; |
514 | case 'b': |
515 | if (idx[0] == 'r' && |
516 | idx[1] == 'e' && |
517 | idx[2] == 'a' && |
518 | idx[3] == 'k' && |
519 | notPartOfIdent(idx[4])) { |
520 | idx += 4; |
521 | return T_Break; |
522 | } |
523 | goto bigswitch_end; |
524 | case 'c': |
525 | switch(idx[0]) { |
526 | case 'a': |
527 | switch(idx[1]) { |
528 | case 's': |
529 | if (idx[2] == 'e' && |
530 | notPartOfIdent(idx[3])) { |
531 | idx += 3; |
532 | return T_Case; |
533 | } |
534 | goto bigswitch_end; |
535 | case 't': |
536 | if (idx[2] == 'c' && |
537 | idx[3] == 'h' && |
538 | notPartOfIdent(idx[4])) { |
539 | idx += 4; |
540 | return T_Catch; |
541 | } |
542 | goto bigswitch_end; |
543 | default: |
544 | goto bigswitch_end; |
545 | } |
546 | case 'l': |
547 | if (idx[1] == 'a' && |
548 | idx[2] == 's' && |
549 | idx[3] == 's' && |
550 | !compiler->es3_keywords && |
551 | notPartOfIdent(idx[4])) { |
552 | idx += 4; |
553 | return T_Class; |
554 | } |
555 | goto bigswitch_end; |
556 | case 'o': |
557 | switch(idx[1]) { |
558 | case 'n': |
559 | switch(idx[2]) { |
560 | case 's': |
561 | if (idx[3] == 't' && |
562 | !compiler->es3_keywords && |
563 | notPartOfIdent(idx[4])) { |
564 | idx += 4; |
565 | return T_Const; |
566 | } |
567 | goto bigswitch_end; |
568 | case 't': |
569 | if (idx[3] == 'i' && |
570 | idx[4] == 'n' && |
571 | idx[5] == 'u' && |
572 | idx[6] == 'e' && |
573 | notPartOfIdent(idx[7])) { |
574 | idx += 7; |
575 | return T_Continue; |
576 | } |
577 | goto bigswitch_end; |
578 | default: |
579 | goto bigswitch_end; |
580 | } |
581 | default: |
582 | goto bigswitch_end; |
583 | } |
584 | default: |
585 | goto bigswitch_end; |
586 | } |
587 | case 'd': |
588 | switch(idx[0]) { |
589 | case 'e': |
590 | switch(idx[1]) { |
591 | case 'f': |
592 | if (idx[2] == 'a' && |
593 | idx[3] == 'u' && |
594 | idx[4] == 'l' && |
595 | idx[5] == 't' && |
596 | notPartOfIdent(idx[6])) { |
597 | idx += 6; |
598 | return T_Default; |
599 | } |
600 | goto bigswitch_end; |
601 | case 'l': |
602 | if (idx[2] == 'e' && |
603 | idx[3] == 't' && |
604 | idx[4] == 'e' && |
605 | notPartOfIdent(idx[5])) { |
606 | idx += 5; |
607 | return T_Delete; |
608 | } |
609 | goto bigswitch_end; |
610 | default: |
611 | goto bigswitch_end; |
612 | } |
613 | case 'o': |
614 | if (!notPartOfIdent(idx[1])) |
615 | goto bigswitch_end; |
616 | idx += 1; |
617 | return T_Do; |
618 | case 'y': |
619 | if (idx[1] == 'n' && |
620 | idx[2] == 'a' && |
621 | idx[3] == 'm' && |
622 | idx[4] == 'i' && |
623 | idx[5] == 'c' && |
624 | !compiler->es3_keywords && |
625 | notPartOfIdent(idx[6])) { |
626 | idx += 6; |
627 | return T_Dynamic; |
628 | } |
629 | goto bigswitch_end; |
630 | default: |
631 | goto bigswitch_end; |
632 | } |
633 | case 'e': |
634 | if (idx[0] == 'l' && |
635 | idx[1] == 's' && |
636 | idx[2] == 'e' && |
637 | notPartOfIdent(idx[3])) { |
638 | idx += 3; |
639 | return T_Else; |
640 | } |
641 | goto bigswitch_end; |
642 | case 'f': |
643 | switch(idx[0]) { |
644 | case 'a': |
645 | if (idx[1] == 'l' && |
646 | idx[2] == 's' && |
647 | idx[3] == 'e' && |
648 | notPartOfIdent(idx[4])) { |
649 | idx += 4; |
650 | return T_False; |
651 | } |
652 | goto bigswitch_end; |
653 | case 'i': |
654 | switch(idx[1]) { |
655 | case 'n': |
656 | switch(idx[2]) { |
657 | case 'a': |
658 | switch(idx[3]) { |
659 | case 'l': |
660 | if (idx[4] == 'l' && |
661 | idx[5] == 'y' && |
662 | !compiler->es3_keywords && |
663 | notPartOfIdent(idx[6])) { |
664 | idx += 6; |
665 | return T_Finally; |
666 | } |
667 | if (!compiler->es3_keywords && !notPartOfIdent(idx[4])) |
668 | goto bigswitch_end; |
669 | idx += 4; |
670 | return T_Final; |
671 | default: |
672 | goto bigswitch_end; |
673 | } |
674 | default: |
675 | goto bigswitch_end; |
676 | } |
677 | default: |
678 | goto bigswitch_end; |
679 | } |
680 | case 'o': |
681 | if (idx[1] == 'r' && |
682 | notPartOfIdent(idx[2])) { |
683 | idx += 2; |
684 | return T_For; |
685 | } |
686 | goto bigswitch_end; |
687 | case 'u': |
688 | if (idx[1] == 'n' && |
689 | idx[2] == 'c' && |
690 | idx[3] == 't' && |
691 | idx[4] == 'i' && |
692 | idx[5] == 'o' && |
693 | idx[6] == 'n' && |
694 | notPartOfIdent(idx[7])) { |
695 | idx += 7; |
696 | return T_Function; |
697 | } |
698 | goto bigswitch_end; |
699 | default: |
700 | goto bigswitch_end; |
701 | } |
702 | case 'i': |
703 | switch(idx[0]) { |
704 | case 'f': |
705 | if (!notPartOfIdent(idx[1])) |
706 | goto bigswitch_end; |
707 | idx += 1; |
708 | return T_If; |
709 | case 'm': |
710 | if (idx[1] == 'p' && |
711 | idx[2] == 'o' && |
712 | idx[3] == 'r' && |
713 | idx[4] == 't' && |
714 | !compiler->es3_keywords && |
715 | notPartOfIdent(idx[5])) { |
716 | idx += 5; |
717 | return T_Import; |
718 | } |
719 | goto bigswitch_end; |
720 | case 'n': |
721 | switch(idx[1]) { |
722 | case 'c': |
723 | if (idx[2] == 'l' && |
724 | idx[3] == 'u' && |
725 | idx[4] == 'd' && |
726 | idx[5] == 'e' && |
727 | !compiler->es3_keywords && |
728 | notPartOfIdent(idx[6])) { |
729 | idx += 6; |
730 | return T_Include; |
731 | } |
732 | goto bigswitch_end; |
733 | case 's': |
734 | if (idx[2] == 't' && |
735 | idx[3] == 'a' && |
736 | idx[4] == 'n' && |
737 | idx[5] == 'c' && |
738 | idx[6] == 'e' && |
739 | idx[7] == 'o' && |
740 | idx[8] == 'f' && |
741 | notPartOfIdent(idx[9])) { |
742 | idx += 9; |
743 | return T_InstanceOf; |
744 | } |
745 | goto bigswitch_end; |
746 | case 't': |
747 | switch(idx[2]) { |
748 | case 'e': |
749 | switch(idx[3]) { |
750 | case 'r': |
751 | switch(idx[4]) { |
752 | case 'f': |
753 | if (idx[5] == 'a' && |
754 | idx[6] == 'c' && |
755 | idx[7] == 'e' && |
756 | !compiler->es3_keywords && |
757 | notPartOfIdent(idx[8])) { |
758 | idx += 8; |
759 | return T_Interface; |
760 | } |
761 | goto bigswitch_end; |
762 | case 'n': |
763 | if (idx[5] == 'a' && |
764 | idx[6] == 'l' && |
765 | !compiler->es3_keywords && |
766 | notPartOfIdent(idx[7])) { |
767 | idx += 7; |
768 | return T_Internal; |
769 | } |
770 | goto bigswitch_end; |
771 | default: |
772 | goto bigswitch_end; |
773 | } |
774 | default: |
775 | goto bigswitch_end; |
776 | } |
777 | default: |
778 | goto bigswitch_end; |
779 | } |
780 | default: |
781 | if (!notPartOfIdent(idx[1])) |
782 | goto bigswitch_end; |
783 | idx += 1; |
784 | return T_In; |
785 | } |
786 | case 's': |
787 | if (!compiler->es3_keywords && !notPartOfIdent(idx[1])) |
788 | goto bigswitch_end; |
789 | idx += 1; |
790 | return T_Is; |
791 | default: |
792 | goto bigswitch_end; |
793 | } |
794 | case 'n': |
795 | switch(idx[0]) { |
796 | case 'a': |
797 | switch(idx[1]) { |
798 | case 'm': |
799 | if (idx[2] == 'e' && |
800 | idx[3] == 's' && |
801 | idx[4] == 'p' && |
802 | idx[5] == 'a' && |
803 | idx[6] == 'c' && |
804 | idx[7] == 'e' && |
805 | !compiler->es3_keywords && |
806 | notPartOfIdent(idx[8])) { |
807 | idx += 8; |
808 | return T_Namespace; |
809 | } |
810 | goto bigswitch_end; |
811 | case 't': |
812 | if (idx[2] == 'i' && |
813 | idx[3] == 'v' && |
814 | idx[4] == 'e' && |
815 | !compiler->es3_keywords && |
816 | notPartOfIdent(idx[5])) { |
817 | idx += 5; |
818 | return T_Native; |
819 | } |
820 | goto bigswitch_end; |
821 | default: |
822 | goto bigswitch_end; |
823 | } |
824 | case 'e': |
825 | if (idx[1] == 'w' && |
826 | notPartOfIdent(idx[2])) { |
827 | idx += 2; |
828 | return T_New; |
829 | } |
830 | goto bigswitch_end; |
831 | case 'u': |
832 | if (idx[1] == 'l' && |
833 | idx[2] == 'l' && |
834 | notPartOfIdent(idx[3])) { |
835 | idx += 3; |
836 | return T_Null; |
837 | } |
838 | goto bigswitch_end; |
839 | default: |
840 | goto bigswitch_end; |
841 | } |
842 | case 'o': |
843 | if (idx[0] == 'v' && |
844 | idx[1] == 'e' && |
845 | idx[2] == 'r' && |
846 | idx[3] == 'r' && |
847 | idx[4] == 'i' && |
848 | idx[5] == 'd' && |
849 | idx[6] == 'e' && |
850 | !compiler->es3_keywords && |
851 | notPartOfIdent(idx[7])) { |
852 | idx += 7; |
853 | return T_Override; |
854 | } |
855 | goto bigswitch_end; |
856 | case 'p': |
857 | switch(idx[0]) { |
858 | case 'a': |
859 | if (idx[1] == 'c' && |
860 | idx[2] == 'k' && |
861 | idx[3] == 'a' && |
862 | idx[4] == 'g' && |
863 | idx[5] == 'e' && |
864 | !compiler->es3_keywords && |
865 | notPartOfIdent(idx[6])) { |
866 | idx += 6; |
867 | return T_Package; |
868 | } |
869 | goto bigswitch_end; |
870 | case 'r': |
871 | switch(idx[1]) { |
872 | case 'i': |
873 | if (idx[2] == 'v' && |
874 | idx[3] == 'a' && |
875 | idx[4] == 't' && |
876 | idx[5] == 'e' && |
877 | !compiler->es3_keywords && |
878 | notPartOfIdent(idx[6])) { |
879 | idx += 6; |
880 | return T_Private; |
881 | } |
882 | goto bigswitch_end; |
883 | case 'o': |
884 | if (idx[2] == 't' && |
885 | idx[3] == 'e' && |
886 | idx[4] == 'c' && |
887 | idx[5] == 't' && |
888 | idx[6] == 'e' && |
889 | idx[7] == 'd' && |
890 | !compiler->es3_keywords && |
891 | notPartOfIdent(idx[8])) { |
892 | idx += 8; |
893 | return T_Protected; |
894 | } |
895 | goto bigswitch_end; |
896 | default: |
897 | goto bigswitch_end; |
898 | } |
899 | case 'u': |
900 | if (idx[1] == 'b' && |
901 | idx[2] == 'l' && |
902 | idx[3] == 'i' && |
903 | idx[4] == 'c' && |
904 | !compiler->es3_keywords && |
905 | notPartOfIdent(idx[5])) { |
906 | idx += 5; |
907 | return T_Public; |
908 | } |
909 | goto bigswitch_end; |
910 | default: |
911 | goto bigswitch_end; |
912 | } |
913 | case 'r': |
914 | if (idx[0] == 'e' && |
915 | idx[1] == 't' && |
916 | idx[2] == 'u' && |
917 | idx[3] == 'r' && |
918 | idx[4] == 'n' && |
919 | notPartOfIdent(idx[5])) { |
920 | idx += 5; |
921 | return T_Return; |
922 | } |
923 | goto bigswitch_end; |
924 | case 's': |
925 | switch(idx[0]) { |
926 | case 't': |
927 | if (idx[1] == 'a' && |
928 | idx[2] == 't' && |
929 | idx[3] == 'i' && |
930 | idx[4] == 'c' && |
931 | !compiler->es3_keywords && |
932 | notPartOfIdent(idx[5])) { |
933 | idx += 5; |
934 | return T_Static; |
935 | } |
936 | goto bigswitch_end; |
937 | case 'u': |
938 | if (idx[1] == 'p' && |
939 | idx[2] == 'e' && |
940 | idx[3] == 'r' && |
941 | !compiler->es3_keywords && |
942 | notPartOfIdent(idx[4])) { |
943 | idx += 4; |
944 | return T_Super; |
945 | } |
946 | goto bigswitch_end; |
947 | case 'w': |
948 | if (idx[1] == 'i' && |
949 | idx[2] == 't' && |
950 | idx[3] == 'c' && |
951 | idx[4] == 'h' && |
952 | notPartOfIdent(idx[5])) { |
953 | idx += 5; |
954 | return T_Switch; |
955 | } |
956 | goto bigswitch_end; |
957 | default: |
958 | goto bigswitch_end; |
959 | } |
960 | case 't': |
961 | switch(idx[0]) { |
962 | case 'h': |
963 | switch(idx[1]) { |
964 | case 'i': |
965 | if (idx[2] == 's' && |
966 | notPartOfIdent(idx[3])) { |
967 | idx += 3; |
968 | return T_This; |
969 | } |
970 | goto bigswitch_end; |
971 | case 'r': |
972 | if (idx[2] == 'o' && |
973 | idx[3] == 'w' && |
974 | notPartOfIdent(idx[4])) { |
975 | idx += 4; |
976 | return T_Throw; |
977 | } |
978 | goto bigswitch_end; |
979 | default: |
980 | goto bigswitch_end; |
981 | } |
982 | case 'r': |
983 | switch(idx[1]) { |
984 | case 'u': |
985 | if (idx[2] == 'e' && |
986 | notPartOfIdent(idx[3])) { |
987 | idx += 3; |
988 | return T_True; |
989 | } |
990 | goto bigswitch_end; |
991 | case 'y': |
992 | if (!notPartOfIdent(idx[2])) |
993 | goto bigswitch_end; |
994 | idx += 2; |
995 | return T_Try; |
996 | default: |
997 | goto bigswitch_end; |
998 | } |
999 | case 'y': |
1000 | if (idx[1] == 'p' && |
1001 | idx[2] == 'e' && |
1002 | idx[3] == 'o' && |
1003 | idx[4] == 'f' && |
1004 | notPartOfIdent(idx[5])) { |
1005 | idx += 5; |
1006 | return T_TypeOf; |
1007 | } |
1008 | goto bigswitch_end; |
1009 | default: |
1010 | goto bigswitch_end; |
1011 | } |
1012 | case 'u': |
1013 | if (idx[0] == 's' && |
1014 | idx[1] == 'e' && |
1015 | !compiler->es3_keywords && |
1016 | notPartOfIdent(idx[2])) { |
1017 | idx += 2; |
1018 | return T_Use; |
1019 | } |
1020 | goto bigswitch_end; |
1021 | case 'v': |
1022 | switch(idx[0]) { |
1023 | case 'a': |
1024 | if (idx[1] == 'r' && |
1025 | notPartOfIdent(idx[2])) { |
1026 | idx += 2; |
1027 | return T_Var; |
1028 | } |
1029 | goto bigswitch_end; |
1030 | case 'o': |
1031 | if (idx[1] == 'i' && |
1032 | idx[2] == 'd' && |
1033 | notPartOfIdent(idx[3])) { |
1034 | idx += 3; |
1035 | return T_Void; |
1036 | } |
1037 | goto bigswitch_end; |
1038 | default: |
1039 | goto bigswitch_end; |
1040 | } |
1041 | case 'w': |
1042 | switch(idx[0]) { |
1043 | case 'h': |
1044 | if (idx[1] == 'i' && |
1045 | idx[2] == 'l' && |
1046 | idx[3] == 'e' && |
1047 | notPartOfIdent(idx[4])) { |
1048 | idx += 4; |
1049 | return T_While; |
1050 | } |
1051 | goto bigswitch_end; |
1052 | case 'i': |
1053 | if (idx[1] == 't' && |
1054 | idx[2] == 'h' && |
1055 | notPartOfIdent(idx[3])) { |
1056 | idx += 3; |
1057 | return T_With; |
1058 | } |
1059 | goto bigswitch_end; |
1060 | default: |
1061 | goto bigswitch_end; |
1062 | } |
1063 | |
1064 | |
1065 | |
1066 | case '\\': |
1067 | |
1068 | |
1069 | |
1070 | goto bigswitch_end; |
1071 | |
1072 | case '0': |
1073 | case '1': |
1074 | case '2': |
1075 | case '3': |
1076 | case '4': |
1077 | case '5': |
1078 | case '6': |
1079 | case '7': |
1080 | case '8': |
1081 | case '9': |
1082 | mark = --idx; |
1083 | return numberLiteral(); |
1084 | |
1085 | default: { |
1086 | |
1087 | |
1088 | |
1089 | |
1090 | |
1091 | int c = idx[-1]; |
1092 | |
1093 | |
1094 | |
1095 | if (c == 0x00A0) |
1096 | continue; |
1097 | |
1098 | |
1099 | |
1100 | |
1101 | |
1102 | if (c >= UNICHAR_LOWEST_ODDSPACE) { |
1103 | switch (c) { |
1104 | case UNICHAR_Zs1: |
1105 | case UNICHAR_Zs2: |
1106 | case UNICHAR_Zs3: |
1107 | case UNICHAR_Zs4: |
1108 | case UNICHAR_Zs5: |
1109 | case UNICHAR_Zs6: |
1110 | case UNICHAR_Zs7: |
1111 | case UNICHAR_Zs8: |
1112 | case UNICHAR_Zs9: |
1113 | case UNICHAR_Zs10: |
1114 | case UNICHAR_Zs11: |
1115 | case UNICHAR_Zs12: |
1116 | case UNICHAR_Zs13: |
1117 | case UNICHAR_Zs14: |
1118 | case UNICHAR_Zs15: |
1119 | case UNICHAR_Zs16: |
1120 | case UNICHAR_BOM: |
1121 | continue; |
1122 | case UNICHAR_LS: |
1123 | case UNICHAR_PS: |
1124 | lineno++; |
1125 | continue; |
1126 | } |
1127 | } |
1128 | |
1129 | goto bigswitch_end; |
1130 | } |
1131 | } |
1132 | |
1133 | bigswitch_end: |
1134 | |
1135 | |
1136 | |
1137 | |
1138 | |
1139 | |
1140 | |
1141 | |
1142 | |
1143 | |
1144 | |
1145 | |
1146 | |
1147 | |
1148 | |
1149 | |
1150 | |
1151 | if (keyword_or_ident) { |
1152 | DEBUG_ONLY(last_token = T_Identifier); |
1153 | return T_Identifier; |
1154 | } |
1155 | |
1156 | --idx; |
1157 | return identifier(); |
1158 | |
1159 | } |
1160 | } |
1161 | |
1162 | |
1163 | |
1164 | |
1165 | Token Lexer::numberLiteral() |
1166 | { |
1167 | switch (*idx) { |
1168 | case '0': |
1169 | |
1170 | switch (idx[1]) { |
1171 | case 'x': |
1172 | case 'X': |
1173 | idx += 2; |
1174 | mark = idx; |
1175 | if (!hexDigits(-1)) |
1176 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGAL_NUMBER); |
1177 | return integerLiteral(16); |
1178 | |
1179 | case '.': |
1180 | idx += 2; |
1181 | numberFraction(truetrue); |
1182 | return floatingLiteral(); |
1183 | |
1184 | case 'E': |
1185 | case 'e': |
1186 | idx += 2; |
1187 | numberExponent(); |
1188 | return floatingLiteral(); |
1189 | |
1190 | default: { |
1191 | if (!compiler->octal_literals) |
1192 | break; |
1193 | |
1194 | |
1195 | const wchar* startIndex = idx; |
1196 | octalDigits(-1); |
1197 | switch (*idx) { |
1198 | case '8': |
1199 | case '9': |
1200 | |
1201 | |
1202 | idx = startIndex; |
1203 | break; |
1204 | default: |
1205 | return integerLiteral(8); |
1206 | } |
1207 | } |
1208 | } |
1209 | break; |
1210 | |
1211 | case '.': |
1212 | idx++; |
1213 | numberFraction(falsefalse); |
1214 | return floatingLiteral(); |
1215 | } |
1216 | |
1217 | if (numberLiteralPrime()) |
1218 | return floatingLiteral(); |
1219 | else |
1220 | return integerLiteral(10); |
1221 | } |
1222 | |
1223 | Token Lexer::integerLiteral(int base) |
1224 | { |
1225 | checkNextCharForNumber(); |
1226 | double n = parseInt(base); |
1227 | if (n >= (-0x7fffffff - 1) && n <= 0x7FFFFFFF) { |
1228 | val.i = (int32_t)n; |
1229 | DEBUG_ONLY(last_token = T_IntLiteral); |
1230 | return T_IntLiteral; |
1231 | } |
1232 | if (n >= 0x80000000U && n <= 0xFFFFFFFFU) { |
1233 | val.u = (uint32_t)n; |
1234 | DEBUG_ONLY(last_token = T_UIntLiteral); |
1235 | return T_UIntLiteral; |
1236 | } |
1237 | val.d = n; |
1238 | DEBUG_ONLY(last_token = T_DoubleLiteral); |
1239 | return T_DoubleLiteral; |
1240 | } |
1241 | |
1242 | Token Lexer::floatingLiteral() |
1243 | { |
1244 | checkNextCharForNumber(); |
1245 | val.d = parseDouble(); |
1246 | DEBUG_ONLY(last_token = T_DoubleLiteral); |
1247 | return T_DoubleLiteral; |
1248 | } |
1249 | |
1250 | |
1251 | |
1252 | |
1253 | |
1254 | void Lexer::checkNextCharForNumber() |
1255 | { |
1256 | int c = *idx; |
1257 | if ((c >= '0' && c <= '9') || isUnicodeIdentifierStart(c)) |
1258 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGALCHAR_POSTNUMBER, c); |
1259 | } |
1260 | |
1261 | |
1262 | |
1263 | |
1264 | boolbool Lexer::numberLiteralPrime() |
1265 | { |
1266 | if (!decimalDigits(-1)) |
1267 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGAL_NUMBER); |
1268 | |
1269 | switch (*idx) { |
1270 | case '.': |
1271 | idx++; |
1272 | numberFraction (truetrue); |
1273 | return truetrue; |
1274 | |
1275 | case 'e': |
1276 | case 'E': |
1277 | idx++; |
1278 | numberExponent (); |
1279 | return truetrue; |
1280 | |
1281 | default: |
1282 | return falsefalse; |
1283 | } |
1284 | } |
1285 | |
1286 | |
1287 | |
1288 | |
1289 | |
1290 | |
1291 | void Lexer::numberFraction(boolbool has_leading_digits) |
1292 | { |
1293 | if (!decimalDigits (-1) && !has_leading_digits) |
1294 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGAL_NUMBER); |
1295 | |
1296 | switch (*idx) { |
1297 | case 'e': |
1298 | case 'E': |
1299 | idx++; |
1300 | numberExponent (); |
1301 | break; |
1302 | } |
1303 | } |
1304 | |
1305 | |
1306 | |
1307 | void Lexer::numberExponent() |
1308 | { |
1309 | switch (*idx) { |
1310 | case '+': |
1311 | case '-': |
1312 | idx++; |
1313 | break; |
1314 | } |
1315 | if (!decimalDigits(-1)) |
1316 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGAL_NUMBER); |
1317 | } |
1318 | |
1319 | boolbool Lexer::digits(int k, int attrmask) |
1320 | { |
1321 | const wchar* startIndex = idx; |
1322 | int c; |
1323 | while (k != 0 && (c = *idx) < 128 && (char_attrs[c] & attrmask) != 0) { |
1324 | k--; |
1325 | idx++; |
1326 | } |
1327 | return idx > startIndex && k <= 0; |
1328 | } |
1329 | |
1330 | void Lexer::lineComment() |
1331 | { |
1332 | for (;;) { |
1333 | switch (*idx++) { |
1334 | case 0: |
1335 | case '\n': |
1336 | case '\r': |
1337 | case UNICHAR_LS: |
1338 | case UNICHAR_PS: |
1339 | idx--; |
1340 | return; |
1341 | } |
1342 | } |
1343 | } |
1344 | |
1345 | void Lexer::blockComment() |
1346 | { |
1347 | for (;;) { |
1348 | int c; |
1349 | while ((c = *idx++) != '*' && |
1350 | c != 0 && |
1351 | c != '\n' && |
1352 | c != '\r' && |
1353 | c != UNICHAR_LS && |
1354 | c != UNICHAR_PS) |
1355 | ; |
1356 | if (c == '*') { |
1357 | if (*idx == '/') { |
1358 | idx++; |
1359 | return; |
1360 | } |
1361 | continue; |
1362 | } |
1363 | if (c == 0) { |
1364 | if (idx >= limit) { |
1365 | idx = limit; |
1366 | compiler->syntaxError(lineno, SYNTAXERR_EOI_IN_COMMENT); |
1367 | } |
1368 | continue; |
1369 | } |
1370 | if (c == '\r') { |
1371 | if (*idx == '\n') |
1372 | idx++; |
1373 | c = '\n'; |
1374 | } |
1375 | AvmAssert(c == '\n' || c == UNICHAR_LS || c == UNICHAR_PS)do { } while (0); |
1376 | lineno++; |
1377 | } |
1378 | } |
1379 | |
1380 | Token Lexer::identifier() |
1381 | { |
1382 | |
1383 | |
1384 | |
1385 | |
1386 | int c; |
1387 | const wchar* start = idx; |
1388 | |
1389 | if ((c = *idx) < 128 && (char_attrs[c] & CHAR_ATTR_INITIAL) != 0) { |
1390 | idx++; |
1391 | while ((c = *idx) < 128 && (char_attrs[c] & CHAR_ATTR_SUBSEQUENT) != 0) |
1392 | idx++; |
1393 | } |
1394 | |
1395 | if (notPartOfIdent(c) && c != '\\') { |
1396 | if (idx == start) |
1397 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGALCHAR, *idx); |
1398 | val.s = compiler->intern(start, uint32_t(idx-start)); |
1399 | DEBUG_ONLY(last_token = T_Identifier); |
1400 | return T_Identifier; |
1401 | } |
1402 | |
1403 | |
1404 | |
1405 | |
1406 | |
1407 | StringBuilder s(compiler); |
1408 | boolbool has_backslashes = falsefalse; |
1409 | for (;;) { |
1410 | c = *idx; |
1411 | if (c != '\\') { |
1412 | if (!(idx == start ? isUnicodeIdentifierStart(c) : isUnicodeIdentifierPart(c))) |
1413 | break; |
1414 | idx++; |
1415 | } |
1416 | else { |
1417 | has_backslashes = truetrue; |
1418 | idx++; |
1419 | c = *idx; |
1420 | if (c != 'u') |
1421 | compiler->internalError(lineno, "Only unicode escapes allowed here"); |
1422 | idx++; |
1423 | c = unicodeEscape(); |
1424 | if (!(compiler->liberal_idents || (idx == start ? isUnicodeIdentifierStart(c) : isUnicodeIdentifierPart(c)))) |
1425 | compiler->internalError(lineno, "Illegal identifier: unicode character is not allowed in identifier"); |
1426 | } |
1427 | s.append(c); |
1428 | } |
1429 | |
1430 | if (has_backslashes && !compiler->liberal_idents) { |
1431 | |
1432 | |
1433 | |
1434 | |
1435 | |
1436 | StringBuilder s2(compiler); |
1437 | s2.append(&s); |
1438 | s2.append(0); |
1439 | Str* text = s2.str(); |
1440 | uint32_t textlen = s2.length(); |
1441 | Lexer subscan(compiler, text->s, textlen, truetrue); |
1442 | uint32_t l; |
1443 | TokenValue v; |
1444 | if (subscan.lex(&l, &v) != T_Identifier) |
1445 | compiler->syntaxError(lineno, SYNTAXERR_IDENT_IS_KWD); |
1446 | AvmAssert(subscan.lex(&l, &v) == T_EOS)do { } while (0); |
1447 | } |
1448 | |
1449 | if (s.length() == 0) |
1450 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGALCHAR, *idx); |
1451 | |
1452 | val.s = s.str(); |
1453 | DEBUG_ONLY(last_token = T_Identifier); |
1454 | return T_Identifier; |
1455 | } |
1456 | |
1457 | Token Lexer::stringLiteral(int delimiter) |
1458 | { |
1459 | StringBuilder s(compiler); |
1460 | int c; |
1461 | |
1462 | |
1463 | |
1464 | |
1465 | |
1466 | |
1467 | |
1468 | for (;;) { |
1469 | const wchar* start = idx; |
1470 | |
1471 | |
1472 | |
1473 | |
1474 | while ((c = *idx) != delimiter && |
1475 | c != '\\' && |
1476 | c != 0 && |
1477 | c != '\n' && |
1478 | c != '\r' && |
1479 | c != UNICHAR_LS && |
1480 | c != UNICHAR_PS && |
1481 | c != UNICHAR_BOM) |
1482 | idx++; |
1483 | s.append(start, idx); |
1484 | |
1485 | switch (*idx) { |
1486 | case '\'': |
1487 | case '"': |
1488 | if (*idx == delimiter) { |
1489 | idx++; |
1490 | val.s = s.str(); |
1491 | DEBUG_ONLY(last_token = T_StringLiteral); |
1492 | return T_StringLiteral; |
1493 | } |
1494 | break; |
1495 | |
1496 | case '\\': |
1497 | idx++; |
1498 | |
1499 | switch (*idx) { |
1500 | case '\r': |
1501 | idx++; |
1502 | if (*idx == '\n') |
1503 | idx++; |
1504 | lineno++; |
1505 | continue; |
1506 | |
1507 | case UNICHAR_LS: |
1508 | case UNICHAR_PS: |
1509 | case '\n': |
1510 | idx++; |
1511 | lineno++; |
1512 | continue; |
1513 | |
1514 | default: |
1515 | s.append(escapeSequence()); |
1516 | continue; |
1517 | } |
1518 | |
1519 | case UNICHAR_BOM: |
1520 | s.append(' '); |
1521 | idx++; |
1522 | continue; |
1523 | |
1524 | case 0: |
1525 | if (idx < limit) { |
1526 | s.append(0); |
1527 | idx++; |
1528 | continue; |
1529 | } |
1530 | break; |
1531 | } |
1532 | |
1533 | compiler->syntaxError(lineno, SYNTAXERR_UNTERMINATED_STRING); |
1534 | } |
1535 | } |
1536 | |
1537 | int Lexer::escapeSequence() |
1538 | { |
1539 | switch (*idx) { |
1540 | case '0': |
1541 | case '1': |
1542 | case '2': |
1543 | case '3': |
1544 | case '4': |
1545 | case '5': |
1546 | case '6': |
1547 | case '7': |
1548 | return octalOrNulEscape (); |
1549 | |
1550 | case 'x': |
1551 | idx++; |
1552 | |
1553 | mark = idx; |
1554 | if (hexDigits(2)) { |
1555 | idx = mark; |
1556 | return hexEscape(2); |
1557 | } |
1558 | idx = mark; |
1559 | return 'x'; |
1560 | |
1561 | case 'u': |
1562 | idx++; |
1563 | |
1564 | mark = idx; |
1565 | if (hexDigits(4)) { |
1566 | idx = mark; |
1567 | return unicodeEscape (); |
1568 | } |
1569 | idx = mark; |
1570 | return 'u'; |
1571 | |
1572 | case 'b': |
1573 | idx++; |
1574 | return '\b'; |
1575 | |
1576 | case 'f': |
1577 | idx++; |
1578 | return '\f'; |
1579 | |
1580 | case 'n': |
1581 | idx++; |
1582 | return '\n'; |
1583 | |
1584 | case 'r': |
1585 | idx++; |
1586 | return '\r'; |
1587 | |
1588 | case 't': |
1589 | idx++; |
1590 | return '\t'; |
1591 | |
1592 | case 'v': |
1593 | idx++; |
1594 | return '\v'; |
1595 | |
1596 | case '\'': |
1597 | case '"': |
1598 | case '\\': |
1599 | return *idx++; |
1600 | |
1601 | case 0: |
1602 | if (idx+1 >= limit) |
1603 | compiler->syntaxError(lineno, SYNTAXERR_EOI_IN_ESC); |
1604 | idx++; |
1605 | return 0; |
1606 | |
1607 | case '\n': |
1608 | case '\r': |
1609 | case UNICHAR_LS: |
1610 | case UNICHAR_PS: |
1611 | compiler->syntaxError(lineno, SYNTAXERR_EOL_IN_ESC); |
1612 | |
1613 | default: |
1614 | return *idx++; |
1615 | } |
1616 | } |
1617 | |
1618 | int Lexer::octalOrNulEscape() |
1619 | { |
1620 | int c; |
1621 | if ((c = *idx) >= 128 || (char_attrs[c] & CHAR_ATTR_OCTAL) == 0) |
1622 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGAL_NUMBER); |
1623 | |
1624 | if (c == '0') { |
1625 | idx++; |
1626 | if ((c = *idx) < 128 && (char_attrs[c] & CHAR_ATTR_OCTAL) != 0) |
1627 | return octalEscape(3); |
1628 | else |
1629 | return 0; |
1630 | } |
1631 | |
1632 | if (c <= '3') |
1633 | return octalEscape(3); |
1634 | |
1635 | return octalEscape(2); |
1636 | } |
1637 | |
1638 | int Lexer::octalEscape(int n) |
1639 | { |
1640 | mark = idx; |
1641 | octalDigits(n); |
1642 | return (int)parseInt(8); |
1643 | } |
1644 | |
1645 | |
1646 | |
1647 | |
1648 | int Lexer::hexEscape(int n) |
1649 | { |
1650 | mark = idx; |
1651 | if (!hexDigits(n)) |
1652 | compiler->syntaxError(lineno, SYNTAXERR_ILLEGAL_NUMBER); |
1653 | return (int)parseInt(16); |
1654 | } |
1655 | |
1656 | |
1657 | |
1658 | int Lexer::unicodeEscape() |
1659 | { |
1660 | if (*idx == '{') { |
1661 | idx++; |
1662 | mark = idx; |
1663 | if (!hexDigits(-1) || *idx != '}') |
1664 | compiler->syntaxError(lineno, SYNTAXERR_INVALID_VAR_ESC); |
1665 | int n = (int)parseInt(16); |
1666 | idx++; |
1667 | return n; |
1668 | } |
1669 | return hexEscape(4); |
1670 | } |
1671 | |
1672 | |
1673 | |
1674 | |
1675 | static uint32_t digitValue(wchar c) |
1676 | { |
1677 | if (c <= '9') |
1678 | return c - '0'; |
1679 | if (c <= 'F') |
1680 | return c - ('A' - 10); |
1681 | return c - ('a' - 10); |
1682 | } |
1683 | |
1684 | |
1685 | |
1686 | |
1687 | double Lexer::parseInt(int base) |
1688 | { |
1689 | uint64_t bits = 0; |
1690 | uint32_t scale = 0; |
1691 | uint32_t k = 0; |
1692 | |
1693 | while (mark < idx && *mark == '0') |
1694 | mark++; |
1695 | |
1696 | if (mark==idx) |
1697 | return 0.0; |
1698 | |
1699 | switch (base) { |
1700 | case 8: |
1701 | for ( const wchar* i=mark ; i < idx ; i++ ) { |
1702 | if (k < 22) { |
1703 | bits = bits << 3 | digitValue(*i); |
1704 | k++; |
1705 | } |
1706 | scale += 3; |
1707 | } |
1708 | goto bits_and_scale; |
1709 | |
1710 | case 16: |
1711 | for ( const wchar* i=mark ; i < idx ; i++ ) { |
1712 | if (k < 16) { |
1713 | bits = bits << 4 | digitValue(*i); |
1714 | k++; |
1715 | } |
1716 | scale += 4; |
1717 | } |
1718 | goto bits_and_scale; |
1719 | |
1720 | case 10: |
1721 | return parseDouble(); |
1722 | |
1723 | default: |
1724 | compiler->internalError(lineno, "Unknown base in parseInt"); |
1725 | } |
1726 | |
1727 | bits_and_scale: |
1728 | |
1729 | uint32_t n = scale; |
1730 | if (n < 33) { bits <<= 32; n += 32; } |
1731 | if (n < 49) { bits <<= 16; n += 16; } |
1732 | if (n < 57) { bits <<= 8; n += 8; } |
1733 | if (n < 61) { bits <<= 4; n += 4; } |
1734 | if (n < 63) { bits <<= 2; n += 2; } |
1735 | if (n < 64) { bits <<= 1; n += 1; } |
| Value stored to 'n' is never read |
1736 | |
1737 | |
1738 | if ((int64_t)bits > 0) { bits <<= 1; scale--; } |
1739 | if ((int64_t)bits > 0) { bits <<= 1; scale--; } |
1740 | if ((int64_t)bits > 0) { bits <<= 1; scale--; } |
1741 | |
1742 | |
1743 | bits <<= 1; |
1744 | uint64_t lost = bits & 0xFFF; |
1745 | bits >>= 12; |
1746 | scale--; |
1747 | |
1748 | |
1749 | |
1750 | if (lost > 0x800) |
1751 | bits += 1; |
1752 | else if (lost == 0x800) { |
1753 | if (bits & 1) |
1754 | bits += 1; |
1755 | } |
1756 | bits &= ~(uint64_t)0 >> 12; |
1757 | |
1758 | |
1759 | |
1760 | bits |= (uint64_t)(1023 + scale) << 52; |
1761 | double_overlay d(bits); |
1762 | return d.value; |
1763 | } |
1764 | |
1765 | double Lexer::parseDouble() |
1766 | { |
1767 | |
1768 | |
1769 | |
1770 | |
1771 | |
1772 | |
1773 | |
1774 | |
1775 | |
1776 | |
1777 | double n; |
1778 | StringBuilder s(compiler); |
1779 | s.append(mark, idx); |
1780 | DEBUG_ONLY(bool flag =) compiler->context->stringToDouble(s.chardata(), &n); |
1781 | AvmAssert(flag)do { } while (0); |
1782 | return n; |
1783 | } |
1784 | |
1785 | boolbool Lexer::isUnicodeIdentifierStart(int c) |
1786 | { |
1787 | if (c < 128) |
1788 | return (char_attrs[c] & CHAR_ATTR_INITIAL) != 0; |
1789 | else |
1790 | return isNonASCIIIdentifierStart((wchar)c); |
1791 | } |
1792 | |
1793 | boolbool Lexer::isUnicodeIdentifierPart(int c) |
1794 | { |
1795 | if (c < 128) |
1796 | return (char_attrs[c] & CHAR_ATTR_SUBSEQUENT) != 0; |
1797 | else |
1798 | return isNonASCIIIdentifierSubsequent((wchar)c); |
1799 | } |
1800 | |
1801 | #ifdef DEBUG |
1802 | |
1803 | void Lexer::print(Token t, uint32_t, TokenValue v) |
1804 | { |
1805 | char buf[200]; |
1806 | *buf = 0; |
1807 | switch (t) { |
1808 | case T_Identifier: |
1809 | VMPI_strcpy::strcpy(buf, "I "); |
1810 | getn(buf+2, v.s, sizeof(buf)-2); |
1811 | break; |
1812 | case T_StringLiteral: |
1813 | VMPI_strcpy::strcpy(buf, "S "); |
1814 | getn(buf+2, v.s, sizeof(buf)-2); |
1815 | break; |
1816 | case T_RegexpLiteral: |
1817 | VMPI_strcpy::strcpy(buf, "R "); |
1818 | getn(buf+2, v.s, sizeof(buf)-2); |
1819 | break; |
1820 | case T_IntLiteral: |
1821 | VMPI_sprintf::sprintf(buf, "i %d", v.i); |
1822 | break; |
1823 | case T_UIntLiteral: |
1824 | VMPI_sprintf::sprintf(buf, "u %u", v.u); |
1825 | break; |
1826 | case T_DoubleLiteral: |
1827 | VMPI_sprintf::sprintf(buf, "d %g", v.d); |
1828 | break; |
1829 | default: |
1830 | break; |
1831 | } |
1832 | printf("%d %s\n", (int)t, buf); |
1833 | } |
1834 | |
1835 | #endif // DEBUG |
1836 | |
1837 | } |
1838 | } |
1839 | |
1840 | #endif // VMCFG_EVAL |