LCOV - code coverage report
Current view: top level - lib - lexer.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 871 871 100.0 %
Date: 2014-11-22 Functions: 19 19 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* lexer.cpp -- written by Alexis WILKE for Made to Order Software Corp. (c) 2005-2014 */
       2             : 
       3             : /*
       4             : 
       5             : Copyright (c) 2005-2014 Made to Order Software Corp.
       6             : 
       7             : http://snapwebsites.org/project/as2js
       8             : 
       9             : Permission is hereby granted, free of charge, to any
      10             : person obtaining a copy of this software and
      11             : associated documentation files (the "Software"), to
      12             : deal in the Software without restriction, including
      13             : without limitation the rights to use, copy, modify,
      14             : merge, publish, distribute, sublicense, and/or sell
      15             : copies of the Software, and to permit persons to whom
      16             : the Software is furnished to do so, subject to the
      17             : following conditions:
      18             : 
      19             : The above copyright notice and this permission notice
      20             : shall be included in all copies or substantial
      21             : portions of the Software.
      22             : 
      23             : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
      24             : ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
      25             : LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
      26             : FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
      27             : EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
      28             : LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
      29             : WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      30             : ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      31             : SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      32             : SOFTWARE.
      33             : 
      34             : */
      35             : 
      36             : #include    "as2js/lexer.h"
      37             : 
      38             : #include    "as2js/exceptions.h"
      39             : #include    "as2js/message.h"
      40             : 
      41             : #include    <iomanip>
      42             : 
      43             : 
      44             : namespace as2js
      45             : {
      46             : 
      47             : 
      48             : /** \brief The Lexer private functions to handle character types.
      49             :  *
      50             :  * This unnamed namespace is used by the lexer to define a set of
      51             :  * private functions and tables used to handle the characters
      52             :  * and tokens.
      53             :  */
      54             : namespace
      55             : {
      56             : 
      57             : /** \brief Define one valid range of characters.
      58             :  *
      59             :  * This structure defines the range of characters that represent
      60             :  * letters viewed as being valid in ECMAScript version 5.
      61             :  *
      62             :  * The range is defined as min/max pairs. The two values are inclusive.
      63             :  */
      64             : struct identifier_characters_t
      65             : {
      66             :     as_char_t   f_min;  // first character of the range (inclusive)
      67             :     as_char_t   f_max;  // last character of the range (inclusive)
      68             : };
      69             : 
      70             : 
      71             : /** \brief List of characters that are considered to be letters.
      72             :  *
      73             :  * The ECMAScript version 5 document defines the letters supported in
      74             :  * its identifiers in terms of Unicode characters. This includes many
      75             :  * characters that represent either letters or punctuation.
      76             :  *
      77             :  * Each entry is an inclusive min/max range of such characters. Entries
      78             :  * are listed in ascending order and must not overlap (f_min <= f_max).
      79             :  *
      80             :  * The table was generated using the code in:
      81             :  *
      82             :  * tests/unicode_characters.cpp
      83             :  *
      84             :  * The number of items in the table is defined as
      85             :  * g_identifier_characters_size (see below).
      86             :  */
      87             : identifier_characters_t g_identifier_characters[] =
      88             : {
      89             :     // The ASCII characters are already handled by the time we reach the
      90             :     // code using this table
      91             :     //{ 0x00030, 0x00039 },
      92             :     //{ 0x00041, 0x0005a },
      93             :     //{ 0x0005f, 0x0005f },
      94             :     //{ 0x00061, 0x0007a },
      95             :     { 0x000aa, 0x000aa },
      96             :     { 0x000b5, 0x000b5 },
      97             :     { 0x000ba, 0x000ba },
      98             :     { 0x000c0, 0x000d6 },
      99             :     { 0x000d8, 0x000f6 },
     100             :     { 0x000f8, 0x002c1 },
     101             :     { 0x002c6, 0x002d1 },
     102             :     { 0x002e0, 0x002e4 },
     103             :     { 0x002ec, 0x002ec },
     104             :     { 0x002ee, 0x002ee },
     105             :     { 0x00300, 0x00374 },
     106             :     { 0x00376, 0x00377 },
     107             :     { 0x0037a, 0x0037d },
     108             :     { 0x00386, 0x00386 },
     109             :     { 0x00388, 0x0038a },
     110             :     { 0x0038c, 0x0038c },
     111             :     { 0x0038e, 0x003a1 },
     112             :     { 0x003a3, 0x003f5 },
     113             :     { 0x003f7, 0x00481 },
     114             :     { 0x00483, 0x00487 },
     115             :     { 0x0048a, 0x00527 },
     116             :     { 0x00531, 0x00556 },
     117             :     { 0x00559, 0x00559 },
     118             :     { 0x00561, 0x00587 },
     119             :     { 0x00591, 0x005bd },
     120             :     { 0x005bf, 0x005bf },
     121             :     { 0x005c1, 0x005c2 },
     122             :     { 0x005c4, 0x005c5 },
     123             :     { 0x005c7, 0x005c7 },
     124             :     { 0x005d0, 0x005ea },
     125             :     { 0x005f0, 0x005f2 },
     126             :     { 0x00610, 0x0061a },
     127             :     { 0x00620, 0x00669 },
     128             :     { 0x0066e, 0x006d3 },
     129             :     { 0x006d5, 0x006dc },
     130             :     { 0x006df, 0x006e8 },
     131             :     { 0x006ea, 0x006fc },
     132             :     { 0x006ff, 0x006ff },
     133             :     { 0x00710, 0x0074a },
     134             :     { 0x0074d, 0x007b1 },
     135             :     { 0x007c0, 0x007f5 },
     136             :     { 0x007fa, 0x007fa },
     137             :     { 0x00800, 0x0082d },
     138             :     { 0x00840, 0x0085b },
     139             :     { 0x008a0, 0x008a0 },
     140             :     { 0x008a2, 0x008b2 },
     141             :     { 0x008e4, 0x008ff },
     142             :     { 0x00900, 0x00963 },
     143             :     { 0x00966, 0x0096f },
     144             :     { 0x00971, 0x00977 },
     145             :     { 0x00979, 0x0097f },
     146             :     { 0x00981, 0x00983 },
     147             :     { 0x00985, 0x0098c },
     148             :     { 0x0098f, 0x00990 },
     149             :     { 0x00993, 0x009a8 },
     150             :     { 0x009aa, 0x009b0 },
     151             :     { 0x009b2, 0x009b2 },
     152             :     { 0x009b6, 0x009b9 },
     153             :     { 0x009bc, 0x009c4 },
     154             :     { 0x009c7, 0x009c8 },
     155             :     { 0x009cb, 0x009ce },
     156             :     { 0x009d7, 0x009d7 },
     157             :     { 0x009dc, 0x009dd },
     158             :     { 0x009df, 0x009e3 },
     159             :     { 0x009e6, 0x009f1 },
     160             :     { 0x00a01, 0x00a03 },
     161             :     { 0x00a05, 0x00a0a },
     162             :     { 0x00a0f, 0x00a10 },
     163             :     { 0x00a13, 0x00a28 },
     164             :     { 0x00a2a, 0x00a30 },
     165             :     { 0x00a32, 0x00a33 },
     166             :     { 0x00a35, 0x00a36 },
     167             :     { 0x00a38, 0x00a39 },
     168             :     { 0x00a3c, 0x00a3c },
     169             :     { 0x00a3e, 0x00a42 },
     170             :     { 0x00a47, 0x00a48 },
     171             :     { 0x00a4b, 0x00a4d },
     172             :     { 0x00a51, 0x00a51 },
     173             :     { 0x00a59, 0x00a5c },
     174             :     { 0x00a5e, 0x00a5e },
     175             :     { 0x00a66, 0x00a75 },
     176             :     { 0x00a81, 0x00a83 },
     177             :     { 0x00a85, 0x00a8d },
     178             :     { 0x00a8f, 0x00a91 },
     179             :     { 0x00a93, 0x00aa8 },
     180             :     { 0x00aaa, 0x00ab0 },
     181             :     { 0x00ab2, 0x00ab3 },
     182             :     { 0x00ab5, 0x00ab9 },
     183             :     { 0x00abc, 0x00ac5 },
     184             :     { 0x00ac7, 0x00ac9 },
     185             :     { 0x00acb, 0x00acd },
     186             :     { 0x00ad0, 0x00ad0 },
     187             :     { 0x00ae0, 0x00ae3 },
     188             :     { 0x00ae6, 0x00aef },
     189             :     { 0x00b01, 0x00b03 },
     190             :     { 0x00b05, 0x00b0c },
     191             :     { 0x00b0f, 0x00b10 },
     192             :     { 0x00b13, 0x00b28 },
     193             :     { 0x00b2a, 0x00b30 },
     194             :     { 0x00b32, 0x00b33 },
     195             :     { 0x00b35, 0x00b39 },
     196             :     { 0x00b3c, 0x00b44 },
     197             :     { 0x00b47, 0x00b48 },
     198             :     { 0x00b4b, 0x00b4d },
     199             :     { 0x00b56, 0x00b57 },
     200             :     { 0x00b5c, 0x00b5d },
     201             :     { 0x00b5f, 0x00b63 },
     202             :     { 0x00b66, 0x00b6f },
     203             :     { 0x00b71, 0x00b71 },
     204             :     { 0x00b82, 0x00b83 },
     205             :     { 0x00b85, 0x00b8a },
     206             :     { 0x00b8e, 0x00b90 },
     207             :     { 0x00b92, 0x00b95 },
     208             :     { 0x00b99, 0x00b9a },
     209             :     { 0x00b9c, 0x00b9c },
     210             :     { 0x00b9e, 0x00b9f },
     211             :     { 0x00ba3, 0x00ba4 },
     212             :     { 0x00ba8, 0x00baa },
     213             :     { 0x00bae, 0x00bb9 },
     214             :     { 0x00bbe, 0x00bc2 },
     215             :     { 0x00bc6, 0x00bc8 },
     216             :     { 0x00bca, 0x00bcd },
     217             :     { 0x00bd0, 0x00bd0 },
     218             :     { 0x00bd7, 0x00bd7 },
     219             :     { 0x00be6, 0x00bef },
     220             :     { 0x00c01, 0x00c03 },
     221             :     { 0x00c05, 0x00c0c },
     222             :     { 0x00c0e, 0x00c10 },
     223             :     { 0x00c12, 0x00c28 },
     224             :     { 0x00c2a, 0x00c33 },
     225             :     { 0x00c35, 0x00c39 },
     226             :     { 0x00c3d, 0x00c44 },
     227             :     { 0x00c46, 0x00c48 },
     228             :     { 0x00c4a, 0x00c4d },
     229             :     { 0x00c55, 0x00c56 },
     230             :     { 0x00c58, 0x00c59 },
     231             :     { 0x00c60, 0x00c63 },
     232             :     { 0x00c66, 0x00c6f },
     233             :     { 0x00c82, 0x00c83 },
     234             :     { 0x00c85, 0x00c8c },
     235             :     { 0x00c8e, 0x00c90 },
     236             :     { 0x00c92, 0x00ca8 },
     237             :     { 0x00caa, 0x00cb3 },
     238             :     { 0x00cb5, 0x00cb9 },
     239             :     { 0x00cbc, 0x00cc4 },
     240             :     { 0x00cc6, 0x00cc8 },
     241             :     { 0x00cca, 0x00ccd },
     242             :     { 0x00cd5, 0x00cd6 },
     243             :     { 0x00cde, 0x00cde },
     244             :     { 0x00ce0, 0x00ce3 },
     245             :     { 0x00ce6, 0x00cef },
     246             :     { 0x00cf1, 0x00cf2 },
     247             :     { 0x00d02, 0x00d03 },
     248             :     { 0x00d05, 0x00d0c },
     249             :     { 0x00d0e, 0x00d10 },
     250             :     { 0x00d12, 0x00d3a },
     251             :     { 0x00d3d, 0x00d44 },
     252             :     { 0x00d46, 0x00d48 },
     253             :     { 0x00d4a, 0x00d4e },
     254             :     { 0x00d57, 0x00d57 },
     255             :     { 0x00d60, 0x00d63 },
     256             :     { 0x00d66, 0x00d6f },
     257             :     { 0x00d7a, 0x00d7f },
     258             :     { 0x00d82, 0x00d83 },
     259             :     { 0x00d85, 0x00d96 },
     260             :     { 0x00d9a, 0x00db1 },
     261             :     { 0x00db3, 0x00dbb },
     262             :     { 0x00dbd, 0x00dbd },
     263             :     { 0x00dc0, 0x00dc6 },
     264             :     { 0x00dca, 0x00dca },
     265             :     { 0x00dcf, 0x00dd4 },
     266             :     { 0x00dd6, 0x00dd6 },
     267             :     { 0x00dd8, 0x00ddf },
     268             :     { 0x00df2, 0x00df3 },
     269             :     { 0x00e01, 0x00e3a },
     270             :     { 0x00e40, 0x00e4e },
     271             :     { 0x00e50, 0x00e59 },
     272             :     { 0x00e81, 0x00e82 },
     273             :     { 0x00e84, 0x00e84 },
     274             :     { 0x00e87, 0x00e88 },
     275             :     { 0x00e8a, 0x00e8a },
     276             :     { 0x00e8d, 0x00e8d },
     277             :     { 0x00e94, 0x00e97 },
     278             :     { 0x00e99, 0x00e9f },
     279             :     { 0x00ea1, 0x00ea3 },
     280             :     { 0x00ea5, 0x00ea5 },
     281             :     { 0x00ea7, 0x00ea7 },
     282             :     { 0x00eaa, 0x00eab },
     283             :     { 0x00ead, 0x00eb9 },
     284             :     { 0x00ebb, 0x00ebd },
     285             :     { 0x00ec0, 0x00ec4 },
     286             :     { 0x00ec6, 0x00ec6 },
     287             :     { 0x00ec8, 0x00ecd },
     288             :     { 0x00ed0, 0x00ed9 },
     289             :     { 0x00edc, 0x00edf },
     290             :     { 0x00f00, 0x00f00 },
     291             :     { 0x00f18, 0x00f19 },
     292             :     { 0x00f20, 0x00f29 },
     293             :     { 0x00f35, 0x00f35 },
     294             :     { 0x00f37, 0x00f37 },
     295             :     { 0x00f39, 0x00f39 },
     296             :     { 0x00f3e, 0x00f47 },
     297             :     { 0x00f49, 0x00f6c },
     298             :     { 0x00f71, 0x00f84 },
     299             :     { 0x00f86, 0x00f97 },
     300             :     { 0x00f99, 0x00fbc },
     301             :     { 0x00fc6, 0x00fc6 },
     302             :     { 0x01000, 0x01049 },
     303             :     { 0x01050, 0x0109d },
     304             :     { 0x010a0, 0x010c5 },
     305             :     { 0x010c7, 0x010c7 },
     306             :     { 0x010cd, 0x010cd },
     307             :     { 0x010d0, 0x010fa },
     308             :     { 0x010fc, 0x01248 },
     309             :     { 0x0124a, 0x0124d },
     310             :     { 0x01250, 0x01256 },
     311             :     { 0x01258, 0x01258 },
     312             :     { 0x0125a, 0x0125d },
     313             :     { 0x01260, 0x01288 },
     314             :     { 0x0128a, 0x0128d },
     315             :     { 0x01290, 0x012b0 },
     316             :     { 0x012b2, 0x012b5 },
     317             :     { 0x012b8, 0x012be },
     318             :     { 0x012c0, 0x012c0 },
     319             :     { 0x012c2, 0x012c5 },
     320             :     { 0x012c8, 0x012d6 },
     321             :     { 0x012d8, 0x01310 },
     322             :     { 0x01312, 0x01315 },
     323             :     { 0x01318, 0x0135a },
     324             :     { 0x0135d, 0x0135f },
     325             :     { 0x01380, 0x0138f },
     326             :     { 0x013a0, 0x013f4 },
     327             :     { 0x01401, 0x0166c },
     328             :     { 0x0166f, 0x0167f },
     329             :     { 0x01681, 0x0169a },
     330             :     { 0x016a0, 0x016ea },
     331             :     { 0x016ee, 0x016f0 },
     332             :     { 0x01700, 0x0170c },
     333             :     { 0x0170e, 0x01714 },
     334             :     { 0x01720, 0x01734 },
     335             :     { 0x01740, 0x01753 },
     336             :     { 0x01760, 0x0176c },
     337             :     { 0x0176e, 0x01770 },
     338             :     { 0x01772, 0x01773 },
     339             :     { 0x01780, 0x017d3 },
     340             :     { 0x017d7, 0x017d7 },
     341             :     { 0x017dc, 0x017dd },
     342             :     { 0x017e0, 0x017e9 },
     343             :     { 0x0180b, 0x0180d },
     344             :     { 0x01810, 0x01819 },
     345             :     { 0x01820, 0x01877 },
     346             :     { 0x01880, 0x018aa },
     347             :     { 0x018b0, 0x018f5 },
     348             :     { 0x01900, 0x0191c },
     349             :     { 0x01920, 0x0192b },
     350             :     { 0x01930, 0x0193b },
     351             :     { 0x01946, 0x0196d },
     352             :     { 0x01970, 0x01974 },
     353             :     { 0x01980, 0x019ab },
     354             :     { 0x019b0, 0x019c9 },
     355             :     { 0x019d0, 0x019d9 },
     356             :     { 0x01a00, 0x01a1b },
     357             :     { 0x01a20, 0x01a5e },
     358             :     { 0x01a60, 0x01a7c },
     359             :     { 0x01a7f, 0x01a89 },
     360             :     { 0x01a90, 0x01a99 },
     361             :     { 0x01aa7, 0x01aa7 },
     362             :     { 0x01b00, 0x01b4b },
     363             :     { 0x01b50, 0x01b59 },
     364             :     { 0x01b6b, 0x01b73 },
     365             :     { 0x01b80, 0x01bf3 },
     366             :     { 0x01c00, 0x01c37 },
     367             :     { 0x01c40, 0x01c49 },
     368             :     { 0x01c4d, 0x01c7d },
     369             :     { 0x01cd0, 0x01cd2 },
     370             :     { 0x01cd4, 0x01cf6 },
     371             :     { 0x01d00, 0x01de6 },
     372             :     { 0x01dfc, 0x01f15 },
     373             :     { 0x01f18, 0x01f1d },
     374             :     { 0x01f20, 0x01f45 },
     375             :     { 0x01f48, 0x01f4d },
     376             :     { 0x01f50, 0x01f57 },
     377             :     { 0x01f59, 0x01f59 },
     378             :     { 0x01f5b, 0x01f5b },
     379             :     { 0x01f5d, 0x01f5d },
     380             :     { 0x01f5f, 0x01f7d },
     381             :     { 0x01f80, 0x01fb4 },
     382             :     { 0x01fb6, 0x01fbc },
     383             :     { 0x01fbe, 0x01fbe },
     384             :     { 0x01fc2, 0x01fc4 },
     385             :     { 0x01fc6, 0x01fcc },
     386             :     { 0x01fd0, 0x01fd3 },
     387             :     { 0x01fd6, 0x01fdb },
     388             :     { 0x01fe0, 0x01fec },
     389             :     { 0x01ff2, 0x01ff4 },
     390             :     { 0x01ff6, 0x01ffc },
     391             :     { 0x0200c, 0x0200d },
     392             :     { 0x0203f, 0x02040 },
     393             :     { 0x02054, 0x02054 },
     394             :     { 0x02071, 0x02071 },
     395             :     { 0x0207f, 0x0207f },
     396             :     { 0x02090, 0x0209c },
     397             :     { 0x020d0, 0x020dc },
     398             :     { 0x020e1, 0x020e1 },
     399             :     { 0x020e5, 0x020f0 },
     400             :     { 0x02102, 0x02102 },
     401             :     { 0x02107, 0x02107 },
     402             :     { 0x0210a, 0x02113 },
     403             :     { 0x02115, 0x02115 },
     404             :     { 0x02119, 0x0211d },
     405             :     { 0x02124, 0x02124 },
     406             :     { 0x02126, 0x02126 },
     407             :     { 0x02128, 0x02128 },
     408             :     { 0x0212a, 0x0212d },
     409             :     { 0x0212f, 0x02139 },
     410             :     { 0x0213c, 0x0213f },
     411             :     { 0x02145, 0x02149 },
     412             :     { 0x0214e, 0x0214e },
     413             :     { 0x02160, 0x02188 },
     414             :     { 0x02c00, 0x02c2e },
     415             :     { 0x02c30, 0x02c5e },
     416             :     { 0x02c60, 0x02ce4 },
     417             :     { 0x02ceb, 0x02cf3 },
     418             :     { 0x02d00, 0x02d25 },
     419             :     { 0x02d27, 0x02d27 },
     420             :     { 0x02d2d, 0x02d2d },
     421             :     { 0x02d30, 0x02d67 },
     422             :     { 0x02d6f, 0x02d6f },
     423             :     { 0x02d7f, 0x02d96 },
     424             :     { 0x02da0, 0x02da6 },
     425             :     { 0x02da8, 0x02dae },
     426             :     { 0x02db0, 0x02db6 },
     427             :     { 0x02db8, 0x02dbe },
     428             :     { 0x02dc0, 0x02dc6 },
     429             :     { 0x02dc8, 0x02dce },
     430             :     { 0x02dd0, 0x02dd6 },
     431             :     { 0x02dd8, 0x02dde },
     432             :     { 0x02de0, 0x02dff },
     433             :     { 0x02e2f, 0x02e2f },
     434             :     { 0x03005, 0x03007 },
     435             :     { 0x03021, 0x0302f },
     436             :     { 0x03031, 0x03035 },
     437             :     { 0x03038, 0x0303c },
     438             :     { 0x03041, 0x03096 },
     439             :     { 0x03099, 0x0309a },
     440             :     { 0x0309d, 0x0309f },
     441             :     { 0x030a1, 0x030fa },
     442             :     { 0x030fc, 0x030ff },
     443             :     { 0x03105, 0x0312d },
     444             :     { 0x03131, 0x0318e },
     445             :     { 0x031a0, 0x031ba },
     446             :     { 0x031f0, 0x031ff },
     447             :     { 0x03400, 0x04db5 },
     448             :     { 0x04e00, 0x09fcc },
     449             :     { 0x0a000, 0x0a48c },
     450             :     { 0x0a4d0, 0x0a4fd },
     451             :     { 0x0a500, 0x0a60c },
     452             :     { 0x0a610, 0x0a62b },
     453             :     { 0x0a640, 0x0a66f },
     454             :     { 0x0a674, 0x0a67d },
     455             :     { 0x0a67f, 0x0a697 },
     456             :     { 0x0a69f, 0x0a6f1 },
     457             :     { 0x0a717, 0x0a71f },
     458             :     { 0x0a722, 0x0a788 },
     459             :     { 0x0a78b, 0x0a78e },
     460             :     { 0x0a790, 0x0a79f },
     461             :     { 0x0a7a0, 0x0a7b1 },
     462             :     { 0x0a7f8, 0x0a827 },
     463             :     { 0x0a840, 0x0a873 },
     464             :     { 0x0a880, 0x0a8c4 },
     465             :     { 0x0a8d0, 0x0a8d9 },
     466             :     { 0x0a8e0, 0x0a8f7 },
     467             :     { 0x0a8fb, 0x0a8fb },
     468             :     { 0x0a900, 0x0a92d },
     469             :     { 0x0a930, 0x0a953 },
     470             :     { 0x0a960, 0x0a97c },
     471             :     { 0x0a980, 0x0a9c0 },
     472             :     { 0x0a9cf, 0x0a9d9 },
     473             :     { 0x0aa00, 0x0aa36 },
     474             :     { 0x0aa40, 0x0aa4d },
     475             :     { 0x0aa50, 0x0aa59 },
     476             :     { 0x0aa60, 0x0aa76 },
     477             :     { 0x0aa7a, 0x0aa7b },
     478             :     { 0x0aa80, 0x0aac2 },
     479             :     { 0x0aadb, 0x0aadd },
     480             :     { 0x0aae0, 0x0aaef },
     481             :     { 0x0aaf2, 0x0aaf6 },
     482             :     { 0x0ab01, 0x0ab06 },
     483             :     { 0x0ab09, 0x0ab0e },
     484             :     { 0x0ab11, 0x0ab16 },
     485             :     { 0x0ab20, 0x0ab26 },
     486             :     { 0x0ab28, 0x0ab2e },
     487             :     { 0x0abc0, 0x0abea },
     488             :     { 0x0abec, 0x0abed },
     489             :     { 0x0abf0, 0x0abf9 },
     490             :     { 0x0ac00, 0x0d7a3 },
     491             :     { 0x0d7b0, 0x0d7c6 },
     492             :     { 0x0d7cb, 0x0d7fb },
     493             :     { 0x0f900, 0x0fa6d },
     494             :     { 0x0fa70, 0x0fad9 },
     495             :     { 0x0fb00, 0x0fb06 },
     496             :     { 0x0fb13, 0x0fb17 },
     497             :     { 0x0fb1d, 0x0fb28 },
     498             :     { 0x0fb2a, 0x0fb36 },
     499             :     { 0x0fb38, 0x0fb3c },
     500             :     { 0x0fb3e, 0x0fb3e },
     501             :     { 0x0fb40, 0x0fb41 },
     502             :     { 0x0fb43, 0x0fb44 },
     503             :     { 0x0fb46, 0x0fbb1 },
     504             :     { 0x0fbd3, 0x0fd3d },
     505             :     { 0x0fd50, 0x0fd8f },
     506             :     { 0x0fd92, 0x0fdc7 },
     507             :     { 0x0fdf0, 0x0fdfb },
     508             :     { 0x0fe00, 0x0fe0f },
     509             :     { 0x0fe20, 0x0fe26 },
     510             :     { 0x0fe33, 0x0fe34 },
     511             :     { 0x0fe4d, 0x0fe4f },
     512             :     { 0x0fe70, 0x0fe74 },
     513             :     { 0x0fe76, 0x0fefc },
     514             :     { 0x0ff10, 0x0ff19 },
     515             :     { 0x0ff21, 0x0ff3a },
     516             :     { 0x0ff3f, 0x0ff3f },
     517             :     { 0x0ff41, 0x0ff5a },
     518             :     { 0x0ff66, 0x0ffbe },
     519             :     { 0x0ffc2, 0x0ffc7 },
     520             :     { 0x0ffca, 0x0ffcf },
     521             :     { 0x0ffd2, 0x0ffd7 },
     522             :     { 0x0ffda, 0x0ffdc },
     523             :     { 0x10000, 0x1000b },
     524             :     { 0x1000d, 0x10026 },
     525             :     { 0x10028, 0x1003a },
     526             :     { 0x1003c, 0x1003d },
     527             :     { 0x1003f, 0x1004d },
     528             :     { 0x10050, 0x1005d },
     529             :     { 0x10080, 0x100fa },
     530             :     { 0x10140, 0x10174 },
     531             :     { 0x101fd, 0x101fd },
     532             :     { 0x10280, 0x1029c },
     533             :     { 0x102a0, 0x102d0 },
     534             :     { 0x10300, 0x1031e },
     535             :     { 0x10330, 0x1034a },
     536             :     { 0x10380, 0x1039d },
     537             :     { 0x103a0, 0x103c3 },
     538             :     { 0x103c8, 0x103cf },
     539             :     { 0x103d1, 0x103d5 },
     540             :     { 0x10400, 0x1049d },
     541             :     { 0x104a0, 0x104a9 },
     542             :     { 0x10800, 0x10805 },
     543             :     { 0x10808, 0x10808 },
     544             :     { 0x1080a, 0x10835 },
     545             :     { 0x10837, 0x10838 },
     546             :     { 0x1083c, 0x1083c },
     547             :     { 0x1083f, 0x10855 },
     548             :     { 0x10900, 0x10915 },
     549             :     { 0x10920, 0x10939 },
     550             :     { 0x10980, 0x109b7 },
     551             :     { 0x109be, 0x109bf },
     552             :     { 0x10a00, 0x10a03 },
     553             :     { 0x10a05, 0x10a06 },
     554             :     { 0x10a0c, 0x10a13 },
     555             :     { 0x10a15, 0x10a17 },
     556             :     { 0x10a19, 0x10a33 },
     557             :     { 0x10a38, 0x10a3a },
     558             :     { 0x10a3f, 0x10a3f },
     559             :     { 0x10a60, 0x10a7c },
     560             :     { 0x10b00, 0x10b35 },
     561             :     { 0x10b40, 0x10b55 },
     562             :     { 0x10b60, 0x10b72 },
     563             :     { 0x10c00, 0x10c48 },
     564             :     { 0x11000, 0x11046 },
     565             :     { 0x11066, 0x1106f },
     566             :     { 0x11080, 0x110ba },
     567             :     { 0x110d0, 0x110e8 },
     568             :     { 0x110f0, 0x110f9 },
     569             :     { 0x11100, 0x11134 },
     570             :     { 0x11136, 0x1113f },
     571             :     { 0x11180, 0x111c8 },
     572             :     { 0x111d0, 0x111da },
     573             :     { 0x11680, 0x116b7 },
     574             :     { 0x116c0, 0x116c9 },
     575             :     { 0x12000, 0x1236e },
     576             :     { 0x12400, 0x12462 },
     577             :     { 0x13000, 0x1342e },
     578             :     { 0x16800, 0x16a38 },
     579             :     { 0x16f00, 0x16f44 },
     580             :     { 0x16f50, 0x16f7e },
     581             :     { 0x16f8f, 0x16f9f },
     582             :     { 0x1b000, 0x1b001 },
     583             :     { 0x1d165, 0x1d169 },
     584             :     { 0x1d16d, 0x1d172 },
     585             :     { 0x1d17b, 0x1d182 },
     586             :     { 0x1d185, 0x1d18b },
     587             :     { 0x1d1aa, 0x1d1ad },
     588             :     { 0x1d242, 0x1d244 },
     589             :     { 0x1d400, 0x1d454 },
     590             :     { 0x1d456, 0x1d49c },
     591             :     { 0x1d49e, 0x1d49f },
     592             :     { 0x1d4a2, 0x1d4a2 },
     593             :     { 0x1d4a5, 0x1d4a6 },
     594             :     { 0x1d4a9, 0x1d4ac },
     595             :     { 0x1d4ae, 0x1d4b9 },
     596             :     { 0x1d4bb, 0x1d4bb },
     597             :     { 0x1d4bd, 0x1d4c3 },
     598             :     { 0x1d4c5, 0x1d505 },
     599             :     { 0x1d507, 0x1d50a },
     600             :     { 0x1d50d, 0x1d514 },
     601             :     { 0x1d516, 0x1d51c },
     602             :     { 0x1d51e, 0x1d539 },
     603             :     { 0x1d53b, 0x1d53e },
     604             :     { 0x1d540, 0x1d544 },
     605             :     { 0x1d546, 0x1d546 },
     606             :     { 0x1d54a, 0x1d550 },
     607             :     { 0x1d552, 0x1d6a5 },
     608             :     { 0x1d6a8, 0x1d6c0 },
     609             :     { 0x1d6c2, 0x1d6da },
     610             :     { 0x1d6dc, 0x1d6fa },
     611             :     { 0x1d6fc, 0x1d714 },
     612             :     { 0x1d716, 0x1d734 },
     613             :     { 0x1d736, 0x1d74e },
     614             :     { 0x1d750, 0x1d76e },
     615             :     { 0x1d770, 0x1d788 },
     616             :     { 0x1d78a, 0x1d7a8 },
     617             :     { 0x1d7aa, 0x1d7c2 },
     618             :     { 0x1d7c4, 0x1d7cb },
     619             :     { 0x1d7ce, 0x1d7ff },
     620             :     { 0x1ee00, 0x1ee03 },
     621             :     { 0x1ee05, 0x1ee1f },
     622             :     { 0x1ee21, 0x1ee22 },
     623             :     { 0x1ee24, 0x1ee24 },
     624             :     { 0x1ee27, 0x1ee27 },
     625             :     { 0x1ee29, 0x1ee32 },
     626             :     { 0x1ee34, 0x1ee37 },
     627             :     { 0x1ee39, 0x1ee39 },
     628             :     { 0x1ee3b, 0x1ee3b },
     629             :     { 0x1ee42, 0x1ee42 },
     630             :     { 0x1ee47, 0x1ee47 },
     631             :     { 0x1ee49, 0x1ee49 },
     632             :     { 0x1ee4b, 0x1ee4b },
     633             :     { 0x1ee4d, 0x1ee4f },
     634             :     { 0x1ee51, 0x1ee52 },
     635             :     { 0x1ee54, 0x1ee54 },
     636             :     { 0x1ee57, 0x1ee57 },
     637             :     { 0x1ee59, 0x1ee59 },
     638             :     { 0x1ee5b, 0x1ee5b },
     639             :     { 0x1ee5d, 0x1ee5d },
     640             :     { 0x1ee5f, 0x1ee5f },
     641             :     { 0x1ee61, 0x1ee62 },
     642             :     { 0x1ee64, 0x1ee64 },
     643             :     { 0x1ee67, 0x1ee6a },
     644             :     { 0x1ee6c, 0x1ee72 },
     645             :     { 0x1ee74, 0x1ee77 },
     646             :     { 0x1ee79, 0x1ee7c },
     647             :     { 0x1ee7e, 0x1ee7e },
     648             :     { 0x1ee80, 0x1ee89 },
     649             :     { 0x1ee8b, 0x1ee9b },
     650             :     { 0x1eea1, 0x1eea3 },
     651             :     { 0x1eea5, 0x1eea9 },
     652             :     { 0x1eeab, 0x1eebb },
     653             :     { 0x1eef0, 0x1eef1 },
     654             :     { 0x20000, 0x2a6d6 },
     655             :     { 0x2a700, 0x2b734 },
     656             :     { 0x2b740, 0x2b81d },
     657             :     { 0x2f800, 0x2fa1d },
     658             :     { 0xe0100, 0xe01ef }
     659             : };
     660             : 
     661             : /** \brief The number of entries in the identifier character table.
     662             :  *
     663             :  * The g_identifier_characters table is an array of character ranges.
     664             :  * This constant is the number of ranges in that array; char_type()
     665             :  * uses it as the upper bound of its binary search.
     666             :  */
     667             : size_t const g_identifier_characters_size = sizeof(g_identifier_characters) / sizeof(g_identifier_characters[0]);
     668             : 
     669             : 
     670             : }
     671             : // no name namespace
     672             : 
     673             : /**********************************************************************/
     674             : /**********************************************************************/
     675             : /***  PARSER CREATOR  *************************************************/
     676             : /**********************************************************************/
     677             : /**********************************************************************/
     678             : 
     679             : 
     680             : /** \brief Initialize the lexer object.
     681             :  *
     682             :  * The constructor of the Lexer expects a valid pointer to an Input
     683             :  * stream.
     684             :  *
     685             :  * It also requires a valid Options pointer. If either pointer is
     686             :  * null, the constructor throws an exception_invalid_data exception
     687             :  * instead of creating the object.
     688             :  *
     689             :  * \param[in] input  The input stream, cannot be null.
     690             :  * \param[in] options  A set of options, cannot be null.
     691             :  */
     692     5995342 : Lexer::Lexer(Input::pointer_t input, Options::pointer_t options)
     693             :     : f_input(input)
     694     5995345 :     , f_options(options)
     695             :     //, f_char_type(CHAR_NO_FLAGS) -- auto-init
     696             :     //, f_position() -- auto-init
     697             :     //, f_result_type(NODE_UNKNOWN) -- auto-init
     698             :     //, f_result_string("") -- auto-init
     699             :     //, f_result_int64(0) -- auto-init
     700             :     //, f_result_float64(0.0) -- auto-init
     701             : {
     702     5995342 :     if(!f_input)
     703             :     {
     704           2 :         throw exception_invalid_data("The 'input' pointer cannot be null in the Lexer() constructor.");
     705             :     }
     706     5995340 :     if(!f_options)
     707             :     {
     708           1 :         throw exception_invalid_data("The 'options' pointer cannot be null in the Lexer() constructor.");
     709             :     }
     710     5995339 : }
     711             : 
     712             : 
     713             : 
     714             : /** \brief Retrieve the input stream pointer.
     715             :  *
     716             :  * This function returns the f_input pointer held by the Lexer object.
     717             :  *
     718             :  * \return The input pointer as specified when creating the Lexer object.
     719             :  */
     720     7142572 : Input::pointer_t Lexer::get_input() const
     721             : {
     722     7142572 :     return f_input;
     723             : }
     724             : 
     725             : 
     726             : /** \brief Retrieve the next character of input.
     727             :  *
     728             :  * This function reads one character of input, saves its type in f_char_type, and returns it.
     729             :  *
     730             :  * If the character is a newline, form feed, etc. it affects the current
     731             :  * line number, page number, etc. as required. The following characters
     732             :  * have such an effect:
     733             :  *
     734             :  * \li '\\n' -- the newline character adds a new line
     735             :  * \li '\\r' -- the carriage return character adds a new line; if followed
     736             :  *              by a '\\n', remove it too; always return '\\n' and not '\\r'
     737             :  * \li '\\f' -- the formfeed adds a new page
     738             :  * \li LINE SEPARATOR (0x2028) -- add a new line
     739             :  * \li PARAGRAPH SEPARATOR (0x2029) -- add a new paragraph
     740             :  *
     741             :  * If the ungetc() function was called before a call to getc(), then
     742             :  * that last character is returned instead of a new character from the
     743             :  * input stream. In that case, the character has no effect on the line
     744             :  * number, page number, etc.
     745             :  *
     746             :  * \internal
     747             :  *
     748             :  * \return The next Unicode character.
     749             :  */
     750   305178085 : Input::char_t Lexer::getc()
     751             : {
     752             :     Input::char_t c;
     753             : 
     754             :     // if some characters were ungotten earlier, re-read those first
     755             :     // and avoid any side effects on the position... (which means
     756             :     // we could be a bit off, but the worst case is for regular expressions
     757             :     // and assuming the regular expression is valid, it will not be a
     758             :     // problem either...)
     759   305178085 :     if(!f_unget.empty())
     760             :     {
     761    25429496 :         c = f_unget.back();
     762    25429496 :         f_unget.pop_back();
     763    25429496 :         f_char_type = char_type(c);
     764             :     }
     765             :     else
     766             :     {
     767   279748589 :         c = f_input->getc();
     768             : 
     769   279748589 :         f_char_type = char_type(c);
     770   279748589 :         if((f_char_type & (CHAR_LINE_TERMINATOR | CHAR_WHITE_SPACE)) != 0)
     771             :         {
     772             :             // Unix (Linux, Mac OS/X, HP-UX, SunOS, etc.) uses '\n'
     773             :             // Microsoft (MS-DOS, MS-Windows) uses '\r\n'
     774             :             // Macintosh (OS 1 to OS 9, and Apple 1,2,3) uses '\r'
     775    30462705 :             switch(c)
     776             :             {
     777             :             case '\n':   // LINE FEED (LF)
     778             :                 // '\n' represents a newline
     779     4626247 :                 f_input->get_position().new_line();
     780     4626247 :                 break;
     781             : 
     782             :             case '\r':   // CARRIAGE RETURN (CR)
     783             :                 // count '\r\n' (Microsoft) and a lone '\r' (old Mac) as one newline
     784             :                 // NOTE(review): a look-ahead other than '\n' goes to f_unget, so getc() returns it later without updating the position -- confirm this is intended
     785       90518 :                 f_input->get_position().new_line();
     786       90518 :                 c = f_input->getc();
     787       90518 :                 if(c != '\n') // if '\n' follows, skip it silently
     788             :                 {
     789       45221 :                     ungetc(c);
     790             :                 }
     791       90518 :                 c = '\n';
     792       90518 :                 break;
     793             : 
     794             :             case '\f':   // FORM FEED (FF)
     795             :                 // view the form feed as a new page for now...
     796         258 :                 f_input->get_position().new_page();
     797         258 :                 break;
     798             : 
     799             :             //case 0x0085: // NEXT LINE (NEL) -- not in ECMAScript 5
     800             :             //    // 
     801             :             //    f_input->get_position().new_line();
     802             :             //    break;
     803             : 
     804             :             case 0x2028: // LINE SEPARATOR (LSEP)
     805      109039 :                 f_input->get_position().new_line();
     806      109039 :                 break;
     807             : 
     808             :             case 0x2029: // PARAGRAPH SEPARATOR (PSEP)
     809       45628 :                 f_input->get_position().new_paragraph();
     810       45628 :                 break;
     811             : 
     812             :             }
     813             :         }
     814             :     }
     815             : 
     816   305178085 :     return c;
     817             : }
     818             : 
     819             : 
     820             : /** \brief Unget a character.
     821             :  *
     822             :  * Whenever reading a token, it is most often that the end of the token
     823             :  * is discovered by reading one character too many. This function is
     824             :  * used to push that character back in the input stream.
     825             :  *
     826             :  * Although the stream implementation also includes an unget, we do not
     827             :  * use that unget. The reason is that the getc() function needs to know
     828             :  * whether the character is a brand new character from that input stream
     829             :  * or the last ungotten character. The difference is important to know
     830             :  * whether the character has to have an effect on the line number,
     831             :  * page number, etc.
     832             :  *
     833             :  * The getc() function first returns the last character sent via
     834             :  * ungetc() (i.e. LIFO).
     835             :  *
     836             :  * \internal
     837             :  *
     838             :  * \param[in] c  The input character to "push back in the stream"; it is silently ignored unless 0 < c < 0x110000.
     839             :  */
     840    26588154 : void Lexer::ungetc(Input::char_t c)
     841             : {
     842             :     // WARNING: we do not use the f_input ungetc() because otherwise
     843             :     //          it would count lines, paragraphs, or pages twice,
     844             :     //          which would be a problem...
     845    26588154 :     if(c > 0 && c < 0x110000)
     846             :     {
     847             :         // unget only if not an invalid character (especially not EOF)
     848    25435658 :         f_unget.push_back(c);
     849             :     }
     850    26588154 : }
     851             : 
     852             : 
     853             : /** \brief Determine the type of a character.
     854             :  *
     855             :  * This function determines the type of a character.
     856             :  *
     857             :  * The function first uses a switch because most of the characters used
     858             :  * in JavaScript are ASCII characters and thus are well defined and can
     859             :  * have their type defined in a snap.
     860             :  *
     861             :  * Unicode characters make use of a table to convert the character to
     862             :  * a type. Unicode characters are either viewed as letters (CHAR_LETTER)
     863             :  * or as punctuation (CHAR_PUNCTUATION).
     864             :  *
     865             :  * The exceptions are the line terminators and white space characters,
     866             :  * captured by the switch, and invalid characters (CHAR_INVALID).
     867             :  *
     868             :  * \important
     869             :  * Each character type is a flag that can be used to check whether
     870             :  * the character is of a certain category, or a set of categories all
     871             :  * at once (i.e. (CHAR_LETTER | CHAR_DIGIT) means any character which
     872             :  * represents a letter or a digit.)
     873             :  *
     874             :  * \internal
     875             :  *
     876             :  * \param[in] c  The character of which the type is to be determined.
     877             :  *
     878             :  * \return The character type (a combination of CHAR_... flags)
     879             :  */
     880   305384756 : Lexer::char_type_t Lexer::char_type(Input::char_t c)
     881             : {
     882   305384756 :     switch(c) {
     883             :     case '\0':   // NULL (NUL)
     884             :     case String::STRING_CONTINUATION: // ( '\' + line terminator )
     885           2 :         return CHAR_INVALID;
     886             : 
     887             :     case '\n':   // LINE FEED (LF)
     888             :     case '\r':   // CARRIAGE RETURN (CR)
     889             :     //case 0x0085: // NEXT LINE (NEL) -- not in ECMAScript 5
     890             :     case 0x2028: // LINE SEPARATOR (LSEP)
     891             :     case 0x2029: // PARAGRAPH SEPARATOR (PSEP)
     892     5150862 :         return CHAR_LINE_TERMINATOR;
     893             : 
     894             :     case '\t':   // CHARACTER TABULATION (HT)
     895             :     case '\v':   // LINE TABULATION (VT)
     896             :     case '\f':   // FORM FEED (FF)
     897             :     case ' ':    // SPACE (SP)
     898             :     case 0x00A0: // NO-BREAK SPACE
     899             :     case 0x1680: // OGHAM SPACE MARK
     900             :     case 0x180E: // MONGOLIAN VOWEL SEPARATOR (MVS)
     901             :     case 0x2000: // EN QUAD (NQSP)
     902             :     case 0x2001: // EM QUAD (MQSP)
     903             :     case 0x2002: // EN SPACE (ENSP)
     904             :     case 0x2003: // EM SPACE (EMSP)
     905             :     case 0x2004: // THREE-PER-EM SPACE (3/MSP)
     906             :     case 0x2005: // FOUR-PER-EM SPACE (4/MSP)
     907             :     case 0x2006: // SIX-PER-EM SPACE (6/MSP)
     908             :     case 0x2007: // FIGURE SPACE (FSP)
     909             :     case 0x2008: // PUNCTUATION SPACE (PSP)
     910             :     case 0x2009: // THIN SPACE (THSP)
     911             :     case 0x200A: // HAIR SPACE (HSP)
     912             :     //case 0x200B: // ZERO WIDTH SPACE (ZWSP) -- this was accepted before, but it is not marked as a Zs category
     913             :     case 0x202F: // NARROW NO-BREAK SPACE (NNBSP)
     914             :     case 0x205F: // MEDIUM MATHEMATICAL SPACE (MMSP)
     915             :     case 0x3000: // IDEOGRAPHIC SPACE (IDSP)
     916             :     case 0xFEFF: // BYTE ORDER MARK (BOM)
     917    36636157 :         return CHAR_WHITE_SPACE;
     918             : 
     919             :     case '0': // '0' ... '9'
     920             :     case '1':
     921             :     case '2':
     922             :     case '3':
     923             :     case '4':
     924             :     case '5':
     925             :     case '6':
     926             :     case '7':
     927             :     case '8':
     928             :     case '9':
     929    15775098 :         return CHAR_DIGIT | CHAR_HEXDIGIT;
     930             : 
     931             :     case 'a': // 'a' ... 'f'
     932             :     case 'b':
     933             :     case 'c':
     934             :     case 'd':
     935             :     case 'e':
     936             :     case 'f':
     937             :     case 'A': // 'A' ... 'F'
     938             :     case 'B':
     939             :     case 'C':
     940             :     case 'D':
     941             :     case 'E':
     942             :     case 'F':
     943    52027066 :         return CHAR_LETTER | CHAR_HEXDIGIT;
     944             : 
     945             :     case '_':
     946             :     case '$':
     947     4464566 :         return CHAR_LETTER;
     948             : 
     949             :     default:
     950   191331005 :         if((c >= 'g' && c <= 'z')
     951    90227775 :         || (c >= 'G' && c <= 'Z'))
     952             :         {
     953   105864841 :             return CHAR_LETTER;
     954             :         }
     955    85466164 :         if((c & 0x0FFFF) >= 0xFFFE
     956    80762116 :         || (c >= 0xD800 && c <= 0xDFFF))
     957             :         {
     958             :             // 0xFFFE and 0xFFFF are invalid in all planes
     959             :             // surrogates are not valid standalone characters
     960     4709944 :             return CHAR_INVALID;
     961             :         }
     962    80756220 :         if(c < 0x7F)
     963             :         {
     964    61628540 :             return CHAR_PUNCTUATION;
     965             :         }
     966             :         // TODO: this will be true in most cases, but not always!
     967             :         //       documentation says:
     968             :         //          Uppercase letter (Lu)
     969             :         //          Lowercase letter (Ll)
     970             :         //          Titlecase letter (Lt)
     971             :         //          Modifier letter (Lm)
     972             :         //          Other letter (Lo)
     973             :         //          Letter number (Nl)
     974             :         //          Non-spacing mark (Mn)
     975             :         //          Combining spacing mark (Mc)
     976             :         //          Decimal number (Nd)
     977             :         //          Connector punctuation (Pc)
     978             :         //          ZWNJ
     979             :         //          ZWJ
     980             :         {
     981             :             size_t i, j, p; // binary search window [i, j) and probe index p
     982             :             int    r; // sign of (range minimum - c) selects the half to keep
     983             : 
     984    19127680 :             i = 0;
     985    19127680 :             j = g_identifier_characters_size;
     986   159720315 :             while(i < j)
     987             :             {
     988   135049422 :                 p = (j - i) / 2 + i;
     989   135049422 :                 if(g_identifier_characters[p].f_min <= c && c <= g_identifier_characters[p].f_max)
     990             :                 {
     991    13584467 :                     return CHAR_LETTER;
     992             :                 }
     993   121464955 :                 r = g_identifier_characters[p].f_min - c;
     994   121464955 :                 if(r < 0)
     995             :                 {
     996    76307012 :                     i = p + 1;
     997             :                 }
     998             :                 else
     999             :                 {
    1000    45157943 :                     j = p;
    1001             :                 }
    1002             :             }
    1003             :         }
    1004     5543213 :         return CHAR_PUNCTUATION;
    1005             : 
    1006             :     }
    1007             :     /*NOTREACHED*/
    1008             : }
    1009             : 
    1010             : 
    1011             : 
    1012             : 
    1013             : /** \brief Read an hexadecimal number.
    1014             :  *
    1015             :  * This function reads hexadecimal digits (0-9, a-f, A-F) up until another
    1016             :  * character is found or \p max digits were read. That other character is
    1017             :  * ungotten so the next call to getc() will return that non-hexadecimal character.
    1018             :  *
    1019             :  * Since the function is called without an introducing digit, the
    1020             :  * number could end up being empty. If that happens, an error is
    1021             :  * generated and the function returns -1 (although -1 is a valid
    1022             :  * number assuming you accept all 64 bits.)
    1023             :  *
    1024             :  * \internal
    1025             :  *
    1026             :  * \param[in] max  The maximum number of digits to read.
    1027             :  *
    1028             :  * \return The number just read as an integer (64 bit), or -1 if no digit was found.
    1029             :  */
    1030     1395195 : int64_t Lexer::read_hex(unsigned long max)
    1031             : {
    1032     1395195 :     int64_t result(0);
    1033     1395195 :     Input::char_t c(getc());
    1034     1395195 :     unsigned long p(0);
    1035    12121266 :     for(; (f_char_type & CHAR_HEXDIGIT) != 0 && p < max; ++p)
    1036             :     {
    1037    10726071 :         if(c <= '9')
    1038             :         {
    1039     8288737 :             result = result * 16 + c - '0';
    1040             :         }
    1041     2437334 :         else if(c <= 'F')
    1042             :         {
    1043        8780 :             result = result * 16 + c - ('A' - 10);
    1044             :         }
    1045     2428554 :         else if(c <= 'f')
    1046             :         {
    1047     2428554 :             result = result * 16 + c - ('a' - 10);
    1048             :         }
    1049    10726071 :         c = getc();
    1050             :     }
    1051     1395195 :     ungetc(c);
    1052             : 
    1053     1395195 :     if(p == 0)
    1054             :     {
    1055           2 :         Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_INVALID_NUMBER, f_input->get_position());
    1056           2 :         msg << "invalid hexadecimal number, at least one digit is required";
    1057           2 :         return -1;
    1058             :     }
    1059             : 
    1060             :     // TODO: In strict mode, should we check whether we got p == max?
    1061             :     // WARNING: this is also used by the ReadNumber() function
    1062             : 
    1063     1395193 :     return result;
    1064             : }
    1065             : 
    1066             : 
    1067             : /** \brief Read a binary number.
    1068             :  *
    1069             :  * This function reads 0's and 1's up until another character is found
    1070             :  * or \p max digits were read. That other character is ungotten so the
    1071             :  * next call to getc() will return that non-binary character.
    1072             :  *
    1073             :  * Since the function is called without an introducing digit, the
    1074             :  * number could end up being empty. If that happens, an error is
    1075             :  * generated and the function returns -1 (although -1 is a valid
    1076             :  * number assuming you accept all 64 bits.)
    1077             :  *
    1078             :  * \internal
    1079             :  *
    1080             :  * \param[in] max  The maximum number of digits to read.
    1081             :  *
    1082             :  * \return The number just read as an integer (64 bit), or -1 if no digit was found.
    1083             :  */
    1084        4108 : int64_t Lexer::read_binary(unsigned long max)
    1085             : {
    1086        4108 :     int64_t result(0);
    1087        4108 :     Input::char_t c(getc());
    1088        4108 :     unsigned long p(0);
    1089       55410 :     for(; (c == '0' || c == '1') && p < max; ++p)
    1090             :     {
    1091       51302 :         result = result * 2 + c - '0';
    1092       51302 :         c = getc();
    1093             :     }
    1094        4108 :     ungetc(c);
    1095             : 
    1096        4108 :     if(p == 0)
    1097             :     {
    1098           2 :         Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_INVALID_NUMBER, f_input->get_position());
    1099           2 :         msg << "invalid binary number, at least one digit is required";
    1100           2 :         return -1;
    1101             :     }
    1102             : 
    1103        4106 :     return result;
    1104             : }
    1105             : 
    1106             : 
    1107             : /** \brief Read an octal number.
    1108             :  *
    1109             :  * This function reads octal digits up until a character other than a
    1110             :  * valid octal digit or \p max digits were read. That character is
    1111             :  * ungotten so the next call to getc() will return that non-octal
    1112             :  * character.
    1113             :  *
    1114             :  * \internal
    1115             :  *
    1116             :  * \param[in] c  The character that triggered a call to read_octal(); it is used as the first digit.
    1117             :  * \param[in] max  The maximum number of digits to read, \p c included.
    1118             :  *
    1119             :  * \return The number just read as an integer (64 bit).
    1120             :  */
    1121        2320 : int64_t Lexer::read_octal(Input::char_t c, unsigned long max)
    1122             : {
    1123        2320 :     int64_t result(c - '0');
    1124        2320 :     c = getc();
    1125        6876 :     for(unsigned long p(1); c >= '0' && c <= '7' && p < max; ++p, c = getc())
    1126             :     {
    1127        4556 :         result = result * 8 + c - '0';
    1128             :     }
    1129        2320 :     ungetc(c);
    1130             : 
    1131        2320 :     return result;
    1132             : }
    1133             : 
    1134             : 
    1135             : /** \brief Read characters representing an escape sequence.
    1136             :  *
    1137             :  * This function reads the next few characters transforming them into
    1138             :  * the one character that the escape sequence represents.
    1139             :  *
    1140             :  * Some characters are extensions and require the extended escape
    1141             :  * sequences to be turned on in order to be accepted. These are marked
    1142             :  * as an extension in the list below.
    1143             :  *
    1144             :  * The function supports:
    1145             :  *
    1146             :  * \li \\u#### -- the 4 digit Unicode character
    1147             :  * \li \\U######## -- the 8 digit Unicode character, this is an extension
    1148             :  * \li \\x## or \\X## -- the 2 digit ISO-8859-1 character
    1149             :  * \li \\' -- escape the single quote (') character
    1150             :  * \li \\" -- escape the double quote (") character
    1151             :  * \li \\\\ -- escape the backslash (\) character
    1152             :  * \li \\b -- the backspace character
    1153             :  * \li \\e -- the escape character, this is an extension
    1154             :  * \li \\f -- the formfeed character
    1155             :  * \li \\n -- the newline character
    1156             :  * \li \\r -- the carriage return character
    1157             :  * \li \\t -- the tab character
    1158             :  * \li \\v -- the vertical tab character
    1159             :  * \li \\\<newline> or \\\<#x2028> or \\\<#x2029> -- continuation characters
    1160             :  * \li \\### -- 1 to 3 octal digit ISO-8859-1 character, this is an extension
    1161             :  * \li \\0 -- the NUL character
    1162             :  *
    1163             :  * Any other character generates an error message if appearing after a
    1164             :  * backslash (\).
    1165             :  *
    1166             :  * \internal
    1167             :  *
    1168             :  * \param[in] accept_continuation  Whether the backslash + newline combination
    1169             :  *                                 is acceptable in this token.
    1170             :  *
    1171             :  * \return The escape character if valid (String::STRING_CONTINUATION for an accepted continuation, the read_hex() result for \\u, \\U, \\x and \\X), '?' otherwise.
    1172             :  */
    1173     2539534 : Input::char_t Lexer::escape_sequence(bool accept_continuation)
    1174             : {
    1175     2539534 :     Input::char_t c(getc());
    1176     2539534 :     switch(c)
    1177             :     {
    1178             :     case 'u':
    1179             :         // 4 hex digits
    1180       97039 :         return read_hex(4);
    1181             : 
    1182             :     case 'U':
    1183             :         // We support full Unicode without the need for the programmer to
    1184             :         // encode his characters in UTF-16 by hand! The compiler spits out
    1185             :         // the characters using two '\uXXXX' characters.
    1186     1285376 :         if(has_option_set(Options::option_t::OPTION_EXTENDED_ESCAPE_SEQUENCES))
    1187             :         {
    1188             :             // 8 hex digits
    1189     1285375 :             return read_hex(8);
    1190             :         }
    1191           1 :         break;
    1192             : 
    1193             :     case 'x':
    1194             :     case 'X':
    1195             :         // 2 hex digits
    1196         325 :         return read_hex(2);
    1197             : 
    1198             :     case '\'':
    1199             :     case '\"':
    1200             :     case '\\':
    1201       13746 :         return c;
    1202             : 
    1203             :     case 'b':
    1204        4104 :         return '\b';
    1205             : 
    1206             :     case 'e':
    1207        4096 :         if(has_option_set(Options::option_t::OPTION_EXTENDED_ESCAPE_SEQUENCES))
    1208             :         {
    1209        2048 :             return '\033';
    1210             :         }
    1211        2048 :         break;
    1212             : 
    1213             :     case 'f':
    1214        4101 :         return '\f';
    1215             : 
    1216             :     case 'n':
    1217        5114 :         return '\n';
    1218             : 
    1219             :     case 'r':
    1220        4105 :         return '\r';
    1221             : 
    1222             :     case 't':
    1223        4106 :         return '\t';
    1224             : 
    1225             :     case 'v':
    1226        4096 :         return '\v';
    1227             : 
    1228             :     case '\n':
    1229             :     case 0x2028:
    1230             :     case 0x2029:
    1231        1015 :         if(accept_continuation)
    1232             :         {
    1233        1014 :             return String::STRING_CONTINUATION;
    1234             :         }
    1235             :         // make sure line terminators do not get skipped
    1236           1 :         ungetc(c);
    1237           1 :         break;
    1238             : 
    1239             :     default:
    1240     1112311 :         if(has_option_set(Options::option_t::OPTION_EXTENDED_ESCAPE_SEQUENCES))
    1241             :         {
    1242         256 :             if(c >= '0' && c <= '7')
    1243             :             {
    1244         256 :                 return read_octal(c, 3);
    1245             :             }
    1246             :         }
    1247             :         else
    1248             :         {
    1249     1112055 :             if(c == '0')
    1250             :             {
    1251          10 :                 return '\0';
    1252             :             }
    1253             :         }
    1254     1112045 :         break;
    1255             : 
    1256             :     }
    1257             : 
    1258     1114095 :     if(c > ' ' && c < 0x7F)
    1259             :     {
    1260        2128 :         Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_UNKNOWN_ESCAPE_SEQUENCE, f_input->get_position());
    1261        2128 :         msg << "unknown escape letter '" << static_cast<char>(c) << "'";
    1262             :     }
    1263             :     else
    1264             :     {
    1265     1111967 :         Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_UNKNOWN_ESCAPE_SEQUENCE, f_input->get_position());
    1266     1111967 :         msg << "unknown escape letter '\\U" << std::hex << std::setfill('0') << std::setw(8) << static_cast<int32_t>(c) << "'";
    1267             :     }
    1268             : 
    1269     1114095 :     return '?';
    1270             : }
    1271             : 
    1272             : 
    1273             : /** \brief Read a set of characters as defined by \p flags.
    1274             :  *
    1275             :  * This function reads all the characters as long as their type matches
    1276             :  * the specified flags. The result is saved in the \p str parameter.
    1277             :  *
    1278             :  * At the time the function is called, \p c is expected to be the first
    1279             :  * character to be added to \p str.
    1280             :  *
    1281             :  * The first character that does not satisfy the flags is pushed back
    1282             :  * in the input stream so one can call getc() again to retrieve it.
    1283             :  *
    1284             :  * \param[in] c  The character that prompted this call and which ends up
    1285             :  *               first in \p str.
    1286             :  * \param[in] flags  The flags that must match each character, including
    1287             :  *                   \p c character type.
    1288             :  * \param[in,out] str  The resulting string. It is expected to be empty on
    1289             :  *                     call but does not need to be (it does not get cleared.)
    1290             :  *
    1291             :  * \internal
    1292             :  *
    1293             :  * \return The next character, although it was also ungotten.
    1294             :  */
    1295     3496216 : Input::char_t Lexer::read(Input::char_t c, char_type_t flags, String& str) // accumulate into str the run of characters whose type matches flags, starting with c
    1296             : {
    1297     3496216 :     do // do/while: the incoming c is handled before any test against flags
    1298             :     {
    1299     3496216 :         if((f_char_type & CHAR_INVALID) == 0) // characters whose type has CHAR_INVALID set are consumed but not stored
    1300             :         {
    1301     3496216 :             str += c;
    1302             :         }
    1303     3496216 :         c = getc(); // NOTE(review): presumably getc() also refreshes f_char_type for the new c -- confirm
    1304             :     }
    1305     3496216 :     while((f_char_type & flags) != 0 && c >= 0); // continue while any bit of flags matches and c is not negative (negative presumably means end of input -- confirm)
    1306             : 
    1307     2105539 :     ungetc(c); // push the first non-matching character back so the caller's next getc() sees it
    1308             : 
    1309     2105539 :     return c; // the same character that was just pushed back
    1310             : }
    1311             : 
    1312             : 
    1313             : 
    1314             : /** \brief Read an identifier.
    1315             :  *
    1316             :  * This function reads an identifier and checks whether that identifier
    1317             :  * is a keyword.
    1318             :  *
    1319             :  * The list of reserved keywords as defined in ECMAScript is shown
    1320             :  * below. Note that it includes all versions (1 through 5) and we mark
    1321             :  * all of these identifiers as keywords and we are NOT flexible at
    1322             :  * all with those. (i.e. JavaScript allows for keywords to be used
    1323             :  * as object field names as in 'myObj.break = 123;' and we do not.)
    1324             :  *
    1325             :  * \li abstract
    1326             :  * \li boolean
    1327             :  * \li break
    1328             :  * \li byte
    1329             :  * \li case
    1330             :  * \li catch
    1331             :  * \li char
    1332             :  * \li class
    1333             :  * \li const
    1334             :  * \li continue
    1335             :  * \li debugger
    1336             :  * \li default
    1337             :  * \li delete
    1338             :  * \li do
    1339             :  * \li double
    1340             :  * \li else
    1341             :  * \li enum
    1342             :  * \li export
    1343             :  * \li extends
    1344             :  * \li false
    1345             :  * \li final
    1346             :  * \li finally
    1347             :  * \li float
    1348             :  * \li for
    1349             :  * \li function
    1350             :  * \li goto
    1351             :  * \li if
    1352             :  * \li implements
    1353             :  * \li import
    1354             :  * \li in
    1355             :  * \li int
    1356             :  * \li instanceof
    1357             :  * \li interface
    1358             :  * \li let
    1359             :  * \li long
    1360             :  * \li native
    1361             :  * \li new
    1362             :  * \li null
    1363             :  * \li package
    1364             :  * \li private
    1365             :  * \li protected
    1366             :  * \li public
    1367             :  * \li return
    1368             :  * \li short
    1369             :  * \li static
    1370             :  * \li super
    1371             :  * \li switch
    1372             :  * \li synchronized
    1373             :  * \li this
    1374             :  * \li throw
    1375             :  * \li throws
    1376             :  * \li transient
    1377             :  * \li true
    1378             :  * \li try
    1379             :  * \li typeof
    1380             :  * \li var
    1381             :  * \li void
    1382             :  * \li volatile
    1383             :  * \li while
    1384             :  * \li with
    1385             :  * \li yield
    1386             :  *
    1387             :  * The function sets the f_result_type and f_result_string as required.
    1388             :  *
    1389             :  * We also understand additional keywords as defined here:
    1390             :  *
    1391             :  * \li as -- from ActionScript, to do a cast
    1392             :  * \li is -- from ActionScript, to check a value type
    1393             :  * \li namespace -- to encompass many declarations in a namespace
    1394             :  * \li use -- to avoid having to declare certain namespaces, declare number
    1395             :  *            types, change pragma (options) value
    1396             :  *
    1397             :  * We also support the special names:
    1398             :  *
    1399             :  * \li Infinity, which is supposed to be a global variable
    1400             :  * \li NaN, which is supposed to be a global variable
    1401             :  * \li undefined, which is supposed to never be defined
    1402             :  * \li __FILE__, which gets transformed to the filename of the input stream
    1403             :  * \li __LINE__, which gets transformed to the current line number
    1404             :  *
    1405             :  * \internal
    1406             :  *
    1407             :  * \param[in] c  The current character representing the first identifier character.
    1408             :  */
    1409    17282255 : void Lexer::read_identifier(Input::char_t c)
    1410             : {
    1411             :     // identifiers support character escaping like strings
    1412             :     // so we have a special identifier read instead of
    1413             :     // calling the read() function
    1414    17282255 :     String str;
    1415             :     for(;;)
    1416             :     {
    1417             :         // here escaping is not used to insert invalid characters
    1418             :         // in a literal, but instead to add characters that
    1419             :         // could otherwise be difficult to type (or possibly
    1420             :         // difficult to share between users).
    1421             :         //
    1422             :         // so we immediately manage the backslash and use the
    1423             :         // character type of the escape character!
    1424   120425041 :         if(c == '\\')
    1425             :         {
    1426      206667 :             c = escape_sequence(false);
    1427      206667 :             f_char_type = char_type(c);
    1428      206667 :             if((f_char_type & (CHAR_LETTER | CHAR_DIGIT)) == 0 || c < 0)
    1429             :             {
    1430             :                 // do not unget() this character...
    1431           1 :                 break;
    1432             :             }
    1433             :         }
    1434   120218374 :         else if((f_char_type & (CHAR_LETTER | CHAR_DIGIT)) == 0 || c < 0)
    1435             :         {
    1436             :             // unget this character
    1437    17282254 :             ungetc(c);
    1438    17282254 :             break;
    1439             :         }
    1440   103142786 :         if((f_char_type & CHAR_INVALID) == 0)
    1441             :         {
    1442   103142786 :             str += c;
    1443             :         }
    1444   103142786 :         c = getc();
    1445             :     }
    1446             : 
    1447             :     // An identifier can be a keyword, we check that right here!
    1448    17282255 :     size_t l(str.length());
    1449    17282255 :     if(l > 1)
    1450             :     {
    1451    15014183 :         as_char_t const *s(str.c_str());
    1452    15014183 :         switch(s[0])
    1453             :         {
    1454             :         case 'a':
    1455      589839 :             if(l == 8 && str == "abstract")
    1456             :             {
    1457       36865 :                 f_result_type = Node::node_t::NODE_ABSTRACT;
    1458       36865 :                 return;
    1459             :             }
    1460      552974 :             if(l == 2 && s[1] == 's')
    1461             :             {
    1462      102401 :                 f_result_type = Node::node_t::NODE_AS;
    1463      102401 :                 return;
    1464             :             }
    1465      450573 :             break;
    1466             : 
    1467             :         case 'b':
    1468      405511 :             if(l == 7 && str == "boolean")
    1469             :             {
    1470        4096 :                 f_result_type = Node::node_t::NODE_BOOLEAN;
    1471        4096 :                 return;
    1472             :             }
    1473      401415 :             if(l == 5 && str == "break")
    1474             :             {
    1475       53248 :                 f_result_type = Node::node_t::NODE_BREAK;
    1476       53248 :                 return;
    1477             :             }
    1478      348167 :             if(l == 4 && str == "byte")
    1479             :             {
    1480        4096 :                 f_result_type = Node::node_t::NODE_BYTE;
    1481        4096 :                 return;
    1482             :             }
    1483      344071 :             break;
    1484             : 
    1485             :         case 'c':
    1486     1171514 :             if(l == 4 && str == "case")
    1487             :             {
    1488       86016 :                 f_result_type = Node::node_t::NODE_CASE;
    1489       86016 :                 return;
    1490             :             }
    1491     1085498 :             if(l == 5 && str == "catch")
    1492             :             {
    1493       86016 :                 f_result_type = Node::node_t::NODE_CATCH;
    1494       86016 :                 return;
    1495             :             }
    1496      999482 :             if(l == 4 && str == "char")
    1497             :             {
    1498        4096 :                 f_result_type = Node::node_t::NODE_CHAR;
    1499        4096 :                 return;
    1500             :             }
    1501      995386 :             if(l == 5 && str == "class")
    1502             :             {
    1503      282642 :                 f_result_type = Node::node_t::NODE_CLASS;
    1504      282642 :                 return;
    1505             :             }
    1506      712744 :             if(l == 5 && str == "const")
    1507             :             {
    1508      192528 :                 f_result_type = Node::node_t::NODE_CONST;
    1509      192528 :                 return;
    1510             :             }
    1511      520216 :             if(l == 8 && str == "continue")
    1512             :             {
    1513       36864 :                 f_result_type = Node::node_t::NODE_CONTINUE;
    1514       36864 :                 return;
    1515             :             }
    1516      483352 :             break;
    1517             : 
    1518             :         case 'd':
    1519      528400 :             if(l == 8 && str == "debugger")
    1520             :             {
    1521       12288 :                 f_result_type = Node::node_t::NODE_DEBUGGER;
    1522       12288 :                 return;
    1523             :             }
    1524      516112 :             if(l == 7 && str == "default")
    1525             :             {
    1526       36864 :                 f_result_type = Node::node_t::NODE_DEFAULT;
    1527       36864 :                 return;
    1528             :             }
    1529      479248 :             if(l == 6 && str == "delete")
    1530             :             {
    1531        4096 :                 f_result_type = Node::node_t::NODE_DELETE;
    1532        4096 :                 return;
    1533             :             }
    1534      475152 :             if(l == 2 && s[1] == 'o')
    1535             :             {
    1536       45058 :                 f_result_type = Node::node_t::NODE_DO;
    1537       45058 :                 return;
    1538             :             }
    1539      430094 :             if(l == 6 && str == "double")
    1540             :             {
    1541        4097 :                 f_result_type = Node::node_t::NODE_DOUBLE;
    1542        4097 :                 return;
    1543             :             }
    1544      425997 :             break;
    1545             : 
    1546             :         case 'e':
    1547      782463 :             if(l == 4 && str == "else")
    1548             :             {
    1549       61443 :                 f_result_type = Node::node_t::NODE_ELSE;
    1550       61443 :                 return;
    1551             :             }
    1552      721020 :             if(l == 4 && str == "enum")
    1553             :             {
    1554      126977 :                 f_result_type = Node::node_t::NODE_ENUM;
    1555      126977 :                 return;
    1556             :             }
    1557      594043 :             if(l == 6 && str == "ensure")
    1558             :             {
    1559       28672 :                 f_result_type = Node::node_t::NODE_ENSURE;
    1560       28672 :                 return;
    1561             :             }
    1562      565371 :             if(l == 6 && str == "export")
    1563             :             {
    1564        4096 :                 f_result_type = Node::node_t::NODE_EXPORT;
    1565        4096 :                 return;
    1566             :             }
    1567      561275 :             if(l == 7 && str == "extends")
    1568             :             {
    1569       94225 :                 f_result_type = Node::node_t::NODE_EXTENDS;
    1570       94225 :                 return;
    1571             :             }
    1572      467050 :             break;
    1573             : 
    1574             :         case 'f':
    1575     1425791 :             if(l == 5 && str == "false")
    1576             :             {
    1577       77861 :                 f_result_type = Node::node_t::NODE_FALSE;
    1578       77861 :                 return;
    1579             :             }
    1580     1347930 :             if(l == 5 && str == "final")
    1581             :             {
    1582       69632 :                 f_result_type = Node::node_t::NODE_FINAL;
    1583       69632 :                 return;
    1584             :             }
    1585     1278298 :             if(l == 7 && str == "finally")
    1586             :             {
    1587       12288 :                 f_result_type = Node::node_t::NODE_FINALLY;
    1588       12288 :                 return;
    1589             :             }
    1590     1266010 :             if(l == 5 && str == "float")
    1591             :             {
    1592        4096 :                 f_result_type = Node::node_t::NODE_FLOAT;
    1593        4096 :                 return;
    1594             :             }
    1595     1261914 :             if(l == 3 && s[1] == 'o' && s[2] == 'r')
    1596             :             {
    1597      118785 :                 f_result_type = Node::node_t::NODE_FOR;
    1598      118785 :                 return;
    1599             :             }
    1600     1143129 :             if(l == 8 && str == "function")
    1601             :             {
    1602      954693 :                 f_result_type = Node::node_t::NODE_FUNCTION;
    1603      954693 :                 return;
    1604             :             }
    1605      188436 :             break;
    1606             : 
    1607             :         case 'g':
    1608      135192 :             if(l == 4 && str == "goto")
    1609             :             {
    1610       28672 :                 f_result_type = Node::node_t::NODE_GOTO;
    1611       28672 :                 return;
    1612             :             }
    1613      106520 :             break;
    1614             : 
    1615             :         case 'i':
    1616      941554 :             if(l == 2 && s[1] == 'f')
    1617             :             {
    1618       86022 :                 f_result_type = Node::node_t::NODE_IF;
    1619       86022 :                 return;
    1620             :             }
    1621      855532 :             if(l == 10 && str == "implements")
    1622             :             {
    1623       77824 :                 f_result_type = Node::node_t::NODE_IMPLEMENTS;
    1624       77824 :                 return;
    1625             :             }
    1626      777708 :             if(l == 6 && str == "import")
    1627             :             {
    1628      177275 :                 f_result_type = Node::node_t::NODE_IMPORT;
    1629      177275 :                 return;
    1630             :             }
    1631      600433 :             if(l == 2 && s[1] == 'n')
    1632             :             {
    1633      184663 :                 f_result_type = Node::node_t::NODE_IN;
    1634      184663 :                 return;
    1635             :             }
    1636      415770 :             if(l == 6 && str == "inline")
    1637             :             {
    1638        4096 :                 f_result_type = Node::node_t::NODE_INLINE;
    1639        4096 :                 return;
    1640             :             }
    1641      411674 :             if(l == 10 && str == "instanceof")
    1642             :             {
    1643       20480 :                 f_result_type = Node::node_t::NODE_INSTANCEOF;
    1644       20480 :                 return;
    1645             :             }
    1646      391194 :             if(l == 9 && str == "interface")
    1647             :             {
    1648       20480 :                 f_result_type = Node::node_t::NODE_INTERFACE;
    1649       20480 :                 return;
    1650             :             }
    1651      370714 :             if(l == 9 && str == "invariant")
    1652             :             {
    1653       45056 :                 f_result_type = Node::node_t::NODE_INVARIANT;
    1654       45056 :                 return;
    1655             :             }
    1656      325658 :             if(l == 2 && s[1] == 's')
    1657             :             {
    1658        4097 :                 f_result_type = Node::node_t::NODE_IS;
    1659        4097 :                 return;
    1660             :             }
    1661      321561 :             break;
    1662             : 
    1663             :         case 'I':
    1664      102674 :             if(l == 8 && str == "Infinity")
    1665             :             {
    1666             :                 // Note:
    1667             :                 //
    1668             :                 // JavaScript does NOT automatically see this identifier as
    1669             :                 // a number, so you can write statements such as:
    1670             :                 //
    1671             :                 //     var Infinity = 123;
    1672             :                 //
    1673             :                 // On our end, by immediately transforming that identifier
    1674             :                 // into a number, we at least prevent such strange syntax
    1675             :                 // and we do not have to "specially" handle "Infinity" when
    1676             :                 // encountering an identifier.
    1677             :                 //
    1678             :                 // However, JavaScript considers Infinity as a read-only
    1679             :                 // object defined in the global scope. It can also be
    1680             :                 // retrieved from Number as in:
    1681             :                 //
    1682             :                 //     Number.POSITIVE_INFINITY
    1683             :                 //     Number.NEGATIVE_INFINITY
    1684             :                 //
    1685             :                 // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/Infinity
    1686             :                 //
    1687        4368 :                 f_result_type = Node::node_t::NODE_FLOAT64;
    1688        4368 :                 f_result_float64.set_infinity();
    1689        4368 :                 return;
    1690             :             }
    1691       98306 :             break;
    1692             : 
    1693             :         case 'l':
    1694      159763 :             if(l == 4 && str == "long")
    1695             :             {
    1696        4096 :                 f_result_type = Node::node_t::NODE_LONG;
    1697        4096 :                 return;
    1698             :             }
    1699      155667 :             break;
    1700             : 
    1701             :         case 'n':
    1702     1430489 :             if(l == 9 && str == "namespace")
    1703             :             {
    1704       69632 :                 f_result_type = Node::node_t::NODE_NAMESPACE;
    1705       69632 :                 return;
    1706             :             }
    1707     1360857 :             if(l == 6 && str == "native")
    1708             :             {
    1709       61451 :                 f_result_type = Node::node_t::NODE_NATIVE;
    1710       61451 :                 return;
    1711             :             }
    1712     1299406 :             if(l == 3 && s[1] == 'e' && s[2] == 'w')
    1713             :             {
    1714       20480 :                 f_result_type = Node::node_t::NODE_NEW;
    1715       20480 :                 return;
    1716             :             }
    1717     1278926 :             if(l == 4 && str == "null")
    1718             :             {
    1719       45092 :                 f_result_type = Node::node_t::NODE_NULL;
    1720       45092 :                 return;
    1721             :             }
    1722     1233834 :             break;
    1723             : 
    1724             :         case 'N':
    1725       46779 :             if(l == 3 && s[1] == 'a' && s[2] == 'N')
    1726             :             {
    1727             :                 // Note:
    1728             :                 //
    1729             :                 // JavaScript does NOT automatically see this identifier as
    1730             :                 // a number, so you can write statements such as:
    1731             :                 //
    1732             :                 //     var NaN = 123;
    1733             :                 //
    1734             :                 // On our end, by immediately transforming that identifier
    1735             :                 // into a number, we at least prevent such strange syntax
    1736             :                 // and we do not have to "specially" handle "NaN" when
    1737             :                 // encountering an identifier.
    1738             :                 //
    1739             :                 // However, JavaScript considers NaN as a read-only
    1740             :                 // object defined in the global scope. It can also be
    1741             :                 // retrieved from Number as in:
    1742             :                 //
    1743             :                 //     Number.NaN
    1744             :                 //
    1745             :                 // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/NaN
    1746             :                 //
    1747        4290 :                 f_result_type = Node::node_t::NODE_FLOAT64;
    1748        4290 :                 f_result_float64.set_NaN();
    1749        4290 :                 return;
    1750             :             }
    1751       42489 :             break;
    1752             : 
    1753             :         case 'p':
    1754     1351707 :             if(l == 7 && str == "package")
    1755             :             {
    1756      135179 :                 f_result_type = Node::node_t::NODE_PACKAGE;
    1757      135179 :                 return;
    1758             :             }
    1759     1216528 :             if(l == 7 && str == "private")
    1760             :             {
    1761      299010 :                 f_result_type = Node::node_t::NODE_PRIVATE;
    1762      299010 :                 return;
    1763             :             }
    1764      917518 :             if(l == 9 && str == "protected")
    1765             :             {
    1766       77824 :                 f_result_type = Node::node_t::NODE_PROTECTED;
    1767       77824 :                 return;
    1768             :             }
    1769      839694 :             if(l == 6 && str == "public")
    1770             :             {
    1771      659456 :                 f_result_type = Node::node_t::NODE_PUBLIC;
    1772      659456 :                 return;
    1773             :             }
    1774      180238 :             break;
    1775             : 
    1776             :         case 'r':
    1777     1179669 :             if(l == 7 && str == "require")
    1778             :             {
    1779       28672 :                 f_result_type = Node::node_t::NODE_REQUIRE;
    1780       28672 :                 return;
    1781             :             }
    1782     1150997 :             if(l == 6 && str == "return")
    1783             :             {
    1784      266240 :                 f_result_type = Node::node_t::NODE_RETURN;
    1785      266240 :                 return;
    1786             :             }
    1787      884757 :             break;
    1788             : 
    1789             :         case 's':
    1790      839786 :             if(l == 5 && str == "short")
    1791             :             {
    1792        4096 :                 f_result_type = Node::node_t::NODE_SHORT;
    1793        4096 :                 return;
    1794             :             }
    1795      835690 :             if(l == 6 && str == "static")
    1796             :             {
    1797       36905 :                 f_result_type = Node::node_t::NODE_STATIC;
    1798       36905 :                 return;
    1799             :             }
    1800      798785 :             if(l == 5 && str == "super")
    1801             :             {
    1802        4096 :                 f_result_type = Node::node_t::NODE_SUPER;
    1803        4096 :                 return;
    1804             :             }
    1805      794689 :             if(l == 6 && str == "switch")
    1806             :             {
    1807       94208 :                 f_result_type = Node::node_t::NODE_SWITCH;
    1808       94208 :                 return;
    1809             :             }
    1810      700481 :             if(l == 12 && str == "synchronized")
    1811             :             {
    1812       36864 :                 f_result_type = Node::node_t::NODE_SYNCHRONIZED;
    1813       36864 :                 return;
    1814             :             }
    1815      663617 :             break;
    1816             : 
    1817             :         case 't':
    1818      737418 :             if(l == 4 && str == "then")
    1819             :             {
    1820       20480 :                 f_result_type = Node::node_t::NODE_THEN;
    1821       20480 :                 return;
    1822             :             }
    1823      716938 :             if(l == 4 && str == "this")
    1824             :             {
    1825      282624 :                 f_result_type = Node::node_t::NODE_THIS;
    1826      282624 :                 return;
    1827             :             }
    1828      434314 :             if(l == 5 && str == "throw")
    1829             :             {
    1830       28672 :                 f_result_type = Node::node_t::NODE_THROW;
    1831       28672 :                 return;
    1832             :             }
    1833      405642 :             if(l == 6 && str == "throws")
    1834             :             {
    1835       20480 :                 f_result_type = Node::node_t::NODE_THROWS;
    1836       20480 :                 return;
    1837             :             }
    1838      385162 :             if(l == 9 && str == "transient")
    1839             :             {
    1840       12288 :                 f_result_type = Node::node_t::NODE_TRANSIENT;
    1841       12288 :                 return;
    1842             :             }
    1843      372874 :             if(l == 4 && str == "true")
    1844             :             {
    1845       36955 :                 f_result_type = Node::node_t::NODE_TRUE;
    1846       36955 :                 return;
    1847             :             }
    1848      335919 :             if(l == 3 && s[1] == 'r' && s[2] == 'y')
    1849             :             {
    1850       20480 :                 f_result_type = Node::node_t::NODE_TRY;
    1851       20480 :                 return;
    1852             :             }
    1853      315439 :             if(l == 6 && str == "typeof")
    1854             :             {
    1855       45056 :                 f_result_type = Node::node_t::NODE_TYPEOF;
    1856       45056 :                 return;
    1857             :             }
    1858      270383 :             break;
    1859             : 
    1860             :         case 'u':
    1861      417953 :             if(l == 9 && str == "undefined")
    1862             :             {
    1863             :                 // Note: undefined is actually not a reserved keyword, but
    1864             :                 //       by reserving it, we avoid stupid mistakes like:
    1865             :                 //
    1866             :                 //       var undefined = 5;
    1867             :                 //
    1868             :                 // https://developer.mozilla.org/en-US/docs/Web/JavaScript/Reference/Global_Objects/undefined
    1869             :                 //
    1870        4161 :                 f_result_type = Node::node_t::NODE_UNDEFINED;
    1871        4161 :                 return;
    1872             :             }
    1873      413792 :             if(l == 3 && s[1] == 's' && s[2] == 'e')
    1874             :             {
    1875      192605 :                 f_result_type = Node::node_t::NODE_USE;
    1876      192605 :                 return;
    1877             :             }
    1878      221187 :             break;
    1879             : 
    1880             :         case 'v':
    1881     1135132 :             if(l == 3 && s[1] == 'a' && s[2] == 'r')
    1882             :             {
    1883      414082 :                 f_result_type = Node::node_t::NODE_VAR;
    1884      414082 :                 return;
    1885             :             }
    1886      721050 :             if(l == 4 && str == "void")
    1887             :             {
    1888       86016 :                 f_result_type = Node::node_t::NODE_VOID;
    1889       86016 :                 return;
    1890             :             }
    1891      635034 :             if(l == 8 && str == "volatile")
    1892             :             {
    1893       28672 :                 f_result_type = Node::node_t::NODE_VOLATILE;
    1894       28672 :                 return;
    1895             :             }
    1896      606362 :             break;
    1897             : 
    1898             :         case 'w':
    1899      245770 :             if(l == 5 && str == "while")
    1900             :             {
    1901      110597 :                 f_result_type = Node::node_t::NODE_WHILE;
    1902      110597 :                 return;
    1903             :             }
    1904      135173 :             if(l == 4 && str == "with")
    1905             :             {
    1906       94208 :                 f_result_type = Node::node_t::NODE_WITH;
    1907       94208 :                 return;
    1908             :             }
    1909       40965 :             break;
    1910             : 
    1911             :         case 'y':
    1912       28678 :             if(l == 5 && str == "yield")
    1913             :             {
    1914       20480 :                 f_result_type = Node::node_t::NODE_YIELD;
    1915       20480 :                 return;
    1916             :             }
    1917        8198 :             break;
    1918             : 
    1919             :         case '_':
    1920       16409 :             if(l == 8 && str == "__FILE__")
    1921             :             {
    1922        4096 :                 f_result_type = Node::node_t::NODE_STRING;
    1923        4096 :                 f_result_string = f_input->get_position().get_filename();
    1924        4096 :                 return;
    1925             :             }
    1926       12313 :             if(l == 8 && str == "__LINE__")
    1927             :             {
    1928        4117 :                 f_result_type = Node::node_t::NODE_INT64;
    1929        4117 :                 f_result_int64 = f_input->get_position().get_line();
    1930        4117 :                 return;
    1931             :             }
    1932        8196 :             break;
    1933             : 
    1934             :         }
    1935             :     }
    1936             : 
    1937    10811523 :     if(l == 0)
    1938             :     {
    1939           1 :         f_result_type = Node::node_t::NODE_UNKNOWN;
    1940             :     }
    1941             :     else
    1942             :     {
    1943    10811522 :         f_result_type = Node::node_t::NODE_IDENTIFIER;
    1944    10811522 :         f_result_string = str;
    1945    10811523 :     }
    1946             : }
    1947             : 
    1948             : 
    1949             : /** \brief Read one number from the input stream.
    1950             :  *
    1951             :  * This function is called whenever a digit is found in the input
    1952             :  * stream. It may also be called if a period was read (the rules
    1953             :  * are a little more complicated for the period.)
    1954             :  *
    1955             :  * If \p c is '0', the function checks the following character, if it is:
    1956             :  *
    1957             :  * \li 'x' or 'X' -- it reads a hexadecimal number, see read_hex()
    1958             :  * \li 'b' or 'B' -- it reads a binary number when OPTION_BINARY is set, see read_binary()
    1959             :  * \li '0' to '7' -- it reads an octal number when OPTION_OCTAL is set,
    1960             :  *            see read_octal()
    1961             :  * \li '.' -- it reads a floating point number
    1962             :  * \li otherwise (and for any other first digit) it reads an integer,
    1963             :  *     although if the integer is followed by '.', 'e', or 'E' introducing
    1964             :  *     a fraction or an exponent, it ends up reading the number as a floating point
    1965             :  *
    1966             :  * The result is directly saved in f_result_type (NODE_INT64 or NODE_FLOAT64)
    1967             :  * and in the matching f_result_int64 or f_result_float64 variable.
    1968             :  *
    1969             :  * \internal
    1970             :  *
    1971             :  * \param[in] c  The digit or period that triggered this call.
    1972             :  */
    1973     2670503 : void Lexer::read_number(Input::char_t c)
    1974             : {
    1975     2670503 :     String      number;
    1976             : 
    1977             :     // TODO: accept '_' within the number (between digits) like Java 7
    1978     2670503 :     if(c == '.')
    1979             :     {
    1980             :         // in case the std::stod() does not support a missing 0
    1981             :         // at the start of a floating point
    1982        8192 :         number = "0";
    1983             :     }
    1984     2662311 :     else if(c == '0')
    1985             :     {
    1986      799151 :         c = getc();
    1987      799151 :         if(c == 'x' || c == 'X')
    1988             :         {
    1989             :             // hexadecimal number
    1990       12456 :             f_result_type = Node::node_t::NODE_INT64;
    1991       12456 :             f_result_int64 = read_hex(16);
    1992       12456 :             return;
    1993             :         }
    1994     1573390 :         if(has_option_set(Options::option_t::OPTION_BINARY)
    1995      786695 :         && (c == 'b' || c == 'B'))
    1996             :         {
    1997             :             // binary number
    1998        4108 :             f_result_type = Node::node_t::NODE_INT64;
    1999        4108 :             f_result_int64 = read_binary(64);
    2000        4108 :             return;
    2001             :         }
    2002             :         // octal is not permitted in ECMAScript version 3+
    2003             :         // (especially in strict mode)
    2004     1565174 :         if(has_option_set(Options::option_t::OPTION_OCTAL)
    2005      782587 :         && c >= '0' && c <= '7')
    2006             :         {
    2007             :             // octal
    2008        2064 :             f_result_type = Node::node_t::NODE_INT64;
    2009        2064 :             f_result_int64 = read_octal(c, 22);
    2010        2064 :             return;
    2011             :         }
    2012      780523 :         number = "0";
    2013      780523 :         ungetc(c); // not a base prefix, put the character back
    2014             :     }
    2015             :     else
    2016             :     {
    2017     1863160 :         c = read(c, CHAR_DIGIT, number);
    2018             :     }
    2019             : 
    2020             :     // TODO: we may want to support 32 bits floats as well
    2021             :     //       JavaScript really only supports 64 bit floats
    2022             :     //       and nothing else...
    2023     2651875 :     f_result_type = Node::node_t::NODE_FLOAT64; // assume a float; reset to NODE_INT64 below if no fraction/exponent is read
    2024     2651875 :     if(c == '.')
    2025             :     {
    2026      209612 :         getc(); // re-read the '.' character
    2027             : 
    2028      209612 :         Input::char_t f(getc()); // check the following character
    2029      209612 :         if(f != '.' && (f_char_type & CHAR_DIGIT) != 0)
    2030             :         {
    2031      197321 :             ungetc(f);
    2032             : 
    2033      197321 :             Input::char_t q(read(c, CHAR_DIGIT, number));
    2034      197321 :             if(q == 'e' || q == 'E')
    2035             :             {
    2036       16387 :                 getc();        // skip the 'e'
    2037       16387 :                 c = getc();    // get the character after!
    2038       16387 :                 if(c == '-' || c == '+' || (c >= '0' && c <= '9'))
    2039             :                 {
    2040       16386 :                     number += 'e';
    2041       16386 :                     c = read(c, CHAR_DIGIT, number);
    2042             :                 }
    2043             :                 else
    2044             :                 {
    2045           1 :                     ungetc(c);
    2046           1 :                     ungetc(q);
    2047           1 :                     f_char_type = char_type(q); // restore this character type, we'll most certainly get an error
    2048             :                 }
    2049             :             }
    2050             :             // TODO: detect whether an error was detected in the conversion
    2051      197321 :             f_result_float64 = number.to_float64();
    2052      197321 :             return;
    2053             :         }
    2054       12291 :         if(f == 'e' || f == 'E')
    2055             :         {
    2056       12289 :             Input::char_t s(getc());
    2057       12289 :             if(s == '+' || s == '-')
    2058             :             {
    2059        8193 :                 Input::char_t e(getc());
    2060        8193 :                 if((f_char_type & CHAR_DIGIT) != 0)
    2061             :                 {
    2062             :                     // considered floating point
    2063        8192 :                     number += 'e';
    2064        8192 :                     number += s;
    2065        8192 :                     c = read(e, CHAR_DIGIT, number);
    2066        8192 :                     f_result_float64 = number.to_float64();
    2067        8192 :                     return;
    2068             :                 }
    2069           1 :                 ungetc(e);
    2070             :             }
    2071             :             // TODO:
    2072             :             // Here we could check to know whether this really
    2073             :             // represents a decimal number or whether the decimal
    2074             :             // point is a member operator. This can be very tricky.
    2075             :             //
    2076             :             // This is partially done now, we still fail in cases
    2077             :             // where someone was to use a member name such as e4z
    2078             :             // because we would detect 'e' as exponent and multiply
    2079             :             // the value by 10000... then fail on the 'z'
    2080        4097 :             if((f_char_type & CHAR_DIGIT) != 0)
    2081             :             {
    2082             :                 // considered floating point
    2083        4096 :                 number += 'e';
    2084        4096 :                 c = read(s, CHAR_DIGIT, number);
    2085        4096 :                 f_result_float64 = number.to_float64();
    2086        4096 :                 return;
    2087             :             }
    2088           1 :             ungetc(s);
    2089             :         }
    2090             :         // restore the '.' and following character (another '.' or a letter)
    2091             :         // this means we allow for 33.length and 3..5
    2092           3 :         ungetc(f);
    2093           3 :         ungetc('.');
    2094           3 :         f_char_type = char_type('.');
    2095             :     }
    2096     2442263 :     else if(c == 'e' || c == 'E')
    2097             :     {
    2098       12290 :         getc(); // re-read the 'e'
    2099             : 
    2100       12290 :         Input::char_t s(getc());
    2101       12290 :         if(s == '+' || s == '-')
    2102             :         {
    2103        8193 :             Input::char_t e(getc());
    2104        8193 :             if((f_char_type & CHAR_DIGIT) != 0)
    2105             :             {
    2106             :                 // considered floating point
    2107        8192 :                 number += 'e';
    2108        8192 :                 number += s;
    2109        8192 :                 c = read(e, CHAR_DIGIT, number);
    2110        8192 :                 f_result_float64 = number.to_float64();
    2111        8192 :                 return;
    2112             :             }
    2113           1 :             ungetc(e);
    2114             :         }
    2115             :         // TODO:
    2116             :         // Here we could check to know whether this really
    2117             :         // represents a decimal number or whether the decimal
    2118             :         // point is a member operator. This can be very tricky.
    2119             :         //
    2120             :         // This is partially done now, we still fail in cases
    2121             :         // where someone was to use a member name such as e4z
    2122             :         // because we would detect 'e' as exponent and multiply
    2123             :         // the value by 10000... then fail on the 'z'
    2124        4098 :         if((f_char_type & CHAR_DIGIT) != 0)
    2125             :         {
    2126             :             // considered floating point
    2127        4096 :             number += 'e';
    2128        4096 :             c = read(s, CHAR_DIGIT, number);
    2129        4096 :             f_result_float64 = number.to_float64();
    2130        4096 :             return;
    2131             :         }
    2132           2 :         ungetc(s);
    2133             :     }
    2134             : 
    2135             : 
    2136             :     // TODO: Support 8, 16, 32 bits, unsigned thereof?
    2137             :     //       (we have NODE_BYTE and NODE_SHORT, but not really a 32bit
    2138             :     //       definition yet; NODE_LONG should be 64 bits I think,
    2139             :     //       although really all of those are types, not literals.)
    2140     2429978 :     f_result_type = Node::node_t::NODE_INT64;
    2141             : 
    2142             :     // TODO: detect whether an error occurred in the conversion
    2143             :     //       (std::stoull() throws std::out_of_range on overflow)
    2144     2429978 :     f_result_int64 = std::stoull(number.to_utf8(), nullptr, 10);
    2145             : }
    2146             : 
    2147             : 
    2148             : /** \brief Read one string.
    2149             :  *
    2150             :  * This function reads one string from the input stream.
    2151             :  *
    2152             :  * The function expects \p quote as an input parameter representing the
    2153             :  * opening quote. It reads the input stream up to the closing quote. If the end
    2154             :  * of the input or a line terminator (unless escaped) is found first, an error is emitted.
    2155             :  *
    2156             :  * Note that we support backquoted (`...`) "strings" which actually
    2157             :  * represent regular expressions. These cannot be continued on
    2158             :  * the following line.
    2159             :  *
    2160             :  * This function sets the result type to NODE_STRING. It is changed
    2161             :  * by the caller when a regular expression was found instead.
    2162             :  *
    2163             :  * \internal
    2164             :  *
    2165             :  * \param[in] quote  The opening quote, which will match the closing quote.
    2166             :  */
    2167     4168858 : void Lexer::read_string(Input::char_t quote)
    2168             : {
    2169     4168858 :     f_result_type = Node::node_t::NODE_STRING;
    2170     4168858 :     f_result_string.clear();
    2171             : 
    2172    23141135 :     for(Input::char_t c(getc()); c != quote; c = getc())
    2173             :     {
    2174    18972289 :         if(c < 0) // end of the input reached before the closing quote
    2175             :         {
    2176           2 :             Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_UNTERMINATED_STRING, f_input->get_position());
    2177           2 :             msg << "the last string was not closed before the end of the input was reached";
    2178           2 :             return;
    2179             :         }
    2180    18972287 :         if((f_char_type & CHAR_LINE_TERMINATOR) != 0)
    2181             :         {
    2182          10 :             Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_UNTERMINATED_STRING, f_input->get_position());
    2183          10 :             msg << "a string cannot include a line terminator";
    2184          10 :             return;
    2185             :         }
    2186    18972277 :         if(c == '\\')
    2187             :         {
    2188     2332867 :             c = escape_sequence(quote != '`');
    2189             : 
    2190             :             // here c can be equal to quote (c == quote)
    2191             :         }
    2192    18972277 :         if(c != String::STRING_CONTINUATION) // the continuation marker is not added to the result
    2193             :         {
    2194    18971263 :             f_result_string += c;
    2195             :         }
    2196             :     }
    2197             : }
    2198             : 
    2199             : 
    2200             : 
    2201             : /** \brief Create a new node of the specified type.
    2202             :  *
    2203             :  * This helper function creates a new node and assigns it the lexer
    2204             :  * position currently saved in f_position. This is useful internally and
    2205             :  * in the parser when creating nodes to build the input tree and in order
    2206             :  * for the new node to get the correct position according to the lexer.
    2207             :  *
    2208             :  * \param[in] type  The type of the new node.
    2209             :  *
    2210             :  * \return A pointer to the new node.
    2211             :  */
    2212    15440059 : Node::pointer_t Lexer::get_new_node(Node::node_t type)
    2213             : {
    2214    15440059 :     Node::pointer_t node(new Node(type));
    2215    15440059 :     node->set_position(f_position);
    2216             :     // no data (string, integer, float) is attached to the node in this case
    2217    15440059 :     return node;
    2218             : }
    2219             : 
    2220             : 
    2221             : /** \brief Get the next token from the input stream.
    2222             :  *
    2223             :  * This function reads one token with get_token() and transforms
    2224             :  * it into a Node. The Node is assigned the position saved in f_position and,
    2225             :  * for identifiers, strings, regular expressions and numbers, the token value.
    2226             :  *
    2227             :  * \return The node representing the next token, or a NODE_EOF if the
    2228             :  *         end of the stream was found.
    2229             :  */
    2230    51632951 : Node::pointer_t Lexer::get_next_token()
    2231             : {
    2232             :     // get the info
    2233    51632951 :     get_token();
    2234             : 
    2235             :     // create a node for the result
    2236    51632951 :     Node::pointer_t node(new Node(f_result_type));
    2237    51632951 :     node->set_position(f_position);
    2238    51632951 :     switch(f_result_type)
    2239             :     {
    2240             :     case Node::node_t::NODE_IDENTIFIER:
    2241             :     case Node::node_t::NODE_REGULAR_EXPRESSION:
    2242             :     case Node::node_t::NODE_STRING:
    2243    14988572 :         node->set_string(f_result_string);
    2244    14988572 :         break;
    2245             : 
    2246             :     case Node::node_t::NODE_INT64:
    2247     2452723 :         if((f_char_type & CHAR_LETTER) != 0)
    2248             :         {
    2249             :             // numbers cannot be followed by a letter
    2250        4099 :             Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_INVALID_NUMBER, f_input->get_position());
    2251        4099 :             msg << "unexpected letter after an integer";
    2252        4099 :             f_result_int64 = -1; // the invalid integer is replaced by -1
    2253             :         }
    2254     2452723 :         node->set_int64(f_result_int64);
    2255     2452723 :         break;
    2256             : 
    2257             :     case Node::node_t::NODE_FLOAT64:
    2258      238747 :         if((f_char_type & CHAR_LETTER) != 0)
    2259             :         {
    2260             :             // numbers cannot be followed by a letter
    2261           4 :             Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_INVALID_NUMBER, f_input->get_position());
    2262           4 :             msg << "unexpected letter after a floating point number";
    2263           4 :             f_result_float64 = -1.0; // the invalid float is replaced by -1.0
    2264             :         }
    2265      238747 :         node->set_float64(f_result_float64);
    2266      238747 :         break;
    2267             : 
    2268             :     default:
    2269             :         // no data attached
    2270    33952909 :         break;
    2271             : 
    2272             :     }
    2273    51632951 :     return node;
    2274             : }
    2275             : 
    2276             : 
    2277             : /** \brief Read one token in the f_result_... variables.
    2278             :  *
    2279             :  * This function reads one token from the input stream. It reads one
    2280             :  * character and determines the type of token (identifier, string,
    2281             :  * number, etc.) and then reads the whole token.
    2282             :  *
    2283             :  * The main purpose of the function is to read characters from the
    2284             :  * stream and determine what token it represents. It uses many
    2285             :  * sub-functions to read more complex tokens such as identifiers
    2286             :  * and numbers.
    2287             :  *
    2288             :  * If the end of the input stream is reached, the function returns
    2289             :  * with a NODE_EOF. The function can be called any number of times
    2290             :  * after the end of the input is reached.
    2291             :  *
    2292             :  * Only useful tokens are returned. Comments and white spaces (space,
    2293             :  * tab, new line, line feed, etc.) are all skipped silently.
    2294             :  *
    2295             :  * The function detects invalid characters which are ignored although
    2296             :  * the function will first emit an error.
    2297             :  *
    2298             :  * This is the function that handles the case of a regular expression
    2299             :  * written between slashes (/.../). One can also use the backward
    2300             :  * quotes (`...`) for regular expression to avoid potential confusions
    2301             :  * with the divide character.
    2302             :  *
    2303             :  * \note
    2304             :  * Most extended operators, such as the power operator (**) are
    2305             :  * silently returned by this function. If the extended operators are
    2306             :  * not allowed, the parser will emit an error as required. However,
    2307             :  * a few operators (<> and :=) are returned just like the standard
    2308             :  * operator (NODE_NOT_EQUAL and NODE_ASSIGNMENT) and thus the error
    2309             :  * has to be emitted here, and it is.
    2310             :  *
    2311             :  * \internal
    2312             :  */
    2313    51632951 : void Lexer::get_token()
    2314             : {
    2315    74926921 :     for(Input::char_t c(getc());; c = getc())
    2316             :     {
    2317    74926921 :         f_position = f_input->get_position();
    2318    74926921 :         if(c < 0)
    2319             :         {
    2320             :             // we're done
    2321     3543200 :             f_result_type = Node::node_t::NODE_EOF;
    2322     3543200 :             return;
    2323             :         }
    2324             : 
    2325    71383721 :         if((f_char_type & (CHAR_WHITE_SPACE | CHAR_LINE_TERMINATOR)) != 0)
    2326             :         {
    2327    22029097 :             continue;
    2328             :         }
    2329             : 
    2330    49354624 :         if((f_char_type & CHAR_INVALID) != 0)
    2331             :         {
    2332        2050 :             Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_UNEXPECTED_PUNCTUATION, f_input->get_position());
    2333        2050 :             msg << "invalid character '\\U" << std::hex << std::setfill('0') << std::setw(8) << c << "' found as is in the input stream";
    2334        2050 :             continue;
    2335             :         }
    2336             : 
    2337    49352574 :         if((f_char_type & CHAR_LETTER) != 0)
    2338             :         {
    2339    17178921 :             read_identifier(c);
    2340    17178921 :             if(f_result_type == Node::node_t::NODE_UNKNOWN)
    2341             :             {
    2342             :                 // skip empty identifiers, in most cases
    2343             :                 // this was invalid data in the input
    2344             :                 // and we will have had a message output
    2345             :                 // already so we do not have more to do
    2346             :                 // here
    2347             :                 continue; // LCOV_EXCL_LINE
    2348             :             }
    2349    17178921 :             return;
    2350             :         }
    2351             : 
    2352    32173653 :         if((f_char_type & CHAR_DIGIT) != 0)
    2353             :         {
    2354     2662311 :             read_number(c);
    2355     2662311 :             return;
    2356             :         }
    2357             : 
    2358    29511342 :         switch(c)
    2359             :         {
    2360             :         case '\\':
    2361             :             // identifiers can start with a character being escaped
    2362             :             // (it still needs to be a valid character for an identifier though)
    2363      103334 :             read_identifier(c);
    2364      103334 :             if(f_result_type != Node::node_t::NODE_UNKNOWN)
    2365             :             {
    2366             :                 // this is a valid token, return it
    2367      103333 :                 return;
    2368             :             }
    2369             :             // not a valid identifier, ignore here
    2370             :             // (the read_identifier() emits errors as required)
    2371           1 :             break;
    2372             : 
    2373             :         case '"':
    2374             :         case '\'':
    2375             :         case '`':    // TODO: do we want to support the correct regex syntax?
    2376     4168858 :             read_string(c);
    2377     4168858 :             if(c == '`')
    2378             :             {
    2379        4096 :                 f_result_type = Node::node_t::NODE_REGULAR_EXPRESSION;
    2380             :             }
    2381     4168858 :             return;
    2382             : 
    2383             :         case '<':
    2384      237687 :             c = getc();
    2385      237687 :             if(c == '<')
    2386             :             {
    2387       32780 :                 c = getc();
    2388       32780 :                 if(c == '=')
    2389             :                 {
    2390       20485 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_SHIFT_LEFT;
    2391       20485 :                     return;
    2392             :                 }
    2393       12295 :                 ungetc(c);
    2394       12295 :                 f_result_type = Node::node_t::NODE_SHIFT_LEFT;
    2395       12295 :                 return;
    2396             :             }
    2397      204907 :             if(c == '=')
    2398             :             {
    2399       65592 :                 c = getc();
    2400       65592 :                 if(c == '>')
    2401             :                 {
    2402       20511 :                     f_result_type = Node::node_t::NODE_COMPARE;
    2403       20511 :                     return;
    2404             :                 }
    2405       45081 :                 ungetc(c);
    2406       45081 :                 f_result_type = Node::node_t::NODE_LESS_EQUAL;
    2407       45081 :                 return;
    2408             :             }
    2409      139315 :             if(c == '%')
    2410             :             {
    2411       40972 :                 c = getc();
    2412       40972 :                 if(c == '=')
    2413             :                 {
    2414       12293 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_ROTATE_LEFT;
    2415       12293 :                     return;
    2416             :                 }
    2417       28679 :                 ungetc(c);
    2418       28679 :                 f_result_type = Node::node_t::NODE_ROTATE_LEFT;
    2419       28679 :                 return;
    2420             :             }
    2421       98343 :             if(c == '>')
    2422             :             {
    2423             :                 // unfortunately we cannot know whether '<>' or '!=' was used
    2424             :                 // once this function returns so in this very specific case
    2425             :                 // the extended operator has to be checked here
    2426        4096 :                 if(!has_option_set(Options::option_t::OPTION_EXTENDED_OPERATORS))
    2427             :                 {
    2428        2048 :                     Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_NOT_ALLOWED, f_input->get_position());
    2429        2048 :                     msg << "the '<>' operator is only available when extended operators are authorized (use extended_operators;).";
    2430             :                 }
    2431        4096 :                 f_result_type = Node::node_t::NODE_NOT_EQUAL;
    2432        4096 :                 return;
    2433             :             }
    2434       94247 :             if(c == '?')
    2435             :             {
    2436       32782 :                 c = getc();
    2437       32782 :                 if(c == '=')
    2438             :                 {
    2439       12294 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_MINIMUM;
    2440       12294 :                     return;
    2441             :                 }
    2442       20488 :                 ungetc(c);
    2443       20488 :                 f_result_type = Node::node_t::NODE_MINIMUM;
    2444       20488 :                 return;
    2445             :             }
    2446       61465 :             ungetc(c);
    2447       61465 :             f_result_type = Node::node_t::NODE_LESS;
    2448       61465 :             return;
    2449             : 
    2450             :         case '>':
    2451      245867 :             c = getc();
    2452      245867 :             if(c == '>')
    2453             :             {
    2454       65562 :                 c = getc();
    2455       65562 :                 if(c == '>')
    2456             :                 {
    2457       32781 :                     c = getc();
    2458       32781 :                     if(c == '=')
    2459             :                     {
    2460       20485 :                         f_result_type = Node::node_t::NODE_ASSIGNMENT_SHIFT_RIGHT_UNSIGNED;
    2461       20485 :                         return;
    2462             :                     }
    2463       12296 :                     ungetc(c);
    2464       12296 :                     f_result_type = Node::node_t::NODE_SHIFT_RIGHT_UNSIGNED;
    2465       12296 :                     return;
    2466             :                 }
    2467       32781 :                 if(c == '=')
    2468             :                 {
    2469       20485 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_SHIFT_RIGHT;
    2470       20485 :                     return;
    2471             :                 }
    2472       12296 :                 ungetc(c);
    2473       12296 :                 f_result_type = Node::node_t::NODE_SHIFT_RIGHT;
    2474       12296 :                 return;
    2475             :             }
    2476      180305 :             if(c == '=')
    2477             :             {
    2478       61465 :                 f_result_type = Node::node_t::NODE_GREATER_EQUAL;
    2479       61465 :                 return;
    2480             :             }
    2481      118840 :             if(c == '%')
    2482             :             {
    2483       24593 :                 c = getc();
    2484       24593 :                 if(c == '=')
    2485             :                 {
    2486       12293 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_ROTATE_RIGHT;
    2487       12293 :                     return;
    2488             :                 }
    2489       12300 :                 ungetc(c);
    2490       12300 :                 f_result_type = Node::node_t::NODE_ROTATE_RIGHT;
    2491       12300 :                 return;
    2492             :             }
    2493       94247 :             if(c == '?')
    2494             :             {
    2495       32782 :                 c = getc();
    2496       32782 :                 if(c == '=')
    2497             :                 {
    2498       12294 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_MAXIMUM;
    2499       12294 :                     return;
    2500             :                 }
    2501       20488 :                 ungetc(c);
    2502       20488 :                 f_result_type = Node::node_t::NODE_MAXIMUM;
    2503       20488 :                 return;
    2504             :             }
    2505       61465 :             ungetc(c);
    2506       61465 :             f_result_type = Node::node_t::NODE_GREATER;
    2507       61465 :             return;
    2508             : 
    2509             :         case '!':
    2510      122964 :             c = getc();
    2511      122964 :             if(c == '~')
    2512             :             {
    2513             :                 // http://perldoc.perl.org/perlop.html#Binding-Operators
    2514       28677 :                 f_result_type = Node::node_t::NODE_NOT_MATCH;
    2515       28677 :                 return;
    2516             :             }
    2517       94287 :             if(c == '=')
    2518             :             {
    2519       49204 :                 c = getc();
    2520       49204 :                 if(c == '=')
    2521             :                 {
    2522       20507 :                     f_result_type = Node::node_t::NODE_STRICTLY_NOT_EQUAL;
    2523       20507 :                     return;
    2524             :                 }
    2525       28697 :                 ungetc(c);
    2526       28697 :                 f_result_type = Node::node_t::NODE_NOT_EQUAL;
    2527       28697 :                 return;
    2528             :             }
    2529       45083 :             ungetc(c);
    2530       45083 :             f_result_type = Node::node_t::NODE_LOGICAL_NOT;
    2531       45083 :             return;
    2532             : 
    2533             :         case '=':
    2534      766008 :             c = getc();
    2535      766008 :             if(c == '=')
    2536             :             {
    2537       73778 :                 c = getc();
    2538       73778 :                 if(c == '=')
    2539             :                 {
    2540       12313 :                     f_result_type = Node::node_t::NODE_STRICTLY_EQUAL;
    2541       12313 :                     return;
    2542             :                 }
    2543       61465 :                 ungetc(c);
    2544       61465 :                 f_result_type = Node::node_t::NODE_EQUAL;
    2545       61465 :                 return;
    2546             :             }
    2547      692230 :             if((f_options->get_option(Options::option_t::OPTION_EXTENDED_OPERATORS) & 2) != 0)
    2548             :             {
    2549             :                 // Most people will not understand this one...
    2550             :                 // The '=' operator by itself is often misused and thus a
    2551             :                 // big source of bugs. By forbidding it, we only allow :=
    2552             :                 // and == (and ===) which makes it safer to use the language.
    2553      344065 :                 Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_NOT_ALLOWED, f_input->get_position());
    2554      344065 :                 msg << "the '=' operator is not available when extended operators value bit 1 is set (use extended_operators(2);).";
    2555             :             }
    2556      692230 :             ungetc(c);
    2557      692230 :             f_result_type = Node::node_t::NODE_ASSIGNMENT;
    2558      692230 :             return;
    2559             : 
    2560             :         case ':':
    2561     1862615 :             c = getc();
    2562     1862615 :             if(c == '=')
    2563             :             {
    2564             :                 // unfortunately we cannot know whether ':=' or '=' was used
    2565             :                 // once this function returns so in this very specific case
    2566             :                 // the extended operator has to be checked here
    2567      127039 :                 if(!has_option_set(Options::option_t::OPTION_EXTENDED_OPERATORS))
    2568             :                 {
    2569       32768 :                     Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_NOT_ALLOWED, f_input->get_position());
    2570       32768 :                     msg << "the ':=' operator is only available when extended operators are authorized (use extended_operators;).";
    2571             :                 }
    2572      127039 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT;
    2573      127039 :                 return;
    2574             :             }
    2575     1735576 :             if(c == ':')
    2576             :             {
    2577      118784 :                 f_result_type = Node::node_t::NODE_SCOPE;
    2578      118784 :                 return;
    2579             :             }
    2580     1616792 :             ungetc(c);
    2581     1616792 :             f_result_type = Node::node_t::NODE_COLON;
    2582     1616792 :             return;
    2583             : 
    2584             :         case '~':
    2585       61479 :             c = getc();
    2586       61479 :             if(c == '=')
    2587             :             {
    2588             :                 // http://perldoc.perl.org/perlop.html#Binding-Operators
    2589             :                 // Note that we invert it (perl uses =~) because otherwise
    2590             :                 // we may interfere with a valid expression:
    2591             :                 //    a = ~b;  <=>  a=~b;
    2592       20484 :                 f_result_type = Node::node_t::NODE_MATCH;
    2593       20484 :                 return;
    2594             :             }
    2595       40995 :             if(c == '~')
    2596             :             {
    2597             :                 // http://perldoc.perl.org/perlop.html#Smartmatch-Operator
    2598             :                 // WARNING: if ~~ is used as a unary, then it may get
    2599             :                 //          converted back to two BITWISE NOT by the
    2600             :                 //          parser (so 'a = ~~b;' works as expected).
    2601       28700 :                 f_result_type = Node::node_t::NODE_SMART_MATCH;
    2602       28700 :                 return;
    2603             :             }
    2604       12295 :             ungetc(c);
    2605       12295 :             f_result_type = Node::node_t::NODE_BITWISE_NOT;
    2606       12295 :             return;
    2607             : 
    2608             :         case '+':
    2609      422001 :             c = getc();
    2610      422001 :             if(c == '=')
    2611             :             {
    2612       69642 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT_ADD;
    2613       69642 :                 return;
    2614             :             }
    2615      352359 :             if(c == '+')
    2616             :             {
    2617       77832 :                 f_result_type = Node::node_t::NODE_INCREMENT;
    2618       77832 :                 return;
    2619             :             }
    2620      274527 :             ungetc(c);
    2621      274527 :             f_result_type = Node::node_t::NODE_ADD;
    2622      274527 :             return;
    2623             : 
    2624             :         case '-':
    2625      225449 :             c = getc();
    2626      225449 :             if(c == '=')
    2627             :             {
    2628       28677 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT_SUBTRACT;
    2629       28677 :                 return;
    2630             :             }
    2631      196772 :             if(c == '-')
    2632             :             {
    2633       45064 :                 f_result_type = Node::node_t::NODE_DECREMENT;
    2634       45064 :                 return;
    2635             :             }
    2636      151708 :             ungetc(c);
    2637      151708 :             f_result_type = Node::node_t::NODE_SUBTRACT;
    2638      151708 :             return;
    2639             : 
    2640             :         case '*':
    2641      204845 :             c = getc();
    2642      204845 :             if(c == '=')
    2643             :             {
    2644       36876 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT_MULTIPLY;
    2645       36876 :                 return;
    2646             :             }
    2647      167969 :             if(c == '*')
    2648             :             {
    2649       49169 :                 c = getc();
    2650       49169 :                 if(c == '=')
    2651             :                 {
    2652       20485 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_POWER;
    2653       20485 :                     return;
    2654             :                 }
    2655       28684 :                 ungetc(c);
    2656       28684 :                 f_result_type = Node::node_t::NODE_POWER;
    2657       28684 :                 return;
    2658             :             }
    2659      118800 :             ungetc(c);
    2660      118800 :             f_result_type = Node::node_t::NODE_MULTIPLY;
    2661      118800 :             return;
    2662             : 
    2663             :         case '/':
    2664     1299702 :             c = getc();
    2665     1299702 :             if(c == '/')
    2666             :             {
    2667             :                 // skip comments (to end of line)
    2668      394455 :                 do
    2669             :                 {
    2670      394455 :                     c = getc();
    2671             :                 }
    2672      394455 :                 while((f_char_type & CHAR_LINE_TERMINATOR) == 0 && c >= 0);
    2673       19409 :                 break;
    2674             :             }
    2675     1280293 :             if(c == '*')
    2676             :             {
    2677             :                 // skip comments (multiline)
    2678    69663967 :                 do
    2679             :                 {
    2680    69663967 :                     c = getc();
    2681   144333281 :                     while(c == '*')
    2682             :                     {
    2683     6248757 :                         c = getc();
    2684     6248757 :                         if(c == '/')
    2685             :                         {
    2686     1243410 :                             c = -1;
    2687     1243410 :                             break;
    2688             :                         }
    2689             :                     }
    2690             :                 }
    2691             :                 while(c > 0);
    2692     1243410 :                 break;
    2693             :             }
    2694             :             // before we can determine whether we have
    2695             :             //    a literal RegExp
    2696             :             //    a /=
    2697             :             //    a /
    2698             :             // we have to read more data to match a RegExp (so at least
    2699             :             // up to another / with valid RegExp characters in between
    2700             :             // or no such thing and we have to back off)
    2701             :             {
    2702       36883 :                 String regexp;
    2703       36883 :                 Input::char_t r(c);
    2704             :                 for(;;)
    2705             :                 {
    2706      225545 :                     if(r < 0 || (f_char_type & CHAR_LINE_TERMINATOR) != 0 || r == '/')
    2707             :                     {
    2708       36883 :                         break;
    2709             :                     }
    2710      188662 :                     if((f_char_type & CHAR_INVALID) == 0)
    2711             :                     {
    2712      188662 :                         regexp += r;
    2713             :                     }
    2714      188662 :                     r = getc();
    2715             :                 }
    2716       36883 :                 if(r == '/')
    2717             :                 {
    2718             :                     // TBD -- shall we further verify that this looks like a
    2719             :                     //        regular expression before accepting it as such?
    2720             :                     //
    2721             :                     // this is a valid regular expression written between /.../
    2722             :                     // read the flags that follow if any
    2723        4096 :                     read(r, CHAR_LETTER | CHAR_DIGIT, regexp);
    2724        4096 :                     f_result_type = Node::node_t::NODE_REGULAR_EXPRESSION;
    2725        4096 :                     f_result_string = "/";
    2726        4096 :                     f_result_string += regexp;
    2727        4096 :                     return;
    2728             :                 }
    2729             :                 // not a regular expression, so unget all of that stuff
    2730       32787 :                 size_t p(regexp.length());
    2731      233756 :                 while(p > 0)
    2732             :                 {
    2733      168182 :                     --p;
    2734      168182 :                     ungetc(regexp[p]);
    2735       32787 :                 }
    2736             :                 // 'c' is still the character gotten at the start of this case
    2737             :             }
    2738       32787 :             if(c == '=')
    2739             :             {
    2740             :                 // the '=' was ungotten, so skip it again
    2741       20485 :                 getc();
    2742       20485 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT_DIVIDE;
    2743       20485 :                 return;
    2744             :             }
    2745       12302 :             f_result_type = Node::node_t::NODE_DIVIDE;
    2746       12302 :             return;
    2747             : 
    2748             :         case '%':
    2749       32787 :             c = getc();
    2750       32787 :             if(c == '=')
    2751             :             {
    2752       20485 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT_MODULO;
    2753       20485 :                 return;
    2754             :             }
    2755       12302 :             ungetc(c);
    2756       12302 :             f_result_type = Node::node_t::NODE_MODULO;
    2757       12302 :             return;
    2758             : 
    2759             :         case '?':
    2760     1511429 :             f_result_type = Node::node_t::NODE_CONDITIONAL;
    2761     1511429 :             return;
    2762             : 
    2763             :         case '&':
    2764       73767 :             c = getc();
    2765       73767 :             if(c == '=')
    2766             :             {
    2767       20485 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT_BITWISE_AND;
    2768       20485 :                 return;
    2769             :             }
    2770       53282 :             if(c == '&')
    2771             :             {
    2772       32790 :                 c = getc();
    2773       32790 :                 if(c == '=')
    2774             :                 {
    2775       12293 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_LOGICAL_AND;
    2776       12293 :                     return;
    2777             :                 }
    2778       20497 :                 ungetc(c);
    2779       20497 :                 f_result_type = Node::node_t::NODE_LOGICAL_AND;
    2780       20497 :                 return;
    2781             :             }
    2782       20492 :             ungetc(c);
    2783       20492 :             f_result_type = Node::node_t::NODE_BITWISE_AND;
    2784       20492 :             return;
    2785             : 
    2786             :         case '^':
    2787       73766 :             c = getc();
    2788       73766 :             if(c == '=')
    2789             :             {
    2790       12293 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT_BITWISE_XOR;
    2791       12293 :                 return;
    2792             :             }
    2793       61473 :             if(c == '^')
    2794             :             {
    2795       32788 :                 c = getc();
    2796       32788 :                 if(c == '=')
    2797             :                 {
    2798       12293 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_LOGICAL_XOR;
    2799       12293 :                     return;
    2800             :                 }
    2801       20495 :                 ungetc(c);
    2802       20495 :                 f_result_type = Node::node_t::NODE_LOGICAL_XOR;
    2803       20495 :                 return;
    2804             :             }
    2805       28685 :             ungetc(c);
    2806       28685 :             f_result_type = Node::node_t::NODE_BITWISE_XOR;
    2807       28685 :             return;
    2808             : 
    2809             :         case '|':
    2810       90146 :             c = getc();
    2811       90146 :             if(c == '=')
    2812             :             {
    2813       20487 :                 f_result_type = Node::node_t::NODE_ASSIGNMENT_BITWISE_OR;
    2814       20487 :                 return;
    2815             :             }
    2816       69659 :             if(c == '|')
    2817             :             {
    2818       40981 :                 c = getc();
    2819       40981 :                 if(c == '=')
    2820             :                 {
    2821       20485 :                     f_result_type = Node::node_t::NODE_ASSIGNMENT_LOGICAL_OR;
    2822       20485 :                     return;
    2823             :                 }
    2824       20496 :                 ungetc(c);
    2825       20496 :                 f_result_type = Node::node_t::NODE_LOGICAL_OR;
    2826       20496 :                 return;
    2827             :             }
    2828       28678 :             ungetc(c);
    2829       28678 :             f_result_type = Node::node_t::NODE_BITWISE_OR;
    2830       28678 :             return;
    2831             : 
    2832             :         case '.':
    2833     1331223 :             c = getc();
    2834     1331223 :             if(c >= '0' && c <= '9')
    2835             :             {
    2836             :                 // this is probably a valid float
    2837        8192 :                 ungetc(c);
    2838        8192 :                 ungetc('.');
    2839        8192 :                 read_number('.');
    2840        8192 :                 return;
    2841             :             }
    2842     1323031 :             if(c == '.')
    2843             :             {
    2844      270351 :                 c = getc();
    2845      270351 :                 if(c == '.')
    2846             :                 {
    2847             :                     // Ellipsis!
    2848      200717 :                     f_result_type = Node::node_t::NODE_REST;
    2849      200717 :                     return;
    2850             :                 }
    2851       69634 :                 ungetc(c);
    2852             : 
    2853             :                 // Range (not too sure if this is really used yet
    2854             :                 // and whether it will be called RANGE)
    2855       69634 :                 f_result_type = Node::node_t::NODE_RANGE;
    2856       69634 :                 return;
    2857             :             }
    2858     1052680 :             ungetc(c);
    2859     1052680 :             f_result_type = Node::node_t::NODE_MEMBER;
    2860     1052680 :             return;
    2861             : 
    2862             :         case '[':
    2863      119186 :             f_result_type = Node::node_t::NODE_OPEN_SQUARE_BRACKET;
    2864      119186 :             return;
    2865             : 
    2866             :         case ']':
    2867      102795 :             f_result_type = Node::node_t::NODE_CLOSE_SQUARE_BRACKET;
    2868      102795 :             return;
    2869             : 
    2870             :         case '{':
    2871     1249349 :             f_result_type = Node::node_t::NODE_OPEN_CURVLY_BRACKET;
    2872     1249349 :             return;
    2873             : 
    2874             :         case '}':
    2875     1191994 :             f_result_type = Node::node_t::NODE_CLOSE_CURVLY_BRACKET;
    2876     1191994 :             return;
    2877             : 
    2878             :         case '(':
    2879     3682656 :             f_result_type = Node::node_t::NODE_OPEN_PARENTHESIS;
    2880     3682656 :             return;
    2881             : 
    2882             :         case ')':
    2883     3617120 :             f_result_type = Node::node_t::NODE_CLOSE_PARENTHESIS;
    2884     3617120 :             return;
    2885             : 
    2886             :         case ';':
    2887     3356669 :             f_result_type = Node::node_t::NODE_SEMICOLON;
    2888     3356669 :             return;
    2889             : 
    2890             :         case ',':
    2891     3349451 :             f_result_type = Node::node_t::NODE_COMMA;
    2892     3349451 :             return;
    2893             : 
    2894             :         case 0x221E: // INFINITY
    2895             :             // unicode infinity character which is viewed as a punctuation
    2896             :             // otherwise so we can reinterpret it safely (it could not be
    2897             :             // part of an identifier)
    2898        4096 :             f_result_type = Node::node_t::NODE_FLOAT64;
    2899        4096 :             f_result_float64.set_infinity();
    2900        4096 :             return;
    2901             : 
    2902             :         case 0xFFFD: // REPACEMENT CHARACTER
    2903             :             // Java has defined character FFFD as representing NaN so if
    2904             :             // found in the input we take it as such...
    2905             :             //
    2906             :             // see Unicode pri74:
    2907             :             // http://www.unicode.org/review/resolved-pri.html
    2908        4096 :             f_result_type = Node::node_t::NODE_FLOAT64;
    2909        4096 :             f_result_float64.set_NaN();
    2910        4096 :             return;
    2911             : 
    2912             :         default:
    2913           3 :             if(c > ' ' && c < 0x7F)
    2914             :             {
    2915           2 :                 Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_UNEXPECTED_PUNCTUATION, f_input->get_position());
    2916           2 :                 msg << "unexpected punctuation '" << static_cast<char>(c) << "'";
    2917             :             }
    2918             :             else
    2919             :             {
    2920           1 :                 Message msg(message_level_t::MESSAGE_LEVEL_ERROR, err_code_t::AS_ERR_UNEXPECTED_PUNCTUATION, f_input->get_position());
    2921           1 :                 msg << "unexpected punctuation '\\U" << std::hex << std::setfill('0') << std::setw(8) << c << "'";
    2922             :             }
    2923           3 :             break;
    2924             : 
    2925             :         }
    2926    23293970 :     }
    2927             :     /*NOTREACHED*/
    2928             : }
    2929             : 
    2930             : 
    2931             : /** \brief Check whether a given option is set.
    2932             :  *
    2933             :  * Because the lexer checks options in many places, it makes use of this
    2934             :  * helper function to simplify the many tests in the rest of the code.
    2935             :  *
    2936             :  * This function reads the value of the specified option from f_options
    2937             :  * and returns true when that value is not zero, false when it is zero.
    2938             :  *
    2939             :  * \note
    2940             :  * Some options may be set to values other than 0 and 1. This function
    2941             :  * cannot be used to test one specific value or bit of such an option
    2942             :  * since any non-zero value is viewed as \em set; read the value with
    2943             :  * get_option() instead. For example, the OPTION_EXTENDED_OPERATORS
    2944             :  * option may be set to 0, 1, 2, or 3.
    2945             :  *
    2946             :  * \param[in] option  The option to check.
    2947             :  *
    2948             :  * \return true if the option value is not zero, false otherwise.
    2949             :  */
    2950     4102200 : bool Lexer::has_option_set(Options::option_t option) const
    2951             : {
    2952     4102200 :     return f_options->get_option(option) != 0;
    2953             : }
    2954             : 
    2955             : 
    2956          63 : }
    2957             : // namespace as2js
    2958             : 
    2959             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.10