LCOV - code coverage report
Current view: top level - lib - string.cpp (source / functions) Hit Total Coverage
Test: coverage.info Lines: 316 316 100.0 %
Date: 2014-11-22 Functions: 47 47 100.0 %
Legend: Lines: hit not hit

          Line data    Source code
       1             : /* string.cpp -- written by Alexis WILKE for Made to Order Software Corp. (c) 2005-2014 */
       2             : 
       3             : /*
       4             : 
       5             : Copyright (c) 2005-2014 Made to Order Software Corp.
       6             : 
       7             : http://snapwebsites.org/project/as2js
       8             : 
       9             : Permission is hereby granted, free of charge, to any
      10             : person obtaining a copy of this software and
      11             : associated documentation files (the "Software"), to
      12             : deal in the Software without restriction, including
      13             : without limitation the rights to use, copy, modify,
      14             : merge, publish, distribute, sublicense, and/or sell
      15             : copies of the Software, and to permit persons to whom
      16             : the Software is furnished to do so, subject to the
      17             : following conditions:
      18             : 
      19             : The above copyright notice and this permission notice
      20             : shall be included in all copies or substantial
      21             : portions of the Software.
      22             : 
      23             : THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF
      24             : ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT
      25             : LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
      26             : FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO
      27             : EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
      28             : LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
      29             : WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
      30             : ARISING FROM, OUT OF OR IN CONNECTION WITH THE
      31             : SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
      32             : SOFTWARE.
      33             : 
      34             : */
      35             : 
      36             : #include    "as2js/string.h"
      37             : #include    "as2js/exceptions.h"
      38             : 
      39             : #include    <limits>
      40             : 
      41             : 
      42             : /** \file
      43             :  * \brief String implementation.
      44             :  *
      45             :  * We use the std::basic_string to create our own string using an int32_t
      46             :  * for each character. This allows us to have full UTF-32 Unicode characters.
      47             :  *
      48             :  * This file redefines a few functions that the base string library
      49             :  * does not offer because of the special character type we use.
      50             :  */
      51             : 
      52             : 
      53             : namespace as2js
      54             : {
      55             : 
      56             : 
      57             : 
      58             : /** \brief Initialize an empty string.
      59             :  *
      60             :  * This function initializes an empty string.
      61             :  */
      62   988177063 : String::String()
      63   988177063 :     : basic_string()
      64             : {
      65   988177063 : }
      66             : 
      67             : 
      68             : /** \brief Create a string from the specified input string.
      69             :  *
      70             :  * This function creates a string and initializes it with the specified
      71             :  * input string.
      72             :  *
      73             :  * The input is considered to be ISO-8859-1 and thus it gets copied in
      74             :  * the string as is (see the from_char() function.) If you have UTF-8
      75             :  * data, make sure to use the from_utf8() function instead.
      76             :  *
      77             :  * Note that we cannot include '\0' characters in our strings. This function
      78             :  * stops at the first null terminator no matter what.
      79             :  *
      80             :  * \note
      81             :  * The \p str pointer can be set to nullptr in which case the string is
      82             :  * considered empty.
      83             :  *
      84             :  * \param[in] str  A string, if not null terminated, make sure to define
      85             :  *                 the \p len parameter.
      86             :  * \param[in] len  The length of the string, if -1, expect a '\0'.
      87             :  *
      88             :  * \sa from_utf8()
      89             :  * \sa from_char()
      90             :  */
      91   380690302 : String::String(char const *str, int len)
      92   380690302 :     : basic_string()
      93             : {
      94   380690302 :     from_char(str, len);
      95   380690302 : }
      96             : 
      97             : 
      98             : /** \brief Create a string from the specified input string.
      99             :  *
     100             :  * This function creates a string and initializes it with the specified
     101             :  * input string.
     102             :  *
     103             :  * The input is considered to be UTF-32 or UTF-16 depending on the width of
     104             :  * the wchar_t type.
     105             :  *
     106             :  * Note that we cannot include '\0' characters in our strings. This function
     107             :  * stops at the first null terminator no matter what.
     108             :  *
     109             :  * \note
     110             :  * The \p str pointer can be set to nullptr in which case the string is
     111             :  * considered empty.
     112             :  *
     113             :  * \param[in] str  A string, if not null terminated, make sure to define
     114             :  *                 the \p len parameter.
     115             :  * \param[in] len  The length of the string, if -1, expect a '\0'.
     116             :  *
     117             :  * \sa from_wchar()
     118             :  */
     119     6672380 : String::String(wchar_t const *str, int len)
     120     6672380 :     : basic_string()
     121             : {
     122     6672380 :     from_wchar(str, len);
     123     6672380 : }
     124             : 
     125             : 
     126             : /** \brief Create a string from the specified input string.
     127             :  *
     128             :  * This function creates a string and initializes it with the specified
     129             :  * input string.
     130             :  *
     131             :  * The input is considered to be UTF-32 and thus it gets copied as is.
     132             :  *
     133             :  * Note that we cannot include '\0' characters in our strings. This function
     134             :  * stops at the first null terminator no matter what.
     135             :  *
     136             :  * \note
     137             :  * The \p str pointer can be set to nullptr in which case the string is
     138             :  * considered empty.
     139             :  *
     140             :  * \param[in] str  A string, if not null terminated, make sure to define
     141             :  *                 the \p len parameter.
     142             :  * \param[in] len  The length of the string, if -1, expect a '\0'.
     143             :  *
     144             :  * \sa from_as_char()
     145             :  */
     146     5550592 : String::String(as_char_t const *str, int len)
     147     5550592 :     : basic_string()
     148             : {
     149     5550592 :     from_as_char(str, len);
     150     5550592 : }
     151             : 
     152             : 
     153             : /** \brief Copy str in this String.
     154             :  *
     155             :  * This function copies str in this String.
     156             :  *
     157             :  * The input is considered to be ISO-8859-1 and thus it gets copied in
     158             :  * the string as is (see the from_char() function.) If you have UTF-8
     159             :  * data, make sure to use the from_utf8() function instead.
     160             :  *
     161             :  * \param[in] str  The input string to copy in this String.
     162             :  *
     163             :  * \sa from_char()
     164             :  */
     165     1898006 : String::String(std::string const& str)
     166     1898006 :     : basic_string()
     167             : {
     168     1898006 :     from_char(str.c_str(), static_cast<int>(str.length()));
     169     1898006 : }
     170             : 
     171             : 
     172             : /** \brief Copy str in this String.
     173             :  *
     174             :  * This function copies str in this String.
     175             :  *
     176             :  * The input string is taken as UTF-16 if wchar_t is 2 bytes and as
     177             :  * such converts the surrogates (0xD800 to 0xDFFF) to UTF-32 characters
     178             :  * as expected. If wchar_t is 4 bytes, the string is copied as is.
     179             :  *
     180             :  * \param[in] str  The input string to copy in this String.
     181             :  *
     182             :  * \sa from_wchar()
     183             :  */
     184     1048578 : String::String(std::wstring const& str)
     185     1048578 :     : basic_string()
     186             : {
     187     1048578 :     from_wchar(str.c_str(), static_cast<int>(str.length()));
     188     1048578 : }
     189             : 
     190             : 
     191             : /** \brief Copy str in this String.
     192             :  *
     193             :  * This function copies str in this String.
     194             :  *
     195             :  * The input string is taken as UTF-32 and copied as is in its entirety.
     196             :  *
     197             :  * \param[in] str  The input string to copy in this String.
     198             :  */
     199       85755 : String::String(std::basic_string<as_char_t> const& str)
     200       85755 :     : basic_string(str)
     201             : {
     202       85755 : }
     203             : 
     204             : 
     205             : /** \brief Copy str in this String.
     206             :  *
     207             :  * This function copies str in this String. The string is viewed as
     208             :  * ISO-8859-1. If another format is expected, make sure to use the
     209             :  * proper function.
     210             :  *
     211             :  * \param[in] str  The string to copy in this String.
     212             :  *
     213             :  * \return A reference to this string.
     214             :  *
     215             :  * \sa from_char()
     216             :  */
     217   117076871 : String& String::operator = (char const *str)
     218             : {
     219   117076871 :     from_char(str);
     220   117076871 :     return *this;
     221             : }
     222             : 
     223             : 
     224             : /** \brief Copy str in this String.
     225             :  *
     226             :  * This function copies str in this String. The string is viewed as
     227             :  * UTF-16 if wchar_t is 2 bytes, and UTF-32 if wchar_t is 4 bytes.
     228             :  * If another format is expected, make sure to use the proper function.
     229             :  *
     230             :  * \param[in] str  The string to copy in this String.
     231             :  *
     232             :  * \return A reference to this string.
     233             :  *
     234             :  * \sa from_wchar()
     235             :  */
     236    11120630 : String& String::operator = (wchar_t const *str)
     237             : {
     238    11120630 :     from_wchar(str);
     239    11120630 :     return *this;
     240             : }
     241             : 
     242             : 
     243             : /** \brief Copy str in this String.
     244             :  *
     245             :  * This function copies str in this String. The string is viewed as
     246             :  * ISO-8859-1. If another format is expected, make sure to use the
     247             :  * proper function.
     248             :  *
     249             :  * \param[in] str  The string to copy in this String.
     250             :  *
     251             :  * \return A reference to this string.
     252             :  *
     253             :  * \sa from_char()
     254             :  */
     255        1243 : String& String::operator = (std::string const& str)
     256             : {
     257        1243 :     from_char(str.c_str(), static_cast<int>(str.length()));
     258        1243 :     return *this;
     259             : }
     260             : 
     261             : 
     262             : /** \brief Copy str in this String.
     263             :  *
     264             :  * This function copies str in this String. The string is viewed as
     265             :  * UTF-16 if wchar_t is 2 bytes, and UTF-32 if wchar_t is 4 bytes.
     266             :  * If another format is expected, make sure to use the proper function.
     267             :  *
     268             :  * \param[in] str  The string to copy in this String.
     269             :  *
     270             :  * \return A reference to this string.
     271             :  *
     272             :  * \sa from_wchar()
     273             :  */
     274     1048576 : String& String::operator = (std::wstring const& str)
     275             : {
     276     1048576 :     from_wchar(str.c_str(), static_cast<int>(str.length()));
     277     1048576 :     return *this;
     278             : }
     279             : 
     280             : 
     281             : /** \brief Copy str in this String.
     282             :  *
     283             :  * This function copies str in this String. The string is viewed as
     284             :  * UTF-32. If another format is expected, make sure to use the
     285             :  * proper function.
     286             :  *
     287             :  * \param[in] str  The string to copy in this String.
     288             :  *
     289             :  * \return A reference to this string.
     290             :  */
     291         101 : String& String::operator = (std::basic_string<as_char_t> const& str)
     292             : {
     293         101 :     basic_string<as_char_t>::operator = (str);
     294         101 :     return *this;
     295             : }
     296             : 
     297             : 
     298             : /** \brief Append str to this String.
     299             :  *
     300             :  * This function appends str to this String. The string is viewed as
     301             :  * ISO-8859-1. If another format is expected, make sure to use the
     302             :  * proper function.
     303             :  *
     304             :  * \param[in] str  The string to append to this String.
     305             :  *
     306             :  * \return A reference to this string.
     307             :  */
     308     1835904 : String& String::operator += (char const *str)
     309             : {
     310     1835904 :     String s(str);
     311     1835904 :     basic_string<as_char_t>::operator += (s);
     312     1835904 :     return *this;
     313             : }
     314             : 
     315             : 
     316             : /** \brief Append str to this String.
     317             :  *
     318             :  * This function appends str to this String. The string is viewed as
     319             :  * UTF-16 if wchar_t is 2 bytes, and UTF-32 if wchar_t is 4 bytes.
     320             :  * If another format is expected, make sure to use the proper function.
     321             :  *
     322             :  * \param[in] str  The string to append to this String.
     323             :  *
     324             :  * \return A reference to this string.
     325             :  *
     326             :  * \sa from_wchar()
     327             :  */
     328     1112063 : String& String::operator += (wchar_t const *str)
     329             : {
     330     1112063 :     String s(str);
     331     1112063 :     basic_string<as_char_t>::operator += (s);
     332     1112063 :     return *this;
     333             : }
     334             : 
     335             : 
     336             : /** \brief Append str to this String.
     337             :  *
     338             :  * This function appends str to this String. The string is viewed as
     339             :  * UTF-32. If another format is expected, make sure to use the
     340             :  * proper function.
     341             :  *
     342             :  * \param[in] str  The string to append to this String.
     343             :  *
     344             :  * \return A reference to this string.
     345             :  */
     346     1112063 : String& String::operator += (as_char_t const *str)
     347             : {
     348     1112063 :     basic_string<as_char_t>::operator += (str);
     349     1112063 :     return *this;
     350             : }
     351             : 
     352             : 
     353             : /** \brief Append str to this String.
     354             :  *
     355             :  * This function appends str to this String. The string is viewed as
     356             :  * ISO-8859-1. If another format is expected, make sure to use the
     357             :  * proper function.
     358             :  *
     359             :  * \param[in] str  The string to append to this String.
     360             :  *
     361             :  * \return A reference to this string.
     362             :  */
     363     1176797 : String& String::operator += (std::string const& str)
     364             : {
     365     1176797 :     String s(str);
     366     1176797 :     basic_string<as_char_t>::operator += (s);
     367     1176797 :     return *this;
     368             : }
     369             : 
     370             : 
     371             : /** \brief Append str to this String.
     372             :  *
     373             :  * This function appends str to this String. The string is viewed as
     374             :  * UTF-16 if wchar_t is 2 bytes, and UTF-32 if wchar_t is 4 bytes.
     375             :  * If another format is expected, make sure to use the proper function.
     376             :  *
     377             :  * \param[in] str  The string to append to this String.
     378             :  *
     379             :  * \return A reference to this string.
     380             :  */
     381     1048576 : String& String::operator += (std::wstring const& str)
     382             : {
     383     1048576 :     String s(str);
     384     1048576 :     basic_string<as_char_t>::operator += (s);
     385     1048576 :     return *this;
     386             : }
     387             : 
     388             : 
     389             : /** \brief Append str to this String.
     390             :  *
     391             :  * This function append str to this String. The string is viewed as
     392             :  * UTF-32. If another format is expected, make sure to use the
     393             :  * proper function.
     394             :  *
     395             :  * \param[in] str  The string to append to this String.
     396             :  *
     397             :  * \return A reference to this string.
     398             :  */
     399      307638 : String& String::operator += (std::basic_string<as_char_t> const& str)
     400             : {
     401      307638 :     basic_string<as_char_t>::operator += (str);
     402      307638 :     return *this;
     403             : }
     404             : 
     405             : 
     406             : /** \brief Append c to this String.
     407             :  *
     408             :  * This function append c to this String. The character is viewed as
     409             :  * UTF-32. If another format is expected, make sure to use the
     410             :  * proper function.
     411             :  *
     412             :  * \param[in] c  The character to append to this String.
     413             :  *
     414             :  * \return A reference to this string.
     415             :  */
     416   150584091 : String& String::operator += (as_char_t const c)
     417             : {
     418   150584091 :     basic_string<as_char_t>::operator += (c);
     419   150584091 :     return *this;
     420             : }
     421             : 
     422             : 
     423             : /** \brief Append c to this String.
     424             :  *
     425             :  * This function append c to this String. The character is viewed as
     426             :  * ISO-8859-1. If another format is expected, make sure to use the
     427             :  * proper function.
     428             :  *
     429             :  * \param[in] c  The character to append to this String.
     430             :  *
     431             :  * \return A reference to this string.
     432             :  */
     433    27116589 : String& String::operator += (char const c)
     434             : {
     435    27116589 :     basic_string<as_char_t>::operator += (static_cast<as_char_t>(static_cast<unsigned char>(c)));
     436    27116589 :     return *this;
     437             : }
     438             : 
     439             : 
     440             : /** \brief Append c to this String.
     441             :  *
     442             :  * This function append c to this String. The character is viewed as
     443             :  * UTF-32. If another format is expected, make sure to use the
     444             :  * proper function.
     445             :  *
     446             :  * \todo
     447             :  * Under MS-Windows the character is viewed as UTF-16, only we do
     448             :  * not properly manage surrogates in this case (i.e. if you just
     449             :  * added another surrogate, concatenate both surrogates in one
     450             :  * UTF-32 character.)
     451             :  *
     452             :  * \param[in] c  The character to append to this String.
     453             :  *
     454             :  * \return A reference to this string.
     455             :  */
     456     1112063 : String& String::operator += (wchar_t const c)
     457             : {
     458             :     // TODO: cannot add surrogate in this way?
     459             :     //       (under MS-Windows, where wchar_t is 16 bits, this would be
     460             :     //       the only way to add large characters with wchar_t... we could
     461             :     //       save leads and when a tail arrives convert the character, but
     462             :     //       that's rather unsafe...)
     463     1112063 :     if(valid_character(c))
     464             :     {
     465     1112063 :         basic_string<as_char_t>::operator += (static_cast<as_char_t>(c));
     466             :     }
     467     1112063 :     return *this;
     468             : }
     469             : 
     470             : 
     471             : /** \brief Copy a string in this String object.
     472             :  *
     473             :  * This function copies the ISO-8859-1 string pointer by str to
     474             :  * this string. The previous string is lost.
     475             :  *
     476             :  * If a null character is found, the copy stops.
     477             :  *
     478             :  * The \p len parameter can be used to limit the length of the copy.
     479             :  *
     480             :  * \note
     481             :  * This function can be called with a nullptr in \p str, in which
     482             :  * case the string is considered empty.
     483             :  *
     484             :  * \param[in] str  The input string to copy in this string.
     485             :  * \param[in] len  The maximum number of characters to copy, if -1, copy
     486             :  *                 up to the next null ('\0') character.
     487             :  *
     488             :  * \return Always STRING_GOOD since all bytes in ISO-8859-1 are all
     489             :  *         valid Unicode characters.
     490             :  */
     491   499667479 : String::conversion_result_t String::from_char(char const *str, int len)
     492             : {
     493   499667479 :     clear();
     494   499667479 :     if(str != nullptr)
     495             :     {
     496   499658121 :         if(len == -1)
     497             :         {
     498  6176380460 :             for(; *str != '\0'; ++str)
     499             :             {
     500  5678623590 :                 append(1, static_cast<unsigned char>(*str));
     501             :             }
     502             :         }
     503             :         else
     504             :         {
     505    80519239 :             for(; len > 0 && *str != '\0'; --len, ++str)
     506             :             {
     507    78617988 :                 append(1, static_cast<unsigned char>(*str));
     508             :             }
     509             :         }
     510             :     }
     511             : 
     512   499667479 :     return conversion_result_t::STRING_GOOD;
     513             : }
     514             : 
     515             : 
     516             : /** \brief Copy a wchar_t string to this String.
     517             :  *
     518             :  * This function copies a wchar_t string to this String. Internally we
     519             :  * only deal with UTF-32 characters. However, this function expects the
     520             :  * input to possibly be UTF-16 and converts surrogate characters to
     521             :  * UTF-32 as expected in UTF-16. (In other words, this functions works
     522             :  * under Linux and MS-Windows.)
     523             :  *
     524             :  * \note
     525             :  * This string is not modified if the input is not valid.
     526             :  *
     527             :  * \note
     528             :  * This function can be called with a nullptr in \p str, in which
     529             :  * case the string is considered empty.
     530             :  *
     531             :  * \param[in] str  The input string to copy in this string.
     532             :  * \param[in] len  The maximum number of characters to copy, if -1, copy
     533             :  *                 up to the next null ('\0') character.
     534             :  *
     535             :  * \return STRING_INVALID: if a character is not a valid UTF-32 character,
     536             :  *         STRING_BAD: if the input is invalid,
     537             :  *         STRING_END: could not be converted (not enough data for last
     538             :  *                     surrogate character),
     539             :  *         STRING_GOOD: the new string is valid.
     540             :  */
     541    26245112 : String::conversion_result_t String::from_wchar(wchar_t const *str, int len)
     542             : {
     543    52490224 :     struct out
     544             :     {
     545   160964619 :         String::conversion_result_t add(as_char_t c)
     546             :         {
     547   160964619 :             if(c >= 0xD800 && c < 0xDC00)
     548             :             {
     549    18874368 :                 f_lead_surrogate = c;
     550    18874368 :                 return conversion_result_t::STRING_END; // not an error unless it was the last character
     551             :             }
     552   142090251 :             else if(c >= 0xDC00 && c <= 0xDFFF)
     553             :             {
     554    18874368 :                 if(f_lead_surrogate == 0)
     555             :                 {
     556             :                     // invalid encoding
     557     2097152 :                     return conversion_result_t::STRING_BAD;
     558             :                 }
     559    16777216 :                 c = (((static_cast<as_char_t>(f_lead_surrogate) & 0x03FF) << 10) | (static_cast<as_char_t>(c) & 0x03FF)) + 0x10000;
     560             :                 // Note: UTF-16 characters cannot be invalid here
     561             :                 //       (unless we add code points such as 0xFFFE and 0xFFFF
     562             :                 //       among invalid characters)
     563    16777216 :                 if(!f_string.valid_character(c))
     564             :                 {
     565             :                     return conversion_result_t::STRING_INVALID; // LCOV_EXCL_LINE
     566             :                 }
     567    16777216 :                 f_lead_surrogate = 0;
     568             :             }
     569   139993099 :             f_string.append(1, c);
     570   139993099 :             return conversion_result_t::STRING_GOOD;
     571             :         }
     572             : 
     573             :         String          f_string;
     574             :         zas_char_t      f_lead_surrogate;
     575             :     };
     576             : 
     577    26245112 :     out o;
     578    26245112 :     String::conversion_result_t result(conversion_result_t::STRING_GOOD);
     579    26245112 :     if(str != nullptr)
     580             :     {
     581    26245110 :         if(len == -1)
     582             :         {
     583   168335279 :             for(; *str != '\0'; ++str)
     584             :             {
     585   147333053 :                 result = o.add(*str);
     586   147333053 :                 if(result != conversion_result_t::STRING_GOOD && result != conversion_result_t::STRING_END)
     587             :                 {
     588     1048576 :                     break;
     589             :                 }
     590             :             }
     591             :         }
     592             :         else
     593             :         {
     594    16777298 :             for(; len > 0 && *str != '\0'; --len, ++str)
     595             :             {
     596    13631566 :                 result = o.add(*str);
     597    13631566 :                 if(result != conversion_result_t::STRING_GOOD && result != conversion_result_t::STRING_END)
     598             :                 {
     599     1048576 :                     break;
     600             :                 }
     601             :             }
     602             :         }
     603             :     }
     604             : 
     605    26245112 :     if(result == conversion_result_t::STRING_GOOD)
     606             :     {
     607    22050808 :         *this = o.f_string;
     608             :     }
     609             : 
     610    26245112 :     return result;
     611             : }
     612             : 
     613             : 
     614             : /** \brief Copy an as_char_t string to this String.
     615             :  *
     616             :  * This function copies an as_char_t string to this String. Since an
     617             :  * as_char_t string has the same character type as a String, this copy
     618             :  * is straight forward.
     619             :  *
     620             :  * The copy stops as soon as a null ('\0') character is found.
     621             :  *
     622             :  * \note
     623             :  * If an error occurs, this String object is not modified.
     624             :  *
     625             :  * \note
     626             :  * This function can be called with a nullptr in \p str, in which
     627             :  * case the string is considered empty.
     628             :  *
     629             :  * \param[in] str  The input string to copy in this string.
     630             :  * \param[in] len  The maximum number of characters to copy, if -1, copy
     631             :  *                 up to the next null ('\0') character.
     632             :  *
     633             :  * \return STRING_INVALID: if the resulting character is not a valid UTF-32 character,
     634             :  *         STRING_GOOD: the new string is valid.
     635             :  */
     636     7723896 : String::conversion_result_t String::from_as_char(as_char_t const *str, int len)
     637             : {
     638     7723896 :     String s;
     639     7723896 :     if(str != nullptr)
     640             :     {
     641     7723894 :         if(len == -1)
     642             :         {
     643     5526020 :             for(; *str != '\0'; ++str)
     644             :             {
     645     3301784 :                 if(!valid_character(*str))
     646             :                 {
     647     1052094 :                     return conversion_result_t::STRING_INVALID;
     648             :                 }
     649     2249690 :                 s.append(1, *str);
     650             :             }
     651             :         }
     652             :         else
     653             :         {
     654    90727803 :             for(; len > 0 && *str != '\0'; --len, ++str)
     655             :             {
     656    86286835 :                 if(!valid_character(*str))
     657             :                 {
     658        6596 :                     return conversion_result_t::STRING_INVALID;
     659             :                 }
     660    86280239 :                 s.append(1, *str);
     661             :             }
     662             :         }
     663             :     }
     664             : 
     665     6665206 :     *this = s;
     666             : 
     667     6665206 :     return conversion_result_t::STRING_GOOD;
     668             : }
     669             : 
     670             : 
     671             : /** \brief Copy a UTF-8 string to this String.
     672             :  *
     673             :  * This function copies a string to this String. In this case
     674             :  * the input string is considered to be UTF-8.
     675             :  *
     676             :  * If you have an std::string, use the c_str() operation to call this
     677             :  * function.
     678             :  *
     679             :  * When \p len is -1, the copy stops at the first null ('\0') character.
     680             :  *
     681             :  * \note
     682             :  * If an error occurs, this String object is not modified.
     683             :  *
     684             :  * \param[in] str  The input string to copy in this string.
     685             :  * \param[in] len  The maximum number of bytes to read from \p str, if -1,
     686             :  *                 read up to the next null ('\0') character.
     687             :  *
     688             :  * \return STRING_INVALID: if the resulting character is not a valid
     689             :  *                         UTF-32 character,
     690             :  *         STRING_BAD: if the input is invalid,
     691             :  *         STRING_END: could not be converted (not enough data for last
     692             :  *                     character),
     693             :  *         STRING_GOOD: the new string is valid.
     694             :  */
     695   281121108 : String::conversion_result_t String::from_utf8(char const *str, int len)
     696             : {
     697   281121108 :     String          result;
     698             :     unsigned char   c;
     699             :     as_char_t       w;
     700             :     int             l;
     701             : 
     702   281121108 :     if(str != nullptr)
     703             :     {
     704   281121106 :         if(len == -1)
     705             :         {
     706             :             // it's a bit of a waste, but makes it a lot easier
     707   281102109 :             len = std::char_traits<char>::length(str);
     708             :         }
     709             : 
     710  3239451961 :         while(len > 0)
     711             :         {
     712  2677737363 :             --len;
     713  2677737363 :             c = static_cast<unsigned char>(*str++);
     714             : 
     715  2677737363 :             if(c < 0x80)
     716             :             {
     717  2676088831 :                 w = c;
     718             :             }
     719             :             else
     720             :             {
     721     1648532 :                 if(c >= 0xC0 && c <= 0xDF)
     722             :                 {
     723        1953 :                     l = 1;
     724        1953 :                     w = c & 0x1F;
     725             :                 }
     726     1646579 :                 else if(c >= 0xE0 && c <= 0xEF)
     727             :                 {
     728       73755 :                     l = 2;
     729       73755 :                     w = c & 0x0F;
     730             :                 }
     731     1572824 :                 else if(c >= 0xF0 && c <= 0xF7)
     732             :                 {
     733     1049769 :                     l = 3;
     734     1049769 :                     w = c & 0x07;
     735             :                 }
     736             :                 // The following are not valid UTF-8 characters, these are
     737             :                 // refused below as we verify the validity of the character
     738      523055 :                 else if(c >= 0xF8 && c <= 0xFB)
     739             :                 {
     740       15726 :                     l = 4;
     741       15726 :                     w = c & 0x03;
     742             :                 }
     743      507329 :                 else if(c >= 0xFC && c <= 0xFD)
     744             :                 {
     745      507257 :                     l = 5;
     746      507257 :                     w = c & 0x01;
     747             :                 }
     748             :                 else
     749             :                 {
     750             :                     // invalid UTF-8 sequence
     751          72 :                     return conversion_result_t::STRING_BAD;
     752             :                 }
     753     1648460 :                 if(len < l)
     754             :                 {
     755             :                     // not enough character
     756         190 :                     return conversion_result_t::STRING_END;
     757             :                 }
     758     1648270 :                 len -= l;
     759     9193812 :                 while(l > 0)
     760             :                 {
     761     5897338 :                     c = static_cast<unsigned char>(*str++);
     762     5897338 :                     if(c < 0x80 || c > 0xBF)
     763             :                     {
     764          66 :                         return conversion_result_t::STRING_BAD;
     765             :                     }
     766     5897272 :                     l--;
     767     5897272 :                     w = (w << 6) | (c & 0x3F);
     768             :                 }
     769             :             }
     770  2677737035 :             if(!valid_character(w))
     771             :             {
     772      527286 :                 return conversion_result_t::STRING_INVALID;
     773             :             }
     774  2677209749 :             result.append(1, w);
     775             :         }
     776             :     }
     777             : 
     778             :     // it worked, we can smash this String
     779   280593494 :     *this = result;
     780             : 
     781   280593494 :     return conversion_result_t::STRING_GOOD;
     782             : }
     783             : 
     784             : 
     785             : /** \brief Compare this String against a char const * string.
     786             :  *
     787             :  * This function compares an ISO-8859-1 string against this String.
     788             :  * If you have a UTF-8 string, make sure to use from_utf8() first
     789             :  * and then compare the two String's against each other.
     790             :  *
     791             :  * \param[in] str  The string to compare as ISO-8859-1.
     792             :  *
     793             :  * \return true if both strings are equal.
     794             :  */
     795   211882460 : bool String::operator == (char const *str) const
     796             : {
     797   211882460 :     String s(str);
     798   211882460 :     return *this == s;
     799             : }
     800             : 
     801             : 
     802             : /** \brief Compare a String against a char const * string.
     803             :  *
     804             :  * This function compares an ISO-8859-1 string against a String.
     805             :  * If you have a UTF-8 string, make sure to use from_utf8() first
     806             :  * and then compare the two String's against each other.
     807             :  *
     808             :  * \param[in] str  The string to compare as ISO-8859-1.
     809             :  * \param[in] string  The String to compare with.
     810             :  *
     811             :  * \return true if both strings are equal.
     812             :  */
     813    61101240 : bool operator == (char const *str, String const& string)
     814             : {
     815    61101240 :     String s(str);
     816    61101240 :     return s == string;
     817             : }
     818             : 
     819             : 
     820             : /** \brief Compare this String against a char const * string.
     821             :  *
     822             :  * This function compares an ISO-8859-1 string against this String.
     823             :  * If you have a UTF-8 string, make sure to use from_utf8() first
     824             :  * and then compare the two String's against each other.
     825             :  *
     826             :  * \param[in] str  The string to compare as ISO-8859-1.
     827             :  *
     828             :  * \return true if both strings are not equal.
     829             :  */
     830     1323510 : bool String::operator != (char const *str) const
     831             : {
     832     1323510 :     String s(str);
     833     1323510 :     return *this != s;
     834             : }
     835             : 
     836             : 
     837             : /** \brief Compare a String against a char const * string.
     838             :  *
     839             :  * This function compares an ISO-8859-1 string against a String.
     840             :  * If you have a UTF-8 string, make sure to use from_utf8() first
     841             :  * and then compare the two String's against each other.
     842             :  *
     843             :  * \param[in] str  The string to compare as ISO-8859-1.
     844             :  * \param[in] string  The String to compare with.
     845             :  *
     846             :  * \return true if both strings are not equal.
     847             :  */
     848     4269176 : bool operator != (char const *str, String const& string)
     849             : {
     850     4269176 :     String s(str);
     851     4269176 :     return s != string;
     852             : }
     853             : 
     854             : 
     855             : /** \brief Check validity of the string.
     856             :  *
     857             :  * This function checks all the characters for validity. This is based
     858             :  * on a Unicode piece of code that clearly specifies that a certain
     859             :  * number of characters just cannot be used (i.e. this includes UTF-16
     860             :  * surrogates, and any value larger than 0x10FFFF or negative numbers.)
     861             :  *
     862             :  * Note that the null character '\0' is considered valid and part of
     863             :  * the string, however, anything after that character is ignored.
     864             :  *
     865             :  * \todo
     866             :  * We are actually transforming the String object to properly check
     867             :  * all of its characters as added to the buffer so this function
     868             :  * should become obsolete at some point.
     869             :  *
     870             :  * \return true if the string is considered valid.
     871             :  *
     872             :  * \sa valid_character()
     873             :  */
     874    31760324 : bool String::valid() const
     875             : {
     876   261210761 :     for(as_char_t  const *s(c_str()); *s != '\0'; ++s)
     877             :     {
     878   229452937 :         if(!valid_character(*s))
     879             :         {
     880        2500 :             return false;
     881             :         }
     882             :     }
     883             : 
     884    31757824 :     return true;
     885             : }
     886             : 
     887             : 
     888             : /** \brief Check whether a character is considered valid.
     889             :  *
     890             :  * The UTF-32 type is limited in the code points that can be used. This
     891             :  * function returns true if the code point of \p c is considered valid.
     892             :  *
     893             :  * Characters in UTF-32 must be defined between 0 and 0x10FFFF inclusive,
     894             :  * except for code points 0xD800 to 0xDFFF which are used as surrogates
     895             :  * for UTF-16 encoding.
     896             :  *
     897             :  * \param[in] c  The character to be checked.
     898             :  *
     899             :  * \return true if c is considered valid.
     900             :  *
     901             :  * \sa valid()
     902             :  */
     903  3223146622 : bool String::valid_character(as_char_t c)
     904             : {
     905             :     // Note: as_char_t is an int32_t (i.e. a signed value)
     906    78449473 :     return (c < 0xD800 || c > 0xDFFF)   // UTF-16 surrogates
     907  3223126142 :         && c < 0x110000                 // too large?
     908  6441744919 :         && c >= 0;                      // too small?
     909             : }
     910             : 
     911             : 
     912             : /** \brief Check whether this string represents a valid integer.
     913             :  *
     914             :  * This function checks the string to see whether it represents a
     915             :  * valid integer. The function supports decimal and hexadecimal
     916             :  * numbers. Octals are not supported because JavaScript does not
     917             :  * convert numbers that start with a 0 as if these were octal
     918             :  * numbers.
     919             :  *
     920             :  * \li Decimal number: [-+]?[0-9]+
     921             :  * \li Hexadecimal number: [-+]?0[xX][0-9a-fA-F]+
     922             :  *
     923             :  * \return true if the string represents an integer.
     924             :  */
     925     1531212 : bool String::is_int64() const
     926             : {
     927             :     struct hex_test
     928             :     {
     929     3182106 :         static bool is_hex(as_char_t c)
     930             :         {
     931     2581977 :             return (c >= '0' && c <= '9')
     932     1418889 :                 || (c >= 'a' && c <= 'f')
     933     3782709 :                 || (c >= 'A' && c <= 'F');
     934             :         }
     935             :     };
     936             : 
     937     1531212 :     as_char_t const *s(c_str());
     938             : 
     939             :     // sign
     940             :     // TODO: in strict mode hexadecimal numbers cannot be signed
     941     1531212 :     if(*s == '-' || *s == '+')
     942             :     {
     943      915440 :         ++s;
     944             :     }
     945             : 
     946             :     // handle special case of hexadecimal
     947     1531212 :     if(*s == '0')
     948             :     {
     949      600641 :         ++s;
     950      600641 :         if(*s == 'x' || *s == 'X')
     951             :         {
     952      600609 :             if(s[1] == '\0')
     953             :             {
     954             :                 // just "0x" or "0X" is not a valid number
     955           6 :                 return false;
     956             :             }
     957      600603 :             for(++s; hex_test::is_hex(*s); ++s);
     958      600603 :             return *s == '\0';
     959             :         }
     960             :         // no octal support in strings
     961             :     }
     962             : 
     963             :     // number
     964      930603 :     for(; *s >= '0' && *s <= '9'; ++s);
     965             : 
     966      930603 :     return *s == '\0';
     967             : }
     968             : 
     969             : 
     970             : /** \brief Check whether the string represents a valid floating point number.
     971             :  *
     972             :  * This function parses the string to see whether it represents a valid
     973             :  * floating point number: an integral part, an optional decimal part,
     974             :  * and an optional signed exponent.
     975             :  *
     976             :  * The sign of the exponent is also itself optional.
     977             :  *
     978             :  * Note that this function returns true if the number is an integer in
     979             :  * decimal number representation, however, it will return false for
     980             :  * hexadecimal numbers. You may also call the is_number() function to
     981             :  * know whether a string represents either a decimal number or a floating
     982             :  * point number.
     983             :  *
     984             :  * \li A floating point number: [-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?
     985             :  *
     986             :  * \todo
     987             :  * Ameliorate the test so if no digits are present where required then
     988             :  * an error is emitted (i.e. you may have '0.', '.0' but not just '.';
     989             :  * same problem with exponent).
     990             :  *
     991             :  * \return true if the string represents a floating point number.
     992             :  */
     993     1253149 : bool String::is_float64() const
     994             : {
     995     1253149 :     as_char_t const *s(c_str());
     996             : 
     997             :     // sign
     998     1253149 :     if(*s == '-' || *s == '+')
     999             :     {
    1000      615290 :         ++s;
    1001             :     }
    1002             : 
    1003             :     // integral part
    1004     1253149 :     for(; *s >= '0' && *s <= '9'; ++s);
    1005             : 
    1006             :     // if '.' check for a decimal part
    1007     1253149 :     if(*s == '.')
    1008             :     {
    1009      227633 :         for(++s; *s >= '0' && *s <= '9'; ++s);
    1010             :     }
    1011             : 
    1012             :     // if 'e' check for an exponent
    1013     1253149 :     if(*s == 'e' || *s == 'E')
    1014             :     {
    1015       61355 :         ++s;
    1016       61355 :         if(*s == '+' || *s == '-')
    1017             :         {
    1018             :             // skip the sign
    1019       43940 :             ++s;
    1020             :         }
    1021       61355 :         for(; *s >= '0' && *s <= '9'; ++s);
    1022             :     }
    1023             : 
    1024     1253149 :     return *s == '\0';
    1025             : }
    1026             : 
    1027             : 
    1028             : /** \brief Check whether this string represents a number.
    1029             :  *
    1030             :  * This function checks whether this string represents a number.
    1031             :  * This means it returns true in the following cases:
    1032             :  *
    1033             :  * \li The string represents a decimal number ([-+]?[0-9]+)
    1034             :  * \li The string represents a hexadecimal number ([-+]?0[xX][0-9a-fA-F]+)
    1035             :  * \li The string represents a floating point number ([-+]?[0-9]+(\.[0-9]+)?([eE][-+]?[0-9]+)?)
    1036             :  *
    1037             :  * Unfortunately, JavaScript does not understand "true", "false",
    1038             :  * and "null" as numbers (even though isNaN(true), isNaN(false),
    1039             :  * and isNaN(null) all return false.)
    1040             :  *
    1041             :  * \return true if this string represents a valid number
    1042             :  */
    1043      510407 : bool String::is_number() const
    1044             : {
    1045             :     // floats support integers so this is true if this string is an int64
    1046      510407 :     return is_int64() || is_float64();
    1047             : }
    1048             : 
    1049             : 
    1050             : /** \brief Convert a string to an integer number.
    1051             :  *
    1052             :  * This function verifies that the string represents a valid integer
    1053             :  * number, if so, it converts it to such and returns the result.
    1054             :  *
    1055             :  * If the string does not represent a valid integer, then the function
    1056             :  * should return NaN. Unfortunately, there is no NaN integer. Instead
    1057             :  * it returns zero (0) if the string is empty, or raises an exception.
    1058             :  *
    1059             :  * \note
    1060             :  * When used by the lexer, it should always work since the lexer reads
    1061             :  * integers with the same expected syntax.
    1062             :  *
    1063             :  * \exception exception_internal_error
    1064             :  * The string is not empty and it does not represent what is considered
    1065             :  * a valid JavaScript integer.
    1066             :  *
    1067             :  * \return The string converted to an integer.
    1068             :  */
    1069      510402 : Int64::int64_type String::to_int64() const
    1070             : {
    1071      510402 :     if(empty())
    1072             :     {
    1073           2 :         return 0;
    1074             :     }
    1075             : 
    1076      510400 :     if(is_int64())
    1077             :     {
    1078             :         // Check whether it is an hexadecimal number, because if so
    1079             :         // we use base 16. We want to force the base because we do
    1080             :         // not support base 8 which std::stoll() could otherwise
    1081             :         // switch to when we have a number that starts with zero.
    1082      500045 :         as_char_t const *s(c_str());
    1083      500045 :         if(*s == '+' || *s == '-')
    1084             :         {
    1085      300156 :             ++s;
    1086             :         }
    1087      500045 :         if(s[0] == '0' && (s[1] == 'x' || s[1] == 'X'))
    1088             :         {
    1089             :             // the strtoll() function supports the sign
    1090      200001 :             return std::stoll(to_utf8(), nullptr, 16);
    1091             :         }
    1092      300044 :         return std::stoll(to_utf8(), nullptr, 10);
    1093             :     }
    1094             : 
    1095             :     // this is invalid
    1096       10355 :     throw exception_internal_error("String::to_int64() called with an invalid integer");
    1097             : }
    1098             : 
    1099             : 
    1100             : /** \brief Convert a string to a floating point number.
    1101             :  *
    1102             :  * This function verifies that the string represents a valid floating
    1103             :  * point number, if so, it converts it to such and returns the result.
    1104             :  *
    1105             :  * If the string does not represent a valid floating point, then the
    1106             :  * function returns NaN.
    1107             :  *
    1108             :  * \warning
    1109             :  * On an empty string, this function returns 0.0 and not NaN as expected
    1110             :  * in JavaScript.
    1111             :  *
    1112             :  * \note
    1113             :  * When used by the lexer, it should always work since the lexer reads
    1114             :  * floating points with the same expected syntax.
    1115             :  *
    1116             :  * \return The string as a floating point.
    1117             :  */
    1118      732390 : Float64::float64_type String::to_float64() const
    1119             : {
    1120      732390 :     if(empty())
    1121             :     {
    1122           9 :         return 0.0;
    1123             :     }
    1124             : 
    1125      732381 :     if(is_float64())
    1126             :     {
    1127      532162 :         return std::stod(to_utf8(), 0);
    1128             :     }
    1129             : 
    1130      200219 :     return std::numeric_limits<double>::quiet_NaN();
    1131             : }
    1132             : 
    1133             : 
    1134             : /** \brief Check whether the string is considered true.
    1135             :  *
    1136             :  * A string that is empty is considered false. Any other string is
    1137             :  * considered true.
    1138             :  *
    1139             :  * \return true if the string is not empty.
    1140             :  */
    1141      510412 : bool String::is_true() const
    1142             : {
    1143      510412 :     if(empty())
    1144             :     {
    1145           9 :         return false;
    1146             :     }
    1147             : // Not too sure where I picked that up, but the documentation clearly says
    1148             : // that an empty string is false, anything else is true...
    1149             : //    if(is_int64())
    1150             : //    {
    1151             : //        return to_int64() != 0;
    1152             : //    }
    1153             : //    if(is_float64())
    1154             : //    {
    1155             : //#pragma GCC diagnostic push
    1156             : //#pragma GCC diagnostic ignored "-Wfloat-equal"
    1157             : //        return strtod(to_utf8().c_str(), 0) != 0.0;
    1158             : //#pragma GCC diagnostic pop
    1159             : //    }
    1160      510403 :     return true;
    1161             : }
    1162             : 
    1163             : 
    1164             : /** \brief Calculate the length if converted to UTF-8.
    1165             :  *
    1166             :  * This function calculates the length necessary to convert the string
    1167             :  * to UTF-8.
    1168             :  *
    1169             :  * \return The length if converted to UTF-8, or -1 if a character is invalid.
    1170             :  */
    1171     2226907 : ssize_t String::utf8_length() const
    1172             : {
    1173     2226907 :     ssize_t     r(0);
    1174             :     as_char_t   c;
    1175             : 
    1176     4608613 :     for(as_char_t const *wc(c_str()); *wc != '\0'; ++wc)
    1177             :     {
    1178             :         // get one wide character
    1179     2384206 :         c = *wc;
    1180     2384206 :         if(!valid_character(c))
    1181             :         {
    1182             :             // character is not valid UTF-32
    1183        2500 :             return -1;
    1184             :         }
    1185             : 
    1186             :         // simulate encoding
    1187     2381706 :         if(c < 0x80)
    1188             :         {
    1189         331 :             r += 1;
    1190             :         }
    1191     2381375 :         else if(c < 0x800)
    1192             :         {
    1193        4126 :             r += 2;
    1194             :         }
    1195     2377249 :         else if(c < 0x10000)
    1196             :         {
    1197      131555 :             r += 3;
    1198             :         }
    1199             :         else //if(c < 0x200000)
    1200             :         {
    1201     2245694 :             r += 4;
    1202             :         }
    1203             :     }
    1204             : 
    1205     2224407 :     return r;
    1206             : }
    1207             : 
    1208             : 
    1209             : /** \brief Convert a string to UTF-8 and return the result.
    1210             :  *
    1211             :  * This function converts this String to UTF-8 using an std::string
    1212             :  * and then returns the result.
    1213             :  *
    1214             :  * \warning
    1215             :  * Remember that you cannot use UTF-8 as direct input of a constructor
    1216             :  * or assignment operator of the String class. Instead, make sure to use
    1217             :  * the from_utf8() function.
    1218             :  *
    1219             :  * \note
    1220             :  * The function skips any character considered invalid. If you want to
    1221             :  * know whether the resulting UTF-8 string is an exact representation
    1222             :  * of this String, then first call the valid() function on the source.
    1223             :  *
    1224             :  * \todo
    1225             :  * This String object is expected to not have any invalid characters
    1226             :  * so this function always returns the conversion even if it finds
    1227             :  * invalid characters.
    1228             :  *
    1229             :  * \return The String converted to UTF-8 and saved in an std::string.
    1230             :  */
    1231     9495015 : std::string String::to_utf8() const
    1232             : {
    1233     9495015 :     std::string     result;
    1234             :     as_char_t       c;
    1235             : 
    1236             :     // walk the buffer returned by c_str() up to (and excluding) the first '\0'
    1237   206284876 :     for(as_char_t const *wc = c_str(); *wc != '\0'; ++wc)
    1238             :     {
    1239             :         // get one wide character
    1240   196789861 :         c = *wc;
    1241   196789861 :         if(valid_character(c))
    1242             :         {
    1243             :             // only encode characters considered valid; others are silently dropped
    1244   196789861 :             if(c < 0x80)
    1245             :             {
    1246             :                 /* one byte, copied as is; '\0' never gets here since the loop stops on it */
    1247   193738252 :                 result.append(1, c);
    1248             :             }
    1249     3051609 :             else if(c < 0x800) // two bytes: 0xC0 lead + one 0x80 continuation
    1250             :             {
    1251      830504 :                 result.append(1, (c >> 6) | 0xC0);
    1252      830504 :                 result.append(1, (c & 0x3F) | 0x80);
    1253             :             }
    1254     2221105 :             else if(c < 0x10000) // three bytes: 0xE0 lead + two 0x80 continuations
    1255             :             {
    1256      122940 :                 result.append(1, (c >> 12) | 0xE0);
    1257      122940 :                 result.append(1, ((c >> 6) & 0x3F) | 0x80);
    1258      122940 :                 result.append(1, (c & 0x3F) | 0x80);
    1259             :             }
    1260             :             else // four bytes: 0xF0 lead + three 0x80 continuations
    1261             :             {
    1262     2098165 :                 result.append(1, (c >> 18) | 0xF0);
    1263     2098165 :                 result.append(1, ((c >> 12) & 0x3F) | 0x80);
    1264     2098165 :                 result.append(1, ((c >> 6) & 0x3F) | 0x80);
    1265     2098165 :                 result.append(1, (c & 0x3F) | 0x80);
    1266             :             }
    1267             :         }
    1268             :     }
    1269             : 
    1270     9495015 :     return result;
    1271             : }
    1272             : 
    1273             : 
    1274             : /** \brief Make a simplified copy of this string.
    1275             :  *
    1276             :  * This function makes a copy of this string, removing the spaces
    1277             :  * found at the start and at the end, and replacing each run of spaces
    1278             :  * found within the string by a single space.
    1279             :  *
    1280             :  * If the string starts with a number, then only the number is kept.
    1281             :  *
    1282             :  * \note
    1283             :  * This function is primarily used to compare a string using the
    1284             :  * smart match operator.
    1285             :  *
    1286             :  * \return The simplified string.
    1287             :  */
    1288          31 : String String::simplified() const
    1289             : {
    1290          31 :     String result;
    1291             : 
    1292             :     // skip leading spaces (TBD: should we limit the space check to spaces recognized by ECMAScript?)
    1293          31 :     as_char_t const *wc = c_str();
    1294          99 :     while(*wc != '\0' && iswspace(*wc))
    1295             :     {
    1296          37 :         ++wc;
    1297             :     }
    1298             : 
    1299             :     // accept a signed number; the sign is kept in the result even if no digit follows
    1300          31 :     if(*wc == '-' || *wc == '+')
    1301             :     {
    1302           6 :         result += *wc;
    1303           6 :         ++wc;
    1304             :     }
    1305          31 :     if(*wc >= '0' && *wc <= '9')
    1306             :     {
    1307             :         // read the number (integer part first), ignore the rest
    1308          17 :         result += *wc;
    1309          17 :         ++wc;
    1310          49 :         while(*wc >= '0' && *wc <= '9')
    1311             :         {
    1312          15 :             result += *wc;
    1313          15 :             ++wc;
    1314             :         }
    1315          34 :         if(*wc == '.') // NOTE(review): the exponent is only parsed after a '.', so "1e3" yields "1" -- confirm this is intended
    1316             :         {
    1317          15 :             result += *wc;
    1318          15 :             ++wc;
    1319          70 :             while(*wc >= '0' && *wc <= '9')
    1320             :             {
    1321          40 :                 result += *wc;
    1322          40 :                 ++wc;
    1323             :             }
    1324          15 :             if(*wc == 'e' || *wc == 'E')
    1325             :             {
    1326           3 :                 result += *wc;
    1327           3 :                 ++wc;
    1328           3 :                 if(*wc == '+' || *wc == '-')
    1329             :                 {
    1330           2 :                     result += *wc;
    1331           2 :                     ++wc;
    1332             :                 }
    1333           9 :                 while(*wc >= '0' && *wc <= '9')
    1334             :                 {
    1335           3 :                     result += *wc;
    1336           3 :                     ++wc;
    1337             :                 }
    1338             :             }
    1339             :         }
    1340             :         // anything found after the number is ignored
    1341             :     }
    1342             :     else
    1343             :     {
    1344             :         // read the string, but simplify the spaces
    1345          14 :         bool found_space(false);
    1346          90 :         for(; *wc != '\0'; ++wc)
    1347             :         {
    1348          76 :             if(iswspace(*wc))
    1349             :             {
    1350          25 :                 found_space = true; // remember the run; it is output as one ' ' before the next non-space
    1351             :             }
    1352             :             else
    1353             :             {
    1354          51 :                 if(found_space)
    1355             :                 {
    1356           3 :                     result += ' ';
    1357           3 :                     found_space = false;
    1358             :                 }
    1359          51 :                 result += *wc;
    1360             :             }
    1361             :         }
    1362             :     }
    1363             : 
    1364          31 :     if(result.empty())
    1365             :     {
    1366             :         // make an empty string similar to zero
    1367           2 :         result = "0";
    1368             :     }
    1369             : 
    1370          31 :     return result;
    1371             : }
    1372             : 
    1373             : 
    1374             : /** \brief Send string to output stream.
    1375             :  *
    1376             :  * This function sends this String to the specified output buffer. It is
    1377             :  * to ease the output of a string to stream such as std::cout and std::cerr.
    1378             :  *
    1379             :  * \param[in,out] out  Stream where the string is printed.
    1380             :  * \param[in] str  The string to be printed out.
    1381             :  *
    1382             :  * \return A reference to the \p out stream.
    1383             :  */
    1384      627083 : std::ostream& operator << (std::ostream& out, String const& str)
    1385             : {
    1386             :     // Note: under MS-Windows we'd need to use str.to_wchar() instead
    1387      627083 :     out << str.to_utf8(); // the string is written as the std::string returned by to_utf8()
    1388      627083 :     return out;
    1389             : }
    1390             : 
    1391             : 
    1392          63 : }
    1393             : // namespace as2js
    1394             : 
    1395             : // vim: ts=4 sw=4 et

Generated by: LCOV version 1.10