Bug Summary

File:platform/mac/avmshell/../../../core/StringObject.cpp
Location:line 1257, column 13
Description:Value stored to 'charsUsed' is never read

Annotated Source Code

1/* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2/* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
5 *
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
10 *
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
15 *
16 * The Original Code is[Open Source Virtual Machine.].
17 *
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2008
21 * the Initial Developer. All Rights Reserved.
22 *
23 * Contributor(s):
24 * Michel Daumling
25 *
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
37 *
38 * ***** END LICENSE BLOCK ***** */
39
40#include "avmplus.h"
41
// Upper bound for the "characters left" counter kept in the m_bitsAndFlags
// field of a String instance. Strings grow by doubling the available buffer
// size until the characters left would exceed the size of that field; from
// there on, the buffer grows linearly. The field maximum is halved here so
// that the counter is guaranteed to span the entire buffer area; without
// that, the calculation of characters left (using GC::Size()) would return
// a wrong number.
#define TSTR_MAX_CHARSLEFT ((uint32_t)TSTR_CHARSLEFT_MASK >> TSTR_CHARSLEFT_SHIFT >> 1)

// Cap on the extra bytes allocated during a string concatenation when
// PleaseAlloc() fails. The value is deliberately small so that the app is
// not killed by a large speculative chunk of RAM that is only reserved in
// case more concatenations follow.
#define TSTR_MAX_LOMEM_EXTRABYTES 16384

// Never allocate dynamic strings smaller than this (in bytes).
#define TSTR_MIN_DYNAMIC_ALLOCATION 32

// In fixDependentString, do not bother going dependent->dynamic if the
// memory that would be saved is less than this (in bytes).
#define TSTR_DEPENDENT_STRING_NUISANCE_SAVINGS int32_t(sizeof(String))
65
66using namespace MMgc;
67
68namespace avmplus
69{
70 // Use this constant for 16 and 32 bit strings of zero length
71 // we do not want any string to contain NULL buffer pointers
72 union Zero
73 {
74 uint32_t u32;
75 wchar u16;
76 uint8_t u8;
77 char c8;
78 };
79 static const Zero k_zero = { 0 };
80
81/////////////////////////// Helpers: Widening //////////////////////////////
82
83 REALLY_INLINEinline __attribute__((always_inline)) void _widen8_16(const uint8_t* src, wchar* dst, int32_t len)
84 {
85 while (len-- > 0)
86 *dst++ = wchar(*src++);
87 }
88
89/////////////////////////// Helpers: Narrowing //////////////////////////////
90
91 REALLY_INLINEinline __attribute__((always_inline)) void _narrow16_8(const wchar* src, uint8_t* dst, int32_t len)
92 {
93 while (len-- > 0)
94 {
95 AvmAssert(*src <= 0xFF)do { } while (0);
96 *dst++ = (uint8_t) *src++;
97 }
98 }
99
100/////////////////////////// Helpers: Copying ///////////////////////////////
101
102 // Copy the contents of the given buffer to another buffer, given two widths.
103
104 static void* _copyBuffers(const void* src, void* dst, int32_t srcLen, String::Width srcWidth, String::Width dstWidth)
105 {
106 if (srcWidth == dstWidth)
107 VMPI_memcpy::memcpy(dst, src, srcLen << srcWidth);
108 else if (srcWidth == String::k8)
109 _widen8_16((const uint8_t*) src, (wchar*) dst, srcLen);
110 else
111 _narrow16_8((const wchar*) src, (uint8_t*) dst, srcLen);
112 // return the new buffer pointer
113 return (char*) dst + (srcLen << dstWidth);
114 }
115
#ifdef _DEBUG
// Debug-only consistency check for TSTR_7BIT_FLAG: when the flag is set,
// the string must be 8 bits wide and no character may be above 127.
void String::verify7bit() const
{
    // nothing to verify unless the flag is set
    if ((m_bitsAndFlags & TSTR_7BIT_FLAG) == 0)
        return;

    if (getWidth() != k8)
    {
        AvmAssert(!"only k8 strings should set TSTR_7BIT_FLAG");
        return;
    }

    Pointers ptrs(this);
    for (int32_t i = 0; i < m_length; ++i)
    {
        if (ptrs.p8[i] > 127)
        {
            AvmAssert(!"hi bit found on string with TSTR_7BIT_FLAG set");
        }
    }
}
#endif
139
140/////////////////////////// Helper: get AvmCore /////////////////////////////
141
142 REALLY_INLINEinline __attribute__((always_inline)) MMgc::GC* _gc(const String* s)
143 {
144 return MMgc::GC::GetGC(s);
145 }
146
147 REALLY_INLINEinline __attribute__((always_inline)) AvmCore* _core(const String* s)
148 {
149 MMgc::GC *gc = MMgc::GC::GetGC(s);
150 return gc->core();
151 }
152
153/////////////////////////// Helpers: templated functions /////////////////////////////
154
//
// Compile-time guard for the templated helpers below: uses type traits to
// produce a compile-time error when one of them is instantiated with a
// non-unsigned char type as an argument.
//
// The condition deliberately uses a bitwise &, not a logical &&. GCC 4.3
// and later, with -Wlogical-op enabled, complains about the && form with
//
//      "warning: logical '&&' with non-zero constant will always evaluate as true"
//
// and since many embedders compile with warnings-as-errors enabled, that
// breaks the build. The bitwise form is a valid workaround, so it is used
// rather than requiring embedders to disable the warning.
//
// The macro expands to a complete { } block, so it is used without a
// trailing semicolon.
//
#define PREVENT_SIGNED_CHAR_PTR(TYPE) \
    { \
        typedef MMgc::is_same<char, uint8_t> is_char_unsigned; \
        typedef MMgc::is_same<char, TYPE> is_char; \
        MMGC_STATIC_ASSERT(!(is_char::value & !is_char_unsigned::value)); \
    }
176
177 // apparently SunPro compiler doesn't like combining REALLY_INLINE with static functions.
178 template <typename STR1, typename STR2>
179 /*static*/ REALLY_INLINEinline __attribute__((always_inline)) boolbool equalsImpl(const STR1* str1, const STR2* str2, int32_t len)
180 {
181 PREVENT_SIGNED_CHAR_PTR(STR1){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, STR1> is_char; typedef ::
MMgc::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_181; }
182 PREVENT_SIGNED_CHAR_PTR(STR2){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, STR2> is_char; typedef ::
MMgc::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_182; }
183
184 for (int32_t j = 0; j < len; j++)
185 {
186 if (str1[j] != str2[j])
187 return falsefalse;
188 }
189 return truetrue;
190 }
191
192 // NOTE: this routine cannot use memcmp(), as the test
193 // ecma3/String/localeCompare_rt.as depends on the difference
194 // in character values, and memcmp() does not guarantee to
195 // return this value
196
197 // apparently SunPro compiler doesn't like combining REALLY_INLINE with static functions.
198 template <typename STR1, typename STR2>
199 /*static*/ REALLY_INLINEinline __attribute__((always_inline)) int32_t compareImpl(const STR1* str1, const STR2* str2, int32_t len)
200 {
201 PREVENT_SIGNED_CHAR_PTR(STR1){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, STR1> is_char; typedef ::
MMgc::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_201; }
202 PREVENT_SIGNED_CHAR_PTR(STR2){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, STR2> is_char; typedef ::
MMgc::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_202; }
203
204 int32_t res = 0;
205 while (len-- > 0 && !res)
206 {
207 res = int32_t(*str2++ - *str1++);
208 }
209 return res;
210 }
211
212 // apparently SunPro compiler doesn't like combining REALLY_INLINE with static functions.
213 template <typename STR, typename PATTERN>
214 /*static*/ REALLY_INLINEinline __attribute__((always_inline)) int32_t indexOfImpl(const STR* str, int32_t start, int32_t right, const PATTERN* pat, int32_t patlen)
215 {
216 PREVENT_SIGNED_CHAR_PTR(STR){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, STR> is_char; typedef ::MMgc
::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_216; }
217 PREVENT_SIGNED_CHAR_PTR(PATTERN){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, PATTERN> is_char; typedef
::MMgc::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_217; }
218 AvmAssert(patlen > 0)do { } while (0);
219
220 // even with REALLY_INLINE, some compilers will be reluctant to inline equalsImpl here,
221 // so we explicitly repeat the code here.
222 const PATTERN pat0 = pat[0];
223 const STR* const end = str + right;
224 for (const STR* probe = str + start; probe <= end; ++probe)
225 {
226 if (probe[0] == pat0)
227 {
228 for (int32_t j = 1; j < patlen; j++)
229 {
230 if (probe[j] != pat[j])
231 goto no_match;
232 }
233 return int32_t(uintptr_t(probe - str));
234 }
235 no_match:
236 // some compilers will complain about a label with no statement following,
237 // hence the continue
238 continue;
239 }
240
241 return -1;
242 }
243
244 // apparently SunPro compiler doesn't like combining REALLY_INLINE with static functions.
245 template <typename STR>
246 /*static*/ REALLY_INLINEinline __attribute__((always_inline)) int32_t indexOfCharCodeImpl(const STR* str, int32_t start, int32_t right, wchar c)
247 {
248 PREVENT_SIGNED_CHAR_PTR(STR){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, STR> is_char; typedef ::MMgc
::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_248; }
249
250 const STR* p = str + start - 1;
251 const STR* const end = str + right;
252 while (++p <= end)
253 if (*p == c)
254 return int32_t(p - str);
255
256 return -1;
257 }
258
259 // apparently SunPro compiler doesn't like combining REALLY_INLINE with static functions.
260 template <typename STR, typename PATTERN>
261 /*static*/ REALLY_INLINEinline __attribute__((always_inline)) int32_t lastIndexOfImpl(const STR* str, int32_t const start, const PATTERN* pat, int32_t patlen)
262 {
263 PREVENT_SIGNED_CHAR_PTR(STR){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, STR> is_char; typedef ::MMgc
::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_263; }
264 PREVENT_SIGNED_CHAR_PTR(PATTERN){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, PATTERN> is_char; typedef
::MMgc::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_264; }
265 AvmAssert(patlen > 0)do { } while (0);
266 AvmAssert(start >= 0)do { } while (0);
267
268 // even with REALLY_INLINE, some compilers will be reluctant to inline equalsImpl here,
269 // so we explicitly repeat the code here.
270 const PATTERN pat0 = pat[0];
271 for (const STR* probe = str + start; probe >= str; --probe)
272 {
273 if (probe[0] == pat0)
274 {
275 for (int32_t j = 1; j < patlen; j++)
276 {
277 if (pat[j] != probe[j])
278 goto no_match;
279 }
280 return int32_t(uintptr_t(probe - str));
281 }
282 no_match:
283 // some compilers will complain about a label with no statement following,
284 // hence the continue
285 continue;
286 }
287 return -1;
288 }
289
290 // apparently SunPro compiler doesn't like combining REALLY_INLINE with static functions.
291 template <typename STR>
292 /*static*/ REALLY_INLINEinline __attribute__((always_inline)) int32_t hashCodeImpl(const STR* str, int32_t len)
293 {
294 PREVENT_SIGNED_CHAR_PTR(STR){ typedef MMgc::is_same<char, uint8_t> is_char_unsigned
; typedef MMgc::is_same<char, STR> is_char; typedef ::MMgc
::static_assert_MMgc<sizeof (::MMgc::STATIC_ASSERTION_FAILED
<(bool)(!(is_char::value & !is_char_unsigned::value))>
)> MMgc_static_assert_line_294; }
295
296 // must be same signed-ness as other hashcode functions.
297 // experimentation shows better results from signed (vs unsigned).
298 int32_t hashCode = 0;
299 while (len--)
300 hashCode = (hashCode >> 28) ^ (hashCode << 4) ^ *str++;
301 return hashCode;
302 }
303
304 /*static*/ REALLY_INLINEinline __attribute__((always_inline)) int32_t hashCodeAddChar(int32_t hashCode, char val)
305 {
306 // must be same signed-ness as other hashcode functions.
307 // (match sign propagation in right-shift >> 28, for better or worse)
308 return ((hashCode >> 28) ^ (hashCode << 4) ^ val);
309 }
310
311 /*static*/ REALLY_INLINEinline __attribute__((always_inline)) int32_t stringLengthUInt(uint32_t value)
312 {
313 if (value < 10) return 1;
314 else if (value < 100) return 2;
315 else if (value < 1000) return 3;
316 else if (value < 10000) return 4;
317 else if (value < 100000) return 5;
318 else if (value < 1000000) return 6;
319 else if (value < 10000000) return 7;
320 else if (value < 100000000) return 8;
321 else if (value < 1000000000) return 9;
322 else return 10;
323 }
324
325////////////////////////////// Constructors ////////////////////////////////
326
327 // ctor for a static string.
328
329 REALLY_INLINEinline __attribute__((always_inline)) String::String(const void* buffer, Width w, int32_t length, boolbool is7bit) :
330#ifdef DEBUGGER
331 AvmPlusScriptableObject(sotString()),
332#endif
333 m_buffer(buffer), // static data - no WB() needed
334 m_extra(NULL__null),
335 m_length(length),
336 m_bitsAndFlags(w | (kStatic << TSTR_TYPE_SHIFT) | (uint32_t(is7bit) << TSTR_7BIT_SHIFT))
337 {
338 AvmAssert(m_length >= 0)do { } while (0);
339 AvmAssert((uint64_t(m_length) << getWidth()) <= 0x7FFFFFFFU)do { } while (0);
340 }
341
342 // ctor for a dynamic string.
343
344 REALLY_INLINEinline __attribute__((always_inline)) String::String(MMgc::GC* gc, void* buffer, Width w, int32_t length, int32_t charsLeft, boolbool is7bit) :
345#ifdef DEBUGGER
346 AvmPlusScriptableObject(sotString()),
347 m_buffer((void*)NULL__null),
348#else
349 m_buffer(buffer),
350#endif
351 m_extra(NULL__null),
352 m_length(length),
353 m_bitsAndFlags(w | (kDynamic << TSTR_TYPE_SHIFT) | (charsLeft << TSTR_CHARSLEFT_SHIFT) | (uint32_t(is7bit) << TSTR_7BIT_SHIFT))
354 {
355 AvmAssert(m_length >= 0)do { } while (0);
356 AvmAssert((uint64_t(m_length) << getWidth()) <= 0x7FFFFFFFU)do { } while (0);
357#ifdef DEBUGGER
358 /*
359 an explicit WB is only necessary if there's the possibility of an allocation between the time
360 the container (String) is allocated and time we set the field. In nondebugger builds, there is no
361 such possibility, but in DEBUGGER builds, the superclass ctor (AvmPlusScriptableObject) can
362 cause an allocation if sampling is enabled. ("Icky and brittle", but that's how it is.)
363 */
364 WB(gc, this, &this->m_buffer.pv, buffer)gc->privateWriteBarrier(this, &this->m_buffer.pv, (
const void *) (buffer))
;
365#else
366 (void)gc;
367#endif
368 }
369
370 // ctor for a dependent string.
371
372 REALLY_INLINEinline __attribute__((always_inline)) String::String(MMgc::GC* gc, Stringp master, int32_t start, int32_t length) :
373#ifdef DEBUGGER
374 AvmPlusScriptableObject(sotString()),
375#endif
376 m_buffer(uintptr_t(start << master->getWidth())),
377#ifdef DEBUGGER
378 m_extra(NULL__null),
379#else
380 m_extra(master),
381#endif
382 m_length(length),
383 // note that we propagate TSTR_7BIT_FLAG: if the entire master is TSTR_7BIT_FLAG, then so is the dependent string.
384 // @todo: a dependent string could qualify for TSTR_7BIT_FLAG even if it's master is not. worth checking for?
385 m_bitsAndFlags((master->m_bitsAndFlags & (TSTR_WIDTH_MASK | TSTR_7BIT_FLAG)) | (kDependent << TSTR_TYPE_SHIFT))
386 {
387 AvmAssert(m_length >= 0)do { } while (0);
388 AvmAssert((uint64_t(m_length) << getWidth()) <= 0x7FFFFFFFU)do { } while (0);
389#ifdef DEBUGGER
390 WBRC(gc, this, &this->m_extra.master, master)gc->privateWriteBarrierRC(this, &this->m_extra.master
, (const void *) (master))
;
391#else
392 /*
393 an explicit WBRC is only necessary if there's the possibility of an allocation between the time
394 the container (String) is allocated and time we set the field. In nondebugger builds, there is no
395 such possibility, but in DEBUGGER builds, the superclass ctor (AvmPlusScriptableObject) can
396 cause an allocation if sampling is enabled. ("Icky and brittle", but that's how it is.)
397 */
398 AvmAssert(master != NULL)do { } while (0);
399 master->IncrementRef();
400 (void)gc;
401#endif
402 }
403
404 // add a and b and check for overflow
405
406 static int32_t int32AddChecked(int32_t a, int32_t b)
407 {
408 if ((a | b) >= 0) // both nonnegative?
409 {
410 uint64_t x = uint64_t(a) + uint64_t(b);
411 if (x <= 0x7FFFFFFFU)
412 return int32_t(x);
413 }
414 GCHeap::SignalObjectTooLarge();
415 /*NOTREACHED*/
416 return 0;
417 }
418
419 // shift a left by b and check for overflow
420
421 static int32_t int32ShlChecked(int32_t a, int32_t b)
422 {
423 AvmAssert(b < 32)do { } while (0); // ok for this to be DEBUG-only, the shift amounts are known constants for practical purposes
424 if ((a | b) >= 0) // both nonnegative?
425 {
426 uint64_t x = uint64_t(a) << uint64_t(b);
427 if (x <= 0x7FFFFFFFU)
428 return int32_t(x);
429 }
430 GCHeap::SignalObjectTooLarge();
431 /*NOTREACHED*/
432 return 0;
433 }
434
435 static uint32_t uint32ShlChecked(uint32_t a, uint32_t b)
436 {
437 AvmAssert(b < 32)do { } while (0); // ok for this to be DEBUG-only, the shift amounts are known constants for practical purposes
438 uint64_t x = uint64_t(a) << uint64_t(b);
439 if (x <= 0xFFFFFFFFU)
440 return uint32_t(x);
441 GCHeap::SignalObjectTooLarge();
442 /*NOTREACHED*/
443 return 0;
444 }
445
446 // Private static method to create a dependent string
447
448 // in nondebug builds this is a trivial wrapper around the ctor (which is itself trivial), so REALLY_INLINE it
449 REALLY_INLINEinline __attribute__((always_inline)) /*static*/ Stringp String::createDependent(GC* gc, Stringp master, int32_t start, int32_t len)
450 {
451 AvmAssert(len >= 0)do { } while (0);
452 // master cannot be a dependent string
453 AvmAssert(!master->isDependent())do { } while (0);
454 MMGC_MEM_TAG( "Strings" );
455 Stringp s = new(gc, MMgc::kExact) String(gc, master, start, len);
456 VERIFY_7BIT(s)do { } while (0);
457 return s;
458 }
459
460 // Private static method to create a dynamic string, given a buffer and its size in characters
461
462 /*static*/ Stringp String::createDynamic(GC* gc, const void* data, int32_t len, Width w, boolbool is7bit, int32_t extra)
463 {
464 AvmAssert(w != kAuto)do { } while (0);
465 AvmAssert(len >= 0)do { } while (0);
466
467 // only 8-bit strings should set the 7-bit flag
468 if (w != k8)
469 is7bit = falsefalse;
470
471 // a zero-length dynamic string is legal, but a zero-length GC allocation is not.
472 int32_t alloc = int32AddChecked(len, extra);
473
474 MMGC_MEM_TAG( "Strings" );
475
476 // First, use PleaseAlloc(), and if the call fails, reduce the amount of extra data
477 // to TSTR_MAX_EXTRA_BYTES_IN_LOW_MEMORY and do an Alloc(), which may fail.
478 void* buffer = gc->PleaseAlloc(int32ShlChecked(alloc, w), 0);
479 if (buffer == NULL__null)
480 {
481 if (extra > (TSTR_MAX_LOMEM_EXTRABYTES16384 >> w))
482 extra = TSTR_MAX_LOMEM_EXTRABYTES16384 >> w;
483 alloc = len + extra; // This is safe because the new value of 'extra' is smaller than the old, and the old was checked
484 buffer = gc->Alloc(alloc << w, 0); // Ditto
485 }
486
487 int32_t bufLen = (int32_t) (GC::Size(buffer) >> w); // Note bufLen may be larger than (alloc << w)
488 int32_t charsLeft = bufLen - len;
489 // the extra character must not exceed the available field size.
490 //
491 // ok for this to be an assertion because we will not have problems with mmgc rounding up
492 // to a 4KB boundary; the max chars left is about 4KB.
493 AvmAssert(charsLeft <= int32_t((uint32_t) TSTR_CHARSLEFT_MASK >> TSTR_CHARSLEFT_SHIFT))do { } while (0);
494
495 Stringp s = new(gc, MMgc::kExact) String(gc, buffer, w, len, charsLeft, is7bit);
496
497 if (data != NULL__null && len != 0)
498 VMPI_memcpy::memcpy(buffer, data, size_t(len << w)); // This is safe because alloc >= len and buffer size is alloc << w and that has been checked.
499#ifdef _DEBUG
500 // Terminate string with 0 for better debugging display
501 if (charsLeft)
502 {
503 if (w == k8)
504 s->m_buffer.p8[len] = 0;
505 else
506 s->m_buffer.p16[len] = 0;
507 }
508 if (data != NULL__null)
509 {
510 // don't try to verify if data==null, buffer will be random! (caller will verify)
511 VERIFY_7BIT(s)do { } while (0);
512 }
513#endif
514 return s;
515 }
516
517 // Private static method to create a string, given a static buffer and its size in characters
518
519 /*static*/ Stringp String::createStatic(GC* gc, const void* data, int32_t len, Width w, boolbool is7bit)
520 {
521 AvmAssert(w != kAuto)do { } while (0);
522 AvmAssert(len >= 0)do { } while (0);
523 MMGC_MEM_TAG( "Strings" );
524 // only 8-bit strings should set the 7-bit flag
525 if (w != k8)
526 is7bit = falsefalse;
527 Stringp s = new (gc, MMgc::kExact) String(data, w, len, is7bit);
528 VERIFY_7BIT(s)do { } while (0);
529 return s;
530 }
531
532 // Create a string out of an 8bit buffer. Characters are just widened and copied, not interpreted as UTF8.
533
534 Stringp String::createLatin1(AvmCore* core, const char* buffer, int32_t len, Width desiredWidth, boolbool staticBuf)
535 {
536 if (buffer == NULL__null)
537 {
538 len = 0;
539 buffer = &k_zero.c8;
540 staticBuf = truetrue;
541 }
542 if (len < 0)
543 len = Length(buffer);
544
545 if (desiredWidth == kAuto)
546 desiredWidth = k8;
547
548 if (desiredWidth == k8 && core->kEmptyString != NULL__null)
549 {
550 // core has been initialized, check for cached characters
551 if (len == 0)
552 return core->kEmptyString;
553
554 if (len == 1 && *((uint8_t*) buffer) < 128)
555 return core->cachedChars[*((uint8_t*) buffer)];
556 }
557
558 const boolbool is7bit = falsefalse; // actually, might be, we just haven't checked yet.
559
560 Stringp s = NULL__null;
561 GC* gc = core->GetGC();
562 if (staticBuf && desiredWidth == k8)
563 {
564 s = createStatic(gc, buffer, len, k8, is7bit);
565 }
566 else
567 {
568 s = createDynamic(gc, NULL__null, len, desiredWidth, is7bit);
569 _copyBuffers(buffer, s->m_buffer.p8, len, k8, desiredWidth);
570 VERIFY_7BIT(s)do { } while (0);
571 }
572 return s;
573 }
574
575 Stringp String::getFixedWidthString(Width w) const
576 {
577 if (w == getWidth())
578 return (Stringp) this;
579
580 if (w == kAuto)
581 return NULL__null;
582
583 const boolbool is7bit = falsefalse;
584 Stringp newStr = createDynamic(_gc(this), NULL__null, m_length, w, is7bit);
585
586 Pointers ptrs(this);
587 Pointers new_ptrs(newStr);
588 _copyBuffers(ptrs.pv, new_ptrs.pv, m_length, getWidth(), w);
589
590 VERIFY_7BIT(newStr)do { } while (0);
591 return newStr;
592 }
593
594/////////////////////////////// Destructors & tracers ////////////////////////////////
595
596 String::~String()
597 {
598 switch (getType())
599 {
600 case kDynamic: {
601 // Never necessary to WB() when we store NULL over a non-RC pointer
602 void* pv = m_buffer.pv;
603 m_buffer.pv = NULL__null; // Avoid dangling the pointer
604 _gc(this)->FreeNotNull(pv);
605 break;
606 }
607 case kDependent:
608 // WBRC() is however necessary when we store NULL over an RC pointer
609 WBRC_NULL(&m_extra.master)MMgc::GC::WriteBarrierRC_dtor(&m_extra.master);
610 break;
611 default: ; // kStatic
612 }
613 m_extra.master = NULL__null; // might have already been cleared above, but that's ok
614 m_buffer.p8 = NULL__null; // might have already been cleared above, but that's ok
615 m_length = 0;
616 m_bitsAndFlags = 0;
617 }
618
619 /*virtual*/
620 boolbool String::gcTrace(GC* gc, size_t cursor)
621 {
622 (void)cursor;
623 switch (getType())
624 {
625 case kDynamic:
626 gc->TraceLocation(&m_buffer.pv);
627 break;
628 case kDependent:
629 gc->TraceLocation(&m_extra.master);
630 break;
631 }
632 return falsefalse;
633 }
634
635 /////////////////////////////// Conversions ////////////////////////////////
636
637 void String::makeDynamic(const uint8_t* dataStart, uint32_t dataSize)
638 {
639 AvmAssert(dataStart != NULL)do { } while (0);
640 AvmAssert(dataSize > 0)do { } while (0);
641 switch(getType())
642 {
643 case kStatic:
644 // do not convert if range is given and data falls within
645 if ((uint32_t) (m_buffer.p8 - dataStart) >= dataSize)
646 break;
647 // else fall thru
648 case kDependent:
649 convertToDynamic();
650 break;
651 default:;
652 }
653 }
654
655 void String::fixDependentString()
656 {
657 if (!isDependent())
658 return;
659
660 // If the length of this instance is > the master length, this
661 // string is the result of an auto-concat; we keep the master in
662 // in that case - there is little to no memory gain to be expected.
663 // Also, forget about conversion if the memory to be gained is minimal.
664 if (m_length >= (m_extra.master->m_length - (TSTR_DEPENDENT_STRING_NUISANCE_SAVINGSint32_t(sizeof(String)) >> getWidth())))
665 return;
666
667 // Do not convert to a static string here, because a static string
668 // may be converted to a dynamic string, which would leave leave an interior
669 // pointer to the static content in the string instance.
670 convertToDynamic();
671 }
672
673 void String::convertToDynamic()
674 {
675 AvmAssert(getType() != kDynamic)do { } while (0);
676 // Convert this string to be a dynamic string
677 int32_t bytes = m_length << getWidth(); // No overflow by definition
678 GC* gc = _gc(this);
679 MMGC_MEM_TYPE( this );
680 void* buf = gc->Alloc(bytes, 0);
681 VMPI_memcpy::memcpy(buf, Pointers(this).pv, bytes);
682 WB(gc, this, &this->m_buffer.pv, buf)gc->privateWriteBarrier(this, &this->m_buffer.pv, (
const void *) (buf))
;
683 if (isDependent())
684 WBRC_NULL(&m_extra.master)MMgc::GC::WriteBarrierRC_dtor(&m_extra.master);
685 setType(kDynamic);
686 }
687
688/////////////////////////////// Comparison /////////////////////////////////
689
690 int32_t String::Compare(String& other, int32_t other_start, int32_t other_length) const
691 {
692 if (this == &other)
693 return 0;
694
695 if (other_start >= other.m_length)
696 return -1;
697
698 if (other_length < 1 || other_length > other.m_length)
699 other_length = other.m_length;
700 if (other_start < 0)
701 other_start = 0;
702
703 int32_t result = 0;
704
705 int32_t len = (m_length < other_length) ? m_length : other_length; // choose smaller of two
706 if (len > 0)
707 {
708 Pointers ptrs(this);
709 Pointers other_ptrs(&other);
710 Width const w1 = this->getWidth();
711 Width const w2 = other.getWidth();
712 switch ((w1 << 1) + w2)
713 {
714 case (k8 << 1) + k8:
715 result = compareImpl(ptrs.p8, other_ptrs.p8 + other_start, len);
716 break;
717
718 case (k8 << 1) + k16:
719 result = compareImpl(ptrs.p8, other_ptrs.p16 + other_start, len);
720 break;
721
722 case (k16 << 1) + k8:
723 result = compareImpl(ptrs.p16, other_ptrs.p8 + other_start, len);
724 break;
725
726 case (k16 << 1) + k16:
727 result = compareImpl(ptrs.p16, other_ptrs.p16 + other_start, len);
728 break;
729 }
730 }
731
732 if (result == 0)
733 {
734 // catch substring compares
735 result = (other_length - m_length);
736 if (result < 0)
737 result = -1;
738 else if (result > 0)
739 result = 1;
740 }
741 return result;
742 }
743
744 boolbool String::equalsLatin1(const char* p, int32_t len) const
745 {
746 if (len < 0)
747 len = Length(p);
748
749 if (len != length())
750 return falsefalse;
751
752 Pointers ptrs(this);
753 if (getWidth() == k8)
754 {
755 return equalsImpl(ptrs.p8, (const uint8_t*)p, len);
756 }
757 else
758 {
759 return equalsImpl(ptrs.p16, (const uint8_t*)p, len);
760 }
761 }
762
763 boolbool String::equalsUTF16(const wchar* p, int32_t len) const
764 {
765 if (len != length())
766 return falsefalse;
767
768 Pointers ptrs(this);
769 if (getWidth() == k8)
770 {
771 return equalsImpl(ptrs.p8, p, len);
772 }
773 else
774 {
775 return equalsImpl(ptrs.p16, p, len);
776 }
777 }
778
779 boolbool String::equals(Stringp that) const
780 {
781 if (this == that)
782 return truetrue;
783
784 int32_t const len1 = this->length();
785 int32_t const len2 = that->length();
786 if (len1 != len2)
787 return falsefalse;
788
789 Width const w1 = this->getWidth();
790 Width const w2 = that->getWidth();
791
792 Pointers thisbuf(this);
793 Pointers thatbuf(that);
794
795 switch ((w1 << 1) + w2)
796 {
797 case (k8 << 1) + k8:
798 return equalsImpl(thisbuf.p8, thatbuf.p8, len1);
799
800 case (k8 << 1) + k16:
801 return equalsImpl(thisbuf.p8, thatbuf.p16, len1);
802
803 case (k16 << 1) + k8:
804 return equalsImpl(thisbuf.p16, thatbuf.p8, len1);
805
806 case (k16 << 1) + k16:
807 return equalsImpl(thisbuf.p16, thatbuf.p16, len1);
808 }
809 AvmAssert(0)do { } while (0);
810 return truetrue;
811 }
812
813#ifdef VMCFG_NANOJIT
814 /*static*/ int32_t String::equalsWithNullChecks(Stringp s1, Stringp s2)
815 {
816 if (s1 == s2)
817 return truetrue;
818
819 if (!s1 || !s2)
820 return falsefalse;
821
822 int32_t const len1 = s1->length();
823 int32_t const len2 = s2->length();
824 if (len1 != len2)
825 return falsefalse;
826
827 Width const w1 = s1->getWidth();
828 Width const w2 = s2->getWidth();
829
830 Pointers thisbuf(s1);
831 Pointers thatbuf(s2);
832
833 switch ((w1 << 1) + w2)
834 {
835 case (k8 << 1) + k8:
836 return equalsImpl(thisbuf.p8, thatbuf.p8, len1);
837
838 case (k8 << 1) + k16:
839 return equalsImpl(thisbuf.p8, thatbuf.p16, len1);
840
841 case (k16 << 1) + k8:
842 return equalsImpl(thisbuf.p16, thatbuf.p8, len1);
843
844 case (k16 << 1) + k16:
845 return equalsImpl(thisbuf.p16, thatbuf.p16, len1);
846 }
847 AvmAssert(0)do { } while (0);
848 return truetrue;
849 }
850#endif // VMCFG_NANOJIT
851
852/////////////////////////////// Hash Codes /////////////////////////////////
853
854 // The hashing algorithm uses the full character width
855
856 int32_t String::hashCodeLatin1(const char* buf, int32_t len)
857 {
858 return hashCodeImpl((const utf8_t*)buf, len);
859 }
860
861 int32_t String::hashCodeUTF16(const wchar* buf, int32_t len)
862 {
863 return hashCodeImpl(buf, len);
864 }
865
866 int32_t String::hashCodeUInt(uint32_t value)
867 {
868 // Below is largely partial-evaluated composition of
869 // MathUtils::convertIntegerToStringBuffer and
870 // String::hashCodeLatin1
871
872 int32_t radix = 10;
873
874 int32_t hashCode = 0;
875
876 int32_t len = stringLengthUInt(value);
877
878 if (value == 0)
879 {
880 hashCode = hashCodeAddChar(hashCode, '0');
881 }
882 else
883 {
884 uintptr_t uVal = (uintptr_t)value;
885 uintptr_t factor = 1;
886 uintptr_t cursor = len;
887 while (--cursor)
888 factor *= radix;
889 cursor = len;
890
891 while (cursor > 0)
892 {
893 uintptr_t j = uVal;
894 j /= factor;
895 uVal %= factor;
896 factor /= radix;
897 AvmAssert( j < 10 )do { } while (0);
898 hashCode = hashCodeAddChar(hashCode, (char)(j+'0'));
899 cursor--;
900 }
901 }
902
903 return hashCode;
904 }
905
906 int32_t String::hashCode() const
907 {
908 if (m_length != 0)
909 {
910 Pointers ptrs(this);
911 if (getWidth() == k8)
912 {
913 return hashCodeImpl(ptrs.p8, m_length);
914 }
915 else
916 {
917 return hashCodeImpl(ptrs.p16, m_length);
918 }
919 }
920 return 0;
921 }
922
923//////////////////////////////// Accessors /////////////////////////////////
924
925 wchar FASTCALL__attribute__((fastcall)) String::charAt(int32_t index) const
926 {
927 AvmAssert(index >= 0 && index < m_length)do { } while (0);
928
929 Pointers ptrs(this);
930 return (getWidth() == k8) ? ptrs.p8[index] : ptrs.p16[index];
931 }
932
933 int32_t String::indexOf(Stringp substr, int32_t start) const
934 {
935 if (substr == NULL__null)
936 return -1;
937
938 // bug 78346: argv[1] might be less than zero.
939 // We clamp it to zero for two reasons:
940 // 1. A movie created prior to this fix with a small negative value probably worked,
941 // so let's fix it without breaking them.
942 // 2. I am told this is what java does.
943 if (start < 0)
944 start = 0;
945
946 int32_t len = this->length();
947
948 if (start > len)
949 start = len;
950
951 int32_t sublen = substr->length();
952
953 if (sublen == 0)
954 return start;
955
956 // right is the last character in selfString subStr could be found at
957 // (and further, and there isn't enough of selfString remaining for a match to be possible)
958 const int32_t right = len - sublen;
959 if (right < 0)
960 return -1;
961
962 Width const w1 = getWidth();
963 Width const w2 = substr->getWidth();
964
965 Pointers selfBuf(this);
966 Pointers subBuf(substr);
967
968 // For maximum performance, use different cases for k8/k16 combinations
969 if (sublen == 1)
970 {
971 switch ((w1 << 1) + w2)
972 {
973 case (k8 << 1) + k8:
974 return indexOfCharCodeImpl(selfBuf.p8, start, right, subBuf.p8[0]);
975
976 case (k8 << 1) + k16:
977 return indexOfCharCodeImpl(selfBuf.p8, start, right, subBuf.p16[0]);
978
979 case (k16 << 1) + k8:
980 return indexOfCharCodeImpl(selfBuf.p16, start, right, subBuf.p8[0]);
981
982 case (k16 << 1) + k16:
983 return indexOfCharCodeImpl(selfBuf.p16, start, right, subBuf.p16[0]);
984 }
985 }
986 else
987 {
988 switch ((w1 << 1) + w2)
989 {
990 case (k8 << 1) + k8:
991 return indexOfImpl(selfBuf.p8, start, right, subBuf.p8, sublen);
992
993 case (k8 << 1) + k16:
994 return indexOfImpl(selfBuf.p8, start, right, subBuf.p16, sublen);
995
996 case (k16 << 1) + k8:
997 return indexOfImpl(selfBuf.p16, start, right, subBuf.p8, sublen);
998
999 case (k16 << 1) + k16:
1000 return indexOfImpl(selfBuf.p16, start, right, subBuf.p16, sublen);
1001 }
1002 }
1003 AvmAssert(0)do { } while (0);
1004 return -1;
1005 }
1006
1007 int32_t String::lastIndexOf(Stringp substr, int32_t start) const
1008 {
1009 if (substr == NULL__null)
1010 return -1;
1011
1012 // lastIndexOf("anything", negative-number) can't match anything, ever:
1013 // match FP10's behavior and return -1 immediately.
1014 if (start < 0)
1015 return -1;
1016
1017 int32_t len = this->length();
1018
1019 if (start > len)
1020 start = len;
1021
1022 int32_t sublen = substr->length();
1023
1024 if (sublen == 0)
1025 return start;
1026
1027 // Any match starting after right must necessarily fail, as there will be
1028 // insufficient characters remaining to match.
1029 const int32_t right = len - sublen;
1030
1031 // The substring is longer than selfString, and therefore no match is possible.
1032 // We must avoid setting start to a negative value below.
1033 if (right < 0)
1034 return -1;
1035
1036 // bug 78346: argv[1] might be greater than right
1037 // (similar reasons to above apply).
1038 if (start > right)
1039 start = right;
1040
1041 Width w1 = getWidth();
1042 Width w2 = substr->getWidth();
1043
1044 Pointers selfBuf(this);
1045 Pointers subBuf(substr);
1046
1047 // For maximum performance, use different cases for k8/k16 combinations
1048 switch ((w1 << 1) + w2)
1049 {
1050 case (k8 << 1) + k8:
1051 return lastIndexOfImpl(selfBuf.p8, start, subBuf.p8, sublen);
1052
1053 case (k8 << 1) + k16:
1054 return lastIndexOfImpl(selfBuf.p8, start, subBuf.p16, sublen);
1055
1056 case (k16 << 1) + k8:
1057 return lastIndexOfImpl(selfBuf.p16, start, subBuf.p8, sublen);
1058
1059 case (k16 << 1) + k16:
1060 return lastIndexOfImpl(selfBuf.p16, start, subBuf.p16, sublen);
1061 }
1062 AvmAssert(0)do { } while (0);
1063 return -1;
1064 }
1065
1066 int32_t String::indexOfLatin1(const char* p, int32_t sublen, int32_t start, int32_t end) const
1067 {
1068 if (start < 0)
1069 start = 0;
1070 if (end < 0)
1071 end = 0;
1072 if (end > m_length)
1073 end = m_length;
1074
1075 if (p == NULL__null || end <= start)
1076 return -1;
1077
1078 if (sublen < 0)
1079 sublen = Length(p);
1080
1081 if (sublen == 0)
1082 return start;
1083
1084 int32_t right = end - sublen;
1085 if (right < 0)
1086 return -1;
1087
1088 Width w = getWidth();
1089
1090 Pointers selfBuf(this);
1091 if (w == k8)
1092 {
1093 return indexOfImpl(selfBuf.p8, start, right, (const uint8_t*)p, sublen);
1094 }
1095 else
1096 {
1097 return indexOfImpl(selfBuf.p16, start, right, (const uint8_t*)p, sublen);
1098 }
1099 }
1100
1101 int32_t String::indexOfCharCode(wchar c, int32_t start, int32_t end) const
1102 {
1103 if (start < 0)
1104 start = 0;
1105 if (end < 0)
1106 end = 0;
1107 if (end > m_length)
1108 end = m_length;
1109
1110 if (end <= start)
1111 return -1;
1112
1113 int32_t right = end - 1;
1114 if (right < 0)
1115 return -1;
1116
1117 Pointers ptrs(this);
1118 if (getWidth() == String::k8)
1119 {
1120 return indexOfCharCodeImpl(ptrs.p8, start, right, c);
1121 }
1122 else
1123 {
1124 return indexOfCharCodeImpl(ptrs.p16, start, right, c);
1125 }
1126 }
1127
1128 boolbool String::matchesLatin1(const char* p, int32_t len, int32_t pos)
1129 {
1130 if (p == NULL__null || pos >= m_length)
1131 return falsefalse;
1132
1133 if (pos < 0)
1134 pos = 0;
1135 if (len < 0)
1136 len = Length(p);
1137
1138 Pointers ptrs(this);
1139 if (getWidth() == k8)
1140 {
1141 return equalsImpl(ptrs.p8 + pos, (const uint8_t*)p, len);
1142 }
1143 else
1144 {
1145 return equalsImpl(ptrs.p16 + pos, (const uint8_t*)p, len);
1146 }
1147 }
1148
1149 boolbool String::matchesLatin1_caseless(const char* p, int32_t len, int32_t pos)
1150 {
1151 if (p == NULL__null || pos >= m_length)
1152 return falsefalse;
1153
1154 if (pos < 0)
1155 pos = 0;
1156 if (len < 0)
1157 len = Length(p);
1158
1159 StringIndexer self(this);
1160 while (len--)
1161 {
1162 wchar const ch1 = wCharToUpper(self[pos++]);
1163 wchar const ch2 = wCharToUpper(uint8_t(*p++));
1164 if (ch1 != ch2)
1165 return falsefalse;
1166 }
1167 return truetrue;
1168 }
1169
1170///////////////////////////// Concatenation ////////////////////////////////
1171
1172 /*static*/ Stringp String::concatStrings(Stringp leftStr, Stringp rightStr)
1173 {
1174 if (leftStr == NULL__null || leftStr->m_length == 0)
1175 return rightStr;
1176
1177 return leftStr->_append(rightStr, Pointers(rightStr), rightStr->length(), rightStr->getWidth());
1178 }
1179
1180 Stringp String::append(Stringp rightStr)
1181 {
1182 if (rightStr == NULL__null || rightStr->m_length == 0)
1183 return this;
1184
1185 return _append(rightStr, Pointers(rightStr), rightStr->length(), rightStr->getWidth());
1186 }
1187
1188 // NB: if rightStrPtr is nonnull, it is assumed to be the source of rightStr.
1189 // In this case, rightStr is assumed to be invalidate by any possible GC activity,
1190 // and will be re-created from rightStrPtr.
1191 Stringp String::_append(Stringp rightStrPtr, const Pointers& rightStr, int32_t numChars, Width charWidth)
1192 {
1193 if (numChars <= 0)
1194 return this;
1195
1196 AvmAssert(charWidth != kAuto)do { } while (0);
1197
1198 MMgc::GC* gc = _gc(this);
1199 // fromCharCode() optimization: If this string is empty, and the buffer is a single
1200 // ASCII character, return the cached character
1201 if (m_length == 0 && numChars == 1)
1202 {
1203 // Sun studio generated wrong code for the following Conditional operator.
1204 // Add type conversion to wchar as a workaround.
1205 wchar ch = (charWidth == k8) ? (wchar)rightStr.p8[0] : (wchar)rightStr.p16[0];
1206 if (ch < 128)
1207 return gc->core()->cachedChars[ch];
1208 }
1209
1210 Width thisWidth = getWidth();
1211 Width newWidth = (thisWidth < charWidth) ? charWidth : thisWidth;
1212
1213 int32_t newLen = int32AddChecked(m_length, numChars);
1214
1215 Stringp master = (isDependent()) ? m_extra.master : this;
1216 // check for characters left in leftStr's buffer, or if leftStr has spent its padding already
1217 // string types other than kDynamic have charsLeft == 0
1218 int32_t charsLeft = 0;
1219 int32_t charsUsed = 0;
1220 if (thisWidth >= charWidth)
1221 {
1222 // in-place append only if rightStr's width fits into leftStr
1223 charsLeft = master->getCharsLeft();
1224 if (!master->isStatic())
1225 // charsUsed is the number of chars already spent behind m_length
1226 // Usage of m_buffer is OK - master can never be dependent
1227 charsUsed = int32_t((GC::Size(master->m_buffer.pv) >> thisWidth) - master->m_length - charsLeft);
1228 }
1229 int32_t start = 0; // string start for dependent strings
1230
1231 // it is possible to append in-place if
1232 // 1) this is a kDynamic string and charsUsed == 0
1233 // 2) this is a kDependent string and the end matches the master's real end
1234 switch (getType())
1235 {
1236 case kDynamic:
1237 if (charsUsed != 0)
1238 // someone else has already appended in-place
1239 charsLeft = 0;
1240 break;
1241 case kDependent:
1242 start = (int32_t) m_buffer.offset_bytes >> thisWidth;
1243 if ((start + m_length) != master->m_length + charsUsed)
1244 charsLeft = 0;
1245 break;
1246 default:; // kStatic
1247 }
1248
1249 // the big check: are there enough chars left?
1250 if (numChars <= charsLeft)
1251 {
1252 // the right-hand string fits into the buffer end
1253 _copyBuffers(rightStr.pv,
1254 Pointers(this).p8 + (m_length << thisWidth), // m_length << thiswidth is safe by definition
1255 numChars, charWidth, newWidth);
1256
1257 charsUsed += numChars;
Value stored to 'charsUsed' is never read
1258 charsLeft -= numChars;
1259 master->setCharsLeft(charsLeft);
1260#ifdef _DEBUG
1261 // Terminate string with 0 for better debugging display
1262 if (charsLeft && !master->isStatic())
1263 {
1264 int32_t end = master->m_length + charsUsed;
1265 if (newWidth == k8)
1266 master->m_buffer.p8[end] = 0;
1267 else
1268 master->m_buffer.p16[end] = 0;
1269 }
1270#endif
1271 Stringp s = createDependent(gc, master, start, newLen);
1272 // createDependent propagates the 7-bit flag, which is a good idea for substrings.
1273 // however, if we are adding data, it may no longer qualify. rather than checking
1274 // the appended data, just clear the flag (it will be lazily recalculated as needed)
1275 s->m_bitsAndFlags &= ~TSTR_7BIT_FLAG;
1276 return s;
1277 }
1278
1279 // fall thru - string does not fit
1280 // create a new kDynamic string containing the concatenated string
1281 // See the definition of TSTR_MAX_CHARSLEFT above for an explanation
1282 // of this algorithm
1283 int32_t newSize = (newLen < TSTR_MIN_DYNAMIC_ALLOCATION32) ? TSTR_MIN_DYNAMIC_ALLOCATION32 : int32ShlChecked(newLen, 1);
1284 int32_t extra = newSize - newLen;
1285 if (extra > (int32_t) TSTR_MAX_CHARSLEFT((uint32_t)TSTR_CHARSLEFT_MASK >> TSTR_CHARSLEFT_SHIFT >>
1)
)
1286 extra = (int32_t) TSTR_MAX_CHARSLEFT((uint32_t)TSTR_CHARSLEFT_MASK >> TSTR_CHARSLEFT_SHIFT >>
1)
;
1287
1288 const boolbool is7bit = falsefalse; // actually, might be, we just haven't checked yet.
1289 Stringp newStr = createDynamic(gc, NULL__null, newLen, newWidth, is7bit, extra);
1290
1291 // note that createDynamic has invalidated any existing Pointers structs...
1292 const void* srcLeft = Pointers(this).pv;
1293 const void* srcRight = rightStrPtr ? Pointers(rightStrPtr).pv : rightStr.pv;
1294
1295 // copy leftStr
1296 void* ptr = _copyBuffers(srcLeft,
1297 newStr->m_buffer.pv,
1298 m_length,
1299 thisWidth, newWidth);
1300
1301 // append src
1302 _copyBuffers(srcRight,
1303 ptr,
1304 numChars,
1305 charWidth, newWidth);
1306
1307#ifdef _DEBUG
1308 // Terminate string with 0 for better debugging display
1309 if (newStr->getCharsLeft())
1310 {
1311 if (newWidth == k8)
1312 newStr->m_buffer.p8[newStr->m_length] = 0;
1313 else
1314 newStr->m_buffer.p16[newStr->m_length] = 0;
1315 }
1316#endif
1317 VERIFY_7BIT(newStr)do { } while (0);
1318 return newStr;
1319 }
1320
1321/////////////////////////////// substrings /////////////////////////////////
1322
1323 // Remember: string runs from start up to, but not including, the end!
1324 Stringp String::substring(int32_t start, int32_t end)
1325 {
1326 if (start < 0)
1327 start = 0;
1328 else if (start > m_length)
1329 start = m_length;
1330 if (end < 0)
1331 end = 0;
1332 else if (end > m_length)
1333 end = m_length;
1334
1335 // does the substring span this string?
1336 if (start == 0 && end == m_length)
1337 return this;
1338
1339 MMgc::GC* gc = _gc(this);
1340 AvmCore* core = gc->core();
1341 // for empty strings, return the standard Empty String instance
1342 if (end <= start)
1343 return core->kEmptyString;
1344
1345 // for single characters < 128, return the cached string
1346 if (end == (start + 1))
1347 {
1348 wchar const ch = charAt(start);
1349 if (ch < 128)
1350 return core->cachedChars[ch];
1351 }
1352
1353 // otherwise, create a dependent string
1354 Stringp master = this;
1355 if (isDependent())
1356 {
1357 // get the string offset
1358 master = m_extra.master;
1359 // TODO: possible 32-bit overflow for a very huge dependent string
1360 int32_t offset = (int32_t) m_buffer.offset_bytes >> getWidth();
1361 start += (int32_t) offset;
1362 end += (int32_t) offset;
1363 }
1364 AvmAssert(!master->isDependent())do { } while (0);
1365 // you may be tempted to optimize for static strings, by creating
1366 // a new static string pointing to the substring... do not do this.
1367 // it will break static strings that point directly into ABC buffers.
1368 return createDependent(gc, master, start, end - start);
1369 }
1370
1371 Stringp String::substr(int32_t start, int32_t len)
1372 {
1373 start = (int32_t) NativeObjectHelpers::ClampIndexInt(start, m_length);
1374 len = (int32_t) NativeObjectHelpers::ClampIndexInt(len, m_length);
1375 int32_t end = (m_length-len<=start)? m_length:start+len; // no need to further clamp
1376 return substring(start, end);
1377 }
1378
1379 Stringp String::slice(int32_t start, int32_t end)
1380 {
1381 start = (int32_t) NativeObjectHelpers::ClampIndexInt(start, m_length);
1382 end = (int32_t) NativeObjectHelpers::ClampIndexInt(end, m_length);
1383 if (end < start)
1384 end = start;
1385
1386 return substr(start, end-start);
1387 }
1388
1389////////////////////////////////// Parsing ////////////////////////////////////
1390
1391 // This routine is a very specific parser to generate a positive integer from a string.
1392 // The following are supported:
1393 // "0" - one single digit for zero - NOT "00" or any other form of zero
1394 //[1-9]+[0-9]* up to 2^32-2 (4294967294)
1395 // 2^32-1 (4294967295) is not supported (see ECMA quote below).
1396 // The ECMA that we're supporting with this routine is...
1397 // cn: the ES3 test for a valid array index is
1398 // "A property name P (in the form of a string value) is an array index if and
1399 // only if ToString(ToUint32(P)) is equal to P and ToUint32(P) is not equal to 2^32-1."
1400
1401 // Don't support 000000 as 0.
1402 // We don't support 0x1234 as 1234 in hex since string(1234) doesn't equal '0x1234')
1403 // No leading zeros are supported
1404
1405 boolbool String::parseIndex(uint32_t& result) const
1406 {
1407 // avoid parsing if this string is definitely not a candidate
1408 // A string that is more than 10 digits (and does not start with 0)
1409 // will always be greater than 2^32-1 (4294967295) or not a numeric string
1410 if (m_bitsAndFlags & TSTR_NOUINT_FLAG)
1411 return falsefalse;
1412
1413 if (m_bitsAndFlags & (TSTR_UINT28_FLAG | TSTR_UINT32_FLAG))
1414 {
1415 result = m_extra.index;
1416 return truetrue;
1417 }
1418
1419 int64_t n = 0;
1420 wchar wch;
1421 Pointers ptrs(this);
1422 Width w = getWidth();
1423 if (m_length == 0 || m_length > 10)
1424 goto bad;
1425
1426 // collect the value
1427 for (int32_t i = 0; i < m_length; i++)
1428 {
1429 if (w == k8)
1430 wch = wchar(*ptrs.p8++);
1431 else
1432 wch = *ptrs.p16++;
1433 // bad character, or leading zero?
1434 if (wch < '0' || wch > '9' || (i == 1 && n == 0))
1435 goto bad;
1436 n = (n * 10) + wch - '0';
1437 }
1438 // out of range?
1439 if (n >= 0xFFFFFFFF)
1440 goto bad;
1441 if (n & ScriptObject::MAX_INTEGER_MASK)
1442 m_bitsAndFlags |= TSTR_NOINT_FLAG;
1443 if (!isDependent())
1444 {
1445 m_bitsAndFlags |= TSTR_UINT32_FLAG;
1446 m_extra.index = uint32_t(n);
1447 }
1448 result = uint32_t(n);
1449 return truetrue;
1450 bad:
1451 m_bitsAndFlags |= TSTR_NOINT_FLAG | TSTR_NOUINT_FLAG;
1452 return falsefalse;
1453 }
1454
1455 Atom String::getIntAtom() const
1456 {
1457 // The call uses a simplified version of parseIndex with 32-bit arithmetic,
1458 // since we can only fit 28 bits.
1459 if (m_bitsAndFlags & TSTR_NOINT_FLAG)
1460 return 0;
1461
1462 if (m_bitsAndFlags & TSTR_UINT28_FLAG)
1463 return Atom((m_extra.index << 3) | AtomConstants::kIntptrType);
1464
1465 int32_t n = 0;
1466 wchar wch;
1467 Pointers ptrs(this);
1468 Width w = getWidth();
1469 if (m_length == 0 || m_length > 10)
1470 goto bad;
1471
1472 // collect the value
1473 for (int32_t i = 0; i < m_length; i++)
1474 {
1475 if (w == k8)
1476 wch = wchar(*ptrs.p8++);
1477 else
1478 wch = *ptrs.p16++;
1479 // bad character, or leading zero?
1480 if (wch < '0' || wch > '9' || (i == 1 && n == 0))
1481 goto bad;
1482 n = (n * 10) + wch - '0';
1483 if (n & ScriptObject::MAX_INTEGER_MASK)
1484 goto bad;
1485 }
1486 if (!isDependent())
1487 {
1488 m_bitsAndFlags |= TSTR_UINT28_FLAG;
1489 m_extra.index = n;
1490 }
1491 return Atom((n << 3) | AtomConstants::kIntptrType);
1492 bad:
1493 m_bitsAndFlags |= TSTR_NOINT_FLAG;
1494 return 0;
1495 }
1496
1497/////////////////////////////// Case conversion /////////////////////////////////
1498
1499
// Table of wchar code points listed in ascending order. Many runs of values
// are commented out, so only the uncommented entries are part of the array.
// NOTE(review): judging by the name and by the identically laid-out
// upperCaseConversion table that follows, entry i here is presumably the
// lower-case character whose upper-case form is upperCaseConversion[i], and
// the commented-out ranges are presumably handled by other means - confirm
// against the code that indexes these tables.
1500 static const wchar lowerCaseBase[] =
1501 {
1502 /*0x0061, 0x0062, 0x0063, 0x0064, 0x0065, 0x0066, 0x0067, 0x0068, 0x0069, 0x006A,
1503 0x006B, 0x006C, 0x006D, 0x006E, 0x006F, 0x0070, 0x0071, 0x0072, 0x0073, 0x0074,
1504 0x0075, 0x0076, 0x0077, 0x0078, 0x0079, 0x007A,*/ 0x00B5, /*0x00E0, 0x00E1, 0x00E2,
1505 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7, 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC,
1506 0x00ED, 0x00EE, 0x00EF, 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6,
1507 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE,*/ 0x00FF, /*0x0101, 0x0103,
1508 0x0105, 0x0107, 0x0109, 0x010B, 0x010D, 0x010F, 0x0111, 0x0113, 0x0115, 0x0117,
1509 0x0119, 0x011B, 0x011D, 0x011F, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012B,
1510 0x012D, 0x012F,*/ 0x0131, 0x0133, 0x0135, 0x0137, /*0x013A, 0x013C, 0x013E, 0x0140,
1511 0x0142, 0x0144, 0x0146, 0x0148,*/ /*0x014B, 0x014D, 0x014F, 0x0151, 0x0153, 0x0155,
1512 0x0157, 0x0159, 0x015B, 0x015D, 0x015F, 0x0161, 0x0163, 0x0165, 0x0167, 0x0169,
1513 0x016B, 0x016D, 0x016F, 0x0171, 0x0173, 0x0175, 0x0177,*/ 0x017A, 0x017C, 0x017E,
1514 0x017F, 0x0183, 0x0185, 0x0188, 0x018C, 0x0192, 0x0195, 0x0199, 0x01A1, 0x01A3,
1515 0x01A5, 0x01A8, 0x01AD, 0x01B0, 0x01B4, 0x01B6, 0x01B9, 0x01BD, 0x01BF, 0x01C5,
1516 0x01C6, 0x01C8, 0x01C9, 0x01CB, 0x01CC, 0x01CE, 0x01D0, 0x01D2, 0x01D4, 0x01D6,
1517 0x01D8, 0x01DA, 0x01DC, 0x01DD, 0x01DF, 0x01E1, 0x01E3, 0x01E5, 0x01E7, 0x01E9,
1518 0x01EB, 0x01ED, 0x01EF, 0x01F2, 0x01F3, 0x01F5, 0x01F9, 0x01FB, 0x01FD, 0x01FF,
1519 /*0x0201, 0x0203, 0x0205, 0x0207, 0x0209, 0x020B, 0x020D, 0x020F, 0x0211, 0x0213,
1520 0x0215, 0x0217, 0x0219, 0x021B, 0x021D, 0x021F, 0x0223, 0x0225, 0x0227, 0x0229,
1521 0x022B, 0x022D, 0x022F, 0x0231, 0x0233,*/ 0x0253, 0x0254, 0x0256, 0x0257, 0x0259,
1522 0x025B, 0x0260, 0x0263, 0x0268, 0x0269, 0x026F, 0x0272, 0x0275, 0x0280, 0x0283,
1523 0x0288, 0x028A, 0x028B, 0x0292, 0x0345, 0x03AC, 0x03AD, 0x03AE, 0x03AF, /*0x03B1,
1524 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB,
1525 0x03BC, 0x03BD, 0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5,
1526 0x03C6, 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB,*/ 0x03CC, 0x03CD, 0x03CE, 0x03D0,
1527 0x03D1, 0x03D5, 0x03D6, /*0x03DB, 0x03DD, 0x03DF, 0x03E1, 0x03E3, 0x03E5, 0x03E7,
1528 0x03E9, 0x03EB, 0x03ED, 0x03EF,*/ 0x03F0, 0x03F1, 0x03F2, 0x03F5, /*0x0430, 0x0431,
1529 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437, 0x0438, 0x0439, 0x043A, 0x043B,
1530 0x043C, 0x043D, 0x043E, 0x043F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445,
1531 0x0446, 0x0447, 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,*/
1532 /*0x0450, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459,
1533 0x045A, 0x045B, 0x045C, 0x045D, 0x045E, 0x045F,*/ /*0x0461, 0x0463, 0x0465, 0x0467,
1534 0x0469, 0x046B, 0x046D, 0x046F, 0x0471, 0x0473, 0x0475, 0x0477, 0x0479, 0x047B,
1535 0x047D, 0x047F, 0x0481, 0x048D, 0x048F, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499,
1536 0x049B, 0x049D, 0x049F, 0x04A1, 0x04A3, 0x04A5, 0x04A7, 0x04A9, 0x04AB, 0x04AD,
1537 0x04AF, 0x04B1, 0x04B3, 0x04B5, 0x04B7, 0x04B9, 0x04BB, 0x04BD, 0x04BF,*/ 0x04C2,
1538 0x04C4, 0x04C8, 0x04CC, /*0x04D1, 0x04D3, 0x04D5, 0x04D7, 0x04D9, 0x04DB, 0x04DD,
1539 0x04DF, 0x04E1, 0x04E3, 0x04E5, 0x04E7, 0x04E9, 0x04EB, 0x04ED, 0x04EF, 0x04F1,
1540 0x04F3, 0x04F5, 0x04F9, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, 0x0566, 0x0567,
1541 0x0568, 0x0569, 0x056A, 0x056B, 0x056C, 0x056D, 0x056E, 0x056F, 0x0570, 0x0571,
1542 0x0572, 0x0573, 0x0574, 0x0575, 0x0576, 0x0577, 0x0578, 0x0579, 0x057A, 0x057B,
1543 0x057C, 0x057D, 0x057E, 0x057F, 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585,
1544 0x0586, 0x1E01, 0x1E03, 0x1E05, 0x1E07, 0x1E09, 0x1E0B, 0x1E0D, 0x1E0F, 0x1E11,
1545 0x1E13, 0x1E15, 0x1E17, 0x1E19, 0x1E1B, 0x1E1D, 0x1E1F, 0x1E21, 0x1E23, 0x1E25,
1546 0x1E27, 0x1E29, 0x1E2B, 0x1E2D, 0x1E2F, 0x1E31, 0x1E33, 0x1E35, 0x1E37, 0x1E39,
1547 0x1E3B, 0x1E3D, 0x1E3F, 0x1E41, 0x1E43, 0x1E45, 0x1E47, 0x1E49, 0x1E4B, 0x1E4D,
1548 0x1E4F, 0x1E51, 0x1E53, 0x1E55, 0x1E57, 0x1E59, 0x1E5B, 0x1E5D, 0x1E5F, 0x1E61,
1549 0x1E63, 0x1E65, 0x1E67, 0x1E69, 0x1E6B, 0x1E6D, 0x1E6F, 0x1E71, 0x1E73, 0x1E75,
1550 0x1E77, 0x1E79, 0x1E7B, 0x1E7D, 0x1E7F, 0x1E81, 0x1E83, 0x1E85, 0x1E87, 0x1E89,
1551 0x1E8B, 0x1E8D, 0x1E8F, 0x1E91, 0x1E93, 0x1E95,*/ 0x1E9B, /*0x1EA1, 0x1EA3, 0x1EA5,
1552 0x1EA7, 0x1EA9, 0x1EAB, 0x1EAD, 0x1EAF, 0x1EB1, 0x1EB3, 0x1EB5, 0x1EB7, 0x1EB9,
1553 0x1EBB, 0x1EBD, 0x1EBF, 0x1EC1, 0x1EC3, 0x1EC5, 0x1EC7, 0x1EC9, 0x1ECB, 0x1ECD,
1554 0x1ECF, 0x1ED1, 0x1ED3, 0x1ED5, 0x1ED7, 0x1ED9, 0x1EDB, 0x1EDD, 0x1EDF, 0x1EE1,
1555 0x1EE3, 0x1EE5, 0x1EE7, 0x1EE9, 0x1EEB, 0x1EED, 0x1EEF, 0x1EF1, 0x1EF3, 0x1EF5,
1556 0x1EF7, 0x1EF9,*/ /*0x1F00, 0x1F01, 0x1F02, 0x1F03, 0x1F04, 0x1F05, 0x1F06, 0x1F07,*/
1557 /*0x1F10, 0x1F11, 0x1F12, 0x1F13, 0x1F14, 0x1F15, 0x1F20, 0x1F21, 0x1F22, 0x1F23,
1558 0x1F24, 0x1F25, 0x1F26, 0x1F27, 0x1F30, 0x1F31, 0x1F32, 0x1F33, 0x1F34, 0x1F35,
1559 0x1F36, 0x1F37,*/ 0x1F40, 0x1F41, 0x1F42, 0x1F43, 0x1F44, 0x1F45, 0x1F51, 0x1F53,
1560 0x1F55, 0x1F57, 0x1F60, 0x1F61, 0x1F62, 0x1F63, 0x1F64, 0x1F65, 0x1F66, 0x1F67,
1561 0x1F70, 0x1F71, 0x1F72, 0x1F73, 0x1F74, 0x1F75, 0x1F76, 0x1F77, 0x1F78, 0x1F79,
1562 0x1F7A, 0x1F7B, 0x1F7C, 0x1F7D, 0x1F80, 0x1F81, 0x1F82, 0x1F83, 0x1F84, 0x1F85,
1563 0x1F86, 0x1F87, 0x1F90, 0x1F91, 0x1F92, 0x1F93, 0x1F94, 0x1F95, 0x1F96, 0x1F97,
1564 0x1FA0, 0x1FA1, 0x1FA2, 0x1FA3, 0x1FA4, 0x1FA5, 0x1FA6, 0x1FA7, 0x1FB0, 0x1FB1,
1565 0x1FB3, 0x1FBE, 0x1FC3, 0x1FD0, 0x1FD1, 0x1FE0, 0x1FE1, 0x1FE5, 0x1FF3 /*0x2170,
1566 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177, 0x2178, 0x2179, 0x217A,
1567 0x217B, 0x217C, 0x217D, 0x217E, 0x217F, 0x24D0, 0x24D1, 0x24D2, 0x24D3, 0x24D4,
1568 0x24D5, 0x24D6, 0x24D7, 0x24D8, 0x24D9, 0x24DA, 0x24DB, 0x24DC, 0x24DD, 0x24DE,
1569 0x24DF, 0x24E0, 0x24E1, 0x24E2, 0x24E3, 0x24E4, 0x24E5, 0x24E6, 0x24E7, 0x24E8,
1570 0x24E9 0xFF41, 0xFF42, 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49,
1571 0xFF4A, 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0xFF50, 0xFF51, 0xFF52, 0xFF53,
1572 0xFF54, 0xFF55, 0xFF56, 0xFF57, 0xFF58, 0xFF59, 0xFF5A*/
1573 };
1574
1575 static const wchar upperCaseConversion[] =
1576 {
1577 /*0x0041, 0x0042, 0x0043, 0x0044, 0x0045, 0x0046, 0x0047, 0x0048, 0x0049, 0x004A,
1578 0x004B, 0x004C, 0x004D, 0x004E, 0x004F, 0x0050, 0x0051, 0x0052, 0x0053, 0x0054,
1579 0x0055, 0x0056, 0x0057, 0x0058, 0x0059, 0x005A,*/ 0x039C, /*0x00C0, 0x00C1, 0x00C2,
1580 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7, 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC,
1581 0x00CD, 0x00CE, 0x00CF, 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6,
1582 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE,*/ 0x0178, /*0x0100, 0x0102,
1583 0x0104, 0x0106, 0x0108, 0x010A, 0x010C, 0x010E, 0x0110, 0x0112, 0x0114, 0x0116,
1584 0x0118, 0x011A, 0x011C, 0x011E, 0x0120, 0x0122, 0x0124, 0x0126, 0x0128, 0x012A,
1585 0x012C, 0x012E,*/ 0x0049, 0x0132, 0x0134, 0x0136, /*0x0139, 0x013B, 0x013D, 0x013F,
1586 0x0141, 0x0143, 0x0145, 0x0147,*/ /*0x014A, 0x014C, 0x014E, 0x0150, 0x0152, 0x0154,
1587 0x0156, 0x0158, 0x015A, 0x015C, 0x015E, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168,
1588 0x016A, 0x016C, 0x016E, 0x0170, 0x0172, 0x0174, 0x0176,*/ 0x0179, 0x017B, 0x017D,
1589 0x0053, 0x0182, 0x0184, 0x0187, 0x018B, 0x0191, 0x01F6, 0x0198, 0x01A0, 0x01A2,
1590 0x01A4, 0x01A7, 0x01AC, 0x01AF, 0x01B3, 0x01B5, 0x01B8, 0x01BC, 0x01F7, 0x01C4,
1591 0x01C4, 0x01C7, 0x01C7, 0x01CA, 0x01CA, 0x01CD, 0x01CF, 0x01D1, 0x01D3, 0x01D5,
1592 0x01D7, 0x01D9, 0x01DB, 0x018E, 0x01DE, 0x01E0, 0x01E2, 0x01E4, 0x01E6, 0x01E8,
1593 0x01EA, 0x01EC, 0x01EE, 0x01F1, 0x01F1, 0x01F4, 0x01F8, 0x01FA, 0x01FC, 0x01FE,
1594 /*0x0200, 0x0202, 0x0204, 0x0206, 0x0208, 0x020A, 0x020C, 0x020E, 0x0210, 0x0212,
1595 0x0214, 0x0216, 0x0218, 0x021A, 0x021C, 0x021E, 0x0222, 0x0224, 0x0226, 0x0228,
1596 0x022A, 0x022C, 0x022E, 0x0230, 0x0232,*/ 0x0181, 0x0186, 0x0189, 0x018A, 0x018F,
1597 0x0190, 0x0193, 0x0194, 0x0197, 0x0196, 0x019C, 0x019D, 0x019F, 0x01A6, 0x01A9,
1598 0x01AE, 0x01B1, 0x01B2, 0x01B7, 0x0399, 0x0386, 0x0388, 0x0389, 0x038A, /*0x0391,
1599 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B,
1600 0x039C, 0x039D, 0x039E, 0x039F, 0x03A0, 0x03A1, 0x03A3, 0x03A3, 0x03A4, 0x03A5,
1601 0x03A6, 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB,*/ 0x038C, 0x038E, 0x038F, 0x0392,
1602 0x0398, 0x03A6, 0x03A0, /*0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x03E2, 0x03E4, 0x03E6,
1603 0x03E8, 0x03EA, 0x03EC, 0x03EE,*/ 0x039A, 0x03A1, 0x03A3, 0x0395, /*0x0410, 0x0411,
1604 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417, 0x0418, 0x0419, 0x041A, 0x041B,
1605 0x041C, 0x041D, 0x041E, 0x041F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425,
1606 0x0426, 0x0427, 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,*/
1607 /*0x0400, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409,
1608 0x040A, 0x040B, 0x040C, 0x040D, 0x040E, 0x040F,*//*0x0460, 0x0462, 0x0464, 0x0466,
1609 0x0468, 0x046A, 0x046C, 0x046E, 0x0470, 0x0472, 0x0474, 0x0476, 0x0478, 0x047A,
1610 0x047C, 0x047E, 0x0480, 0x048C, 0x048E, 0x0490, 0x0492, 0x0494, 0x0496, 0x0498,
1611 0x049A, 0x049C, 0x049E, 0x04A0, 0x04A2, 0x04A4, 0x04A6, 0x04A8, 0x04AA, 0x04AC,
1612 0x04AE, 0x04B0, 0x04B2, 0x04B4, 0x04B6, 0x04B8, 0x04BA, 0x04BC, 0x04BE,*/ 0x04C1,
1613 0x04C3, 0x04C7, 0x04CB, /*0x04D0, 0x04D2, 0x04D4, 0x04D6, 0x04D8, 0x04DA, 0x04DC,
1614 0x04DE, 0x04E0, 0x04E2, 0x04E4, 0x04E6, 0x04E8, 0x04EA, 0x04EC, 0x04EE, 0x04F0,
1615 0x04F2, 0x04F4, 0x04F8, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537,
1616 0x0538, 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F, 0x0540, 0x0541,
1617 0x0542, 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B,
1618 0x054C, 0x054D, 0x054E, 0x054F, 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555,
1619 0x0556, 0x1E00, 0x1E02, 0x1E04, 0x1E06, 0x1E08, 0x1E0A, 0x1E0C, 0x1E0E, 0x1E10,
1620 0x1E12, 0x1E14, 0x1E16, 0x1E18, 0x1E1A, 0x1E1C, 0x1E1E, 0x1E20, 0x1E22, 0x1E24,
1621 0x1E26, 0x1E28, 0x1E2A, 0x1E2C, 0x1E2E, 0x1E30, 0x1E32, 0x1E34, 0x1E36, 0x1E38,
1622 0x1E3A, 0x1E3C, 0x1E3E, 0x1E40, 0x1E42, 0x1E44, 0x1E46, 0x1E48, 0x1E4A, 0x1E4C,
1623 0x1E4E, 0x1E50, 0x1E52, 0x1E54, 0x1E56, 0x1E58, 0x1E5A, 0x1E5C, 0x1E5E, 0x1E60,
1624 0x1E62, 0x1E64, 0x1E66, 0x1E68, 0x1E6A, 0x1E6C, 0x1E6E, 0x1E70, 0x1E72, 0x1E74,
1625 0x1E76, 0x1E78, 0x1E7A, 0x1E7C, 0x1E7E, 0x1E80, 0x1E82, 0x1E84, 0x1E86, 0x1E88,
1626 0x1E8A, 0x1E8C, 0x1E8E, 0x1E90, 0x1E92, 0x1E94,*/ 0x1E60, /*0x1EA0, 0x1EA2, 0x1EA4,
1627 0x1EA6, 0x1EA8, 0x1EAA, 0x1EAC, 0x1EAE, 0x1EB0, 0x1EB2, 0x1EB4, 0x1EB6, 0x1EB8,
1628 0x1EBA, 0x1EBC, 0x1EBE, 0x1EC0, 0x1EC2, 0x1EC4, 0x1EC6, 0x1EC8, 0x1ECA, 0x1ECC,
1629 0x1ECE, 0x1ED0, 0x1ED2, 0x1ED4, 0x1ED6, 0x1ED8, 0x1EDA, 0x1EDC, 0x1EDE, 0x1EE0,
1630 0x1EE2, 0x1EE4, 0x1EE6, 0x1EE8, 0x1EEA, 0x1EEC, 0x1EEE, 0x1EF0, 0x1EF2, 0x1EF4,
1631 0x1EF6, 0x1EF8,*/ /*0x1F08, 0x1F09, 0x1F0A, 0x1F0B, 0x1F0C, 0x1F0D, 0x1F0E, 0x1F0F,*/
1632 /*0x1F18, 0x1F19, 0x1F1A, 0x1F1B, 0x1F1C, 0x1F1D, 0x1F28, 0x1F29, 0x1F2A, 0x1F2B,
1633 0x1F2C, 0x1F2D, 0x1F2E, 0x1F2F, 0x1F38, 0x1F39, 0x1F3A, 0x1F3B, 0x1F3C, 0x1F3D,
1634 0x1F3E, 0x1F3F,*/ 0x1F48, 0x1F49, 0x1F4A, 0x1F4B, 0x1F4C, 0x1F4D, 0x1F59, 0x1F5B,
1635 0x1F5D, 0x1F5F, 0x1F68, 0x1F69, 0x1F6A, 0x1F6B, 0x1F6C, 0x1F6D, 0x1F6E, 0x1F6F,
1636 0x1FBA, 0x1FBB, 0x1FC8, 0x1FC9, 0x1FCA, 0x1FCB, 0x1FDA, 0x1FDB, 0x1FF8, 0x1FF9,
1637 0x1FEA, 0x1FEB, 0x1FFA, 0x1FFB, 0x1F88, 0x1F89, 0x1F8A, 0x1F8B, 0x1F8C, 0x1F8D,
1638 0x1F8E, 0x1F8F, 0x1F98, 0x1F99, 0x1F9A, 0x1F9B, 0x1F9C, 0x1F9D, 0x1F9E, 0x1F9F,
1639 0x1FA8, 0x1FA9, 0x1FAA, 0x1FAB, 0x1FAC, 0x1FAD, 0x1FAE, 0x1FAF, 0x1FB8, 0x1FB9,
1640 0x1FBC, 0x0399, 0x1FCC, 0x1FD8, 0x1FD9, 0x1FE8, 0x1FE9, 0x1FEC, 0x1FFC /*0x2160,
1641 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A,
1642 0x216B, 0x216C, 0x216D, 0x216E, 0x216F 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA,
1643 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4,
1644 0x24C5, 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE,
1645 0x24CF 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29,
1646 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F, 0xFF30, 0xFF31, 0xFF32, 0xFF33,
1647 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A*/
1648 };
1649
1650
1651 // 12sep02 grandma : table driven inline function is 14x faster than original function,
1652 // Using first 100 movies of ATS, HashKey alone calls CharToUpper() 360,000 times.
1653
1654 // Only entries 0-254 are used in these tables in unicharToUpper and unicharToLower
1655 // so entry 255 has been removed. Entry 255 is handled by the lowerCaseBase code.
1656
// XOR mask table used for lowercasing code points 0..254 (255 entries; there is no entry 255).
// An entry is 0x20 for the code points that lowercasing changes (65-90, 192-214 and 216-222)
// and 0x00 for every other index. unicharToLower() returns tolower_map[ch] ^ ch for ch < 0xFF,
// so a 0x00 entry leaves the character untouched.
1657 static const uint8_t tolower_map[] =
1658 {
1659 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //0-15
1660 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //16-31
1661 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //32-47
1662 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //48-63
1663 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, //64-79
1664 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, //80-95
1665 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //96-111
1666 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //112-127
1667 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //128-143
1668 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //144-159
1669 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //160-175
1670 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //176-191
1671 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, //192-207
1672 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, //208-223
1673 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //224-239
1674 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00 //240-254
1675 };
1676
// XOR mask table used for uppercasing code points 0..254 (255 entries; there is no entry 255).
// An entry is 0x20 for the code points that uppercasing changes (97-122, 224-246 and 248-254)
// and 0x00 for every other index. unicharToUpper() returns toupper_map[ch] ^ ch for ch < 0xFF,
// so a 0x00 entry leaves the character untouched.
1677 static const uint8_t toupper_map[] =
1678 {
1679 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //0-15
1680 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //16-31
1681 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //32-47
1682 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //48-63
1683 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //64-79
1684 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //80-95
1685 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, //96-111
1686 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, //112-127
1687 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //128-143
1688 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //144-159
1689 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //160-175
1690 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //176-191
1691 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //192-207
1692 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, //208-223
1693 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, //224-239
1694 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x00, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20, 0x20 //240-254
1695 };
1696
1697
1698 // WARNING: This is used by the core flash code. Any change to this utility, or the tables
1699 // it relies on, will break legacy Flash content.
1700 /*static*/ uint32_t String::unicharToUpper(uint32_t ch)
1701 {
1702 if (ch < 0xFF)
1703 return toupper_map[ch] ^ ch;
1704
1705 // offset 0x1C60
1706 /*
1707 if ( (ch>=0x2D00 && ch<=0x2D25) ) // Georgian
1708 {
1709 return (ch - 0x1C60);
1710 }
1711 */
1712
1713 // offset 80
1714 if ( (ch>=0x0450 && ch<=0x045F) )
1715 {
1716 return (ch - 0x50);
1717 }
1718
1719 // offset 48
1720 if ( (ch>=0x0561 && ch<=0x0586) )
1721 {
1722 return (ch - 0x30);
1723 }
1724
1725 // offset -32
1726 if ( (ch>=0x03B1 && ch<=0x03CB) ||
1727 (ch>=0x0430 && ch<=0x044F) ||
1728 (ch>=0xFF41 && ch<=0xFF5A) )
1729 {
1730 return (ch - 0x20);
1731 }
1732
1733 // offset 26
1734 if ( (ch>=0x24D0 && ch<=0x24E9) )
1735 {
1736 return (ch - 0x1A);
1737 }
1738
1739 // offset 16
1740 if ( (ch>=0x2170 && ch<=0x217F) )
1741 {
1742 return (ch - 0x10);
1743 }
1744
1745 // offset +8(positive)
1746 if ( (ch>=0x1F00 && ch<=0X1F07) ||
1747 (ch>=0x1F10 && ch<=0x1F15) ||
1748 (ch>=0x1F20 && ch<=0x1F27) ||
1749 (ch>=0x1F30 && ch<=0x1F37) )
1750 {
1751 return (ch + 0x8);
1752 }
1753
1754 // offset -1
1755 if ( (ch>=0x0101 && ch<=0x0233) )
1756 {
1757 if ( ((ch<=0x012F) && (ch&0x1)) || // odd only
1758 ((ch>=0x013A && ch<=0x0148) && !(ch&0x1)) || // even only
1759 ((ch>=0x014B && ch<=0x0177) && (ch&1)) || // odd only
1760 ((ch>=0x0201 && ch<=0x0233) && (ch&1) && ch!=0x0221) ) // odd only
1761 {
1762 return (ch - 1);
1763 }
1764 }
1765
1766 if ( ch&1 ) // only looking for odd chars here
1767 {
1768 // GREEK
1769 if ( ch>=0x03D9 && ch<=0x03EF )
1770 return (ch - 1);
1771 // CYRILLIC
1772 if ( ((ch>=0x0461 && ch<=0x04BF) && !(ch==0x0483 || ch==0x0485 || ch==0x487 || ch==0x0489)) ||
1773 (ch>=0x04D1 && ch<=0x04F9) )
1774 {
1775 if (ch!=0x0483 && ch!=0x0485 && ch!=0x487 && ch!=0x0489)
1776 return (ch - 1);
1777 }
1778 // LATIN_EXT_ADD
1779 if ( (ch>=0x1E01 && ch<=0x1E95) || (ch>=0x1EA1 && ch<=0x1EF9) )
1780 return (ch - 1);
1781 }
1782
1783
1784 AvmAssert(sizeof(lowerCaseBase) == sizeof(upperCaseConversion))do { } while (0);
1785
1786 uint32_t result = ch;
1787 // Do a binary search in lowerCaseBase for char
1788 int32_t lo = 0;
1789 int32_t hi = (sizeof(lowerCaseBase) / sizeof(lowerCaseBase[0])) - 1;
1790
1791 while (lo <= hi)
1792 {
1793 int32_t pivot = (lo+hi)>>1;
1794 uint32_t testChar = lowerCaseBase[pivot];
1795
1796 if (ch == testChar)
1797 {
1798 // Use that index into lowerCaseConversion for a return value
1799 result = upperCaseConversion[pivot];
1800 break;
1801 }
1802 else if (ch < testChar)
1803 {
1804 hi = pivot-1;
1805 }
1806 else
1807 {
1808 lo = pivot+1;
1809 }
1810 }
1811
1812 return result;
1813 }
1814
// Lookup keys for unicharToLower(): code points that its range/offset checks do not cover.
// unicharToLower() binary-searches this array and, on a hit at index i, returns
// lowerCaseConversion[i]. The active entries must therefore stay in ascending order, and this
// array must stay the same size as (and index-aligned with) lowerCaseConversion; the size match
// is asserted in unicharToLower().
// NOTE(review): the commented-out runs look like entries that the range checks in
// unicharToLower() now handle arithmetically -- confirm before re-enabling any of them.
1815 static const wchar upperCaseBase[] =
1816 {
1817 /*0x0100, 0x0102, 0x0104, 0x0106,
1818 0x0108, 0x010A, 0x010C, 0x010E, 0x0110, 0x0112, 0x0114, 0x0116, 0x0118, 0x011A,
1819 0x011C, 0x011E, 0x0120, 0x0122, 0x0124, 0x0126, 0x0128, 0x012A, 0x012C, 0x012E,*/
1820 0x0130, 0x0132, 0x0134, 0x0136, /*0x0139, 0x013B, 0x013D, 0x013F, 0x0141, 0x0143,
1821 0x0145, 0x0147, 0x014A, 0x014C, 0x014E, 0x0150, 0x0152, 0x0154, 0x0156, 0x0158,
1822 0x015A, 0x015C, 0x015E, 0x0160, 0x0162, 0x0164, 0x0166, 0x0168, 0x016A, 0x016C,
1823 0x016E, 0x0170, 0x0172, 0x0174, 0x0176,*/ 0x0178, 0x0179, 0x017B, 0x017D, 0x0181,
1824 0x0182, 0x0184, 0x0186, 0x0187, 0x0189, 0x018A, 0x018B, 0x018E, 0x018F, 0x0190,
1825 0x0191, 0x0193, 0x0194, 0x0196, 0x0197, 0x0198, 0x019C, 0x019D, 0x019F, 0x01A0,
1826 0x01A2, 0x01A4, 0x01A6, 0x01A7, 0x01A9, 0x01AC, 0x01AE, 0x01AF, 0x01B1, 0x01B2,
1827 0x01B3, 0x01B5, 0x01B7, 0x01B8, 0x01BC, 0x01C4, 0x01C5, 0x01C7, 0x01C8, 0x01CA,
1828 0x01CB, 0x01CD, 0x01CF, 0x01D1, 0x01D3, 0x01D5, 0x01D7, 0x01D9, 0x01DB, 0x01DE,
1829 0x01E0, 0x01E2, 0x01E4, 0x01E6, 0x01E8, 0x01EA, 0x01EC, 0x01EE, 0x01F1, 0x01F2,
1830 0x01F4, 0x01F6, 0x01F7, 0x01F8, 0x01FA, 0x01FC, 0x01FE, /*0x0200, 0x0202, 0x0204,
1831 0x0206, 0x0208, 0x020A, 0x020C, 0x020E, 0x0210, 0x0212, 0x0214, 0x0216, 0x0218,
1832 0x021A, 0x021C, 0x021E, 0x0222, 0x0224, 0x0226, 0x0228, 0x022A, 0x022C, 0x022E,
1833 0x0230, 0x0232,*/ 0x0386, 0x0388, 0x0389, 0x038A, 0x038C, 0x038E, 0x038F, /*0x0391,
1834 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397, 0x0398, 0x0399, 0x039A, 0x039B,
1835 0x039C, 0x039D, 0x039E, 0x039F, 0x03A0, 0x03A1, 0x03A3, 0x03A4, 0x03A5, 0x03A6,
1836 0x03A7, 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03DA, 0x03DC, 0x03DE, 0x03E0, 0x03E2,
1837 0x03E4, 0x03E6, 0x03E8, 0x03EA, 0x03EC, 0x03EE,*/ 0x03F4, /*0x0400, 0x0401, 0x0402,
1838 0x0403, 0x0404, 0x0405, 0x0406, 0x0407, 0x0408, 0x0409, 0x040A, 0x040B, 0x040C,
1839 0x040D, 0x040E, 0x040F,*/ /*0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416,
1840 0x0417, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F, 0x0420,
1841 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427, 0x0428, 0x0429, 0x042A,
1842 0x042B, 0x042C, 0x042D, 0x042E, 0x042F, 0x0460, 0x0462, 0x0464, 0x0466, 0x0468,
1843 0x046A, 0x046C, 0x046E, 0x0470, 0x0472, 0x0474, 0x0476, 0x0478, 0x047A, 0x047C,
1844 0x047E, 0x0480, 0x048C, 0x048E, 0x0490, 0x0492, 0x0494, 0x0496, 0x0498, 0x049A,
1845 0x049C, 0x049E, 0x04A0, 0x04A2, 0x04A4, 0x04A6, 0x04A8, 0x04AA, 0x04AC, 0x04AE,
1846 0x04B0, 0x04B2, 0x04B4, 0x04B6, 0x04B8, 0x04BA, 0x04BC, 0x04BE,*/ 0x04C1, 0x04C3,
1847 0x04C7, 0x04CB, /*0x04D0, 0x04D2, 0x04D4, 0x04D6, 0x04D8, 0x04DA, 0x04DC, 0x04DE,
1848 0x04E0, 0x04E2, 0x04E4, 0x04E6, 0x04E8, 0x04EA, 0x04EC, 0x04EE, 0x04F0, 0x04F2,
1849 0x04F4, 0x04F8, 0x0531, 0x0532, 0x0533, 0x0534, 0x0535, 0x0536, 0x0537, 0x0538,
1850 0x0539, 0x053A, 0x053B, 0x053C, 0x053D, 0x053E, 0x053F, 0x0540, 0x0541, 0x0542,
1851 0x0543, 0x0544, 0x0545, 0x0546, 0x0547, 0x0548, 0x0549, 0x054A, 0x054B, 0x054C,
1852 0x054D, 0x054E, 0x054F, 0x0550, 0x0551, 0x0552, 0x0553, 0x0554, 0x0555, 0x0556,*/
1853 /*
1854 // cn: added Georgian. Not in the UnicodeData-3.2.0 spreadsheet, but was added later
1855 0x10a0, 0x10a1, 0x10a2, 0x10a3, 0x10a4, 0x10a5, 0x10a6, 0x10a7, 0x10a8, 0x10a9,
1856 0x10aa, 0x10ab, 0x10ac, 0x10ad, 0x10ae, 0x10af, 0x10b0, 0x10b1, 0x10b2, 0x10b3,
1857 0x10b4, 0x10b5, 0x10b6, 0x10b7, 0x10b8, 0x10b9, 0x10ba, 0x10bb, 0x10bc, 0x10bd,
1858 0x10be, 0x10bf, 0x10c0, 0x10c1, 0x10c2, 0x10c3, 0x10c4, 0x10c5,
1859 // cn: end Georgian.
1860 */
1861 /*0x1E00, 0x1E02, 0x1E04, 0x1E06, 0x1E08, 0x1E0A, 0x1E0C, 0x1E0E, 0x1E10, 0x1E12,
1862 0x1E14, 0x1E16, 0x1E18, 0x1E1A, 0x1E1C, 0x1E1E, 0x1E20, 0x1E22, 0x1E24, 0x1E26,
1863 0x1E28, 0x1E2A, 0x1E2C, 0x1E2E, 0x1E30, 0x1E32, 0x1E34, 0x1E36, 0x1E38, 0x1E3A,
1864 0x1E3C, 0x1E3E, 0x1E40, 0x1E42, 0x1E44, 0x1E46, 0x1E48, 0x1E4A, 0x1E4C, 0x1E4E,
1865 0x1E50, 0x1E52, 0x1E54, 0x1E56, 0x1E58, 0x1E5A, 0x1E5C, 0x1E5E, 0x1E60, 0x1E62,
1866 0x1E64, 0x1E66, 0x1E68, 0x1E6A, 0x1E6C, 0x1E6E, 0x1E70, 0x1E72, 0x1E74, 0x1E76,
1867 0x1E78, 0x1E7A, 0x1E7C, 0x1E7E, 0x1E80, 0x1E82, 0x1E84, 0x1E86, 0x1E88, 0x1E8A,
1868 0x1E8C, 0x1E8E, 0x1E90, 0x1E92, 0x1E94, 0x1EA0, 0x1EA2, 0x1EA4, 0x1EA6, 0x1EA8,
1869 0x1EAA, 0x1EAC, 0x1EAE, 0x1EB0, 0x1EB2, 0x1EB4, 0x1EB6, 0x1EB8, 0x1EBA, 0x1EBC,
1870 0x1EBE, 0x1EC0, 0x1EC2, 0x1EC4, 0x1EC6, 0x1EC8, 0x1ECA, 0x1ECC, 0x1ECE, 0x1ED0,
1871 0x1ED2, 0x1ED4, 0x1ED6, 0x1ED8, 0x1EDA, 0x1EDC, 0x1EDE, 0x1EE0, 0x1EE2, 0x1EE4,
1872 0x1EE6, 0x1EE8, 0x1EEA, 0x1EEC, 0x1EEE, 0x1EF0, 0x1EF2, 0x1EF4, 0x1EF6, 0x1EF8,
1873 0x1F08, 0x1F09, 0x1F0A, 0x1F0B, 0x1F0C, 0x1F0D, 0x1F0E, 0x1F0F, 0x1F18, 0x1F19,
1874 0x1F1A, 0x1F1B, 0x1F1C, 0x1F1D, 0x1F28, 0x1F29, 0x1F2A, 0x1F2B, 0x1F2C, 0x1F2D,
1875 0x1F2E, 0x1F2F, 0x1F38, 0x1F39, 0x1F3A, 0x1F3B, 0x1F3C, 0x1F3D, 0x1F3E, 0x1F3F,*/
1876 0x1F48, 0x1F49, 0x1F4A, 0x1F4B, 0x1F4C, 0x1F4D, 0x1F59, 0x1F5B, 0x1F5D, 0x1F5F,
1877 0x1F68, 0x1F69, 0x1F6A, 0x1F6B, 0x1F6C, 0x1F6D, 0x1F6E, 0x1F6F, 0x1F88, 0x1F89,
1878 0x1F8A, 0x1F8B, 0x1F8C, 0x1F8D, 0x1F8E, 0x1F8F, 0x1F98, 0x1F99, 0x1F9A, 0x1F9B,
1879 0x1F9C, 0x1F9D, 0x1F9E, 0x1F9F, 0x1FA8, 0x1FA9, 0x1FAA, 0x1FAB, 0x1FAC, 0x1FAD,
1880 0x1FAE, 0x1FAF, 0x1FB8, 0x1FB9, 0x1FBA, 0x1FBB, 0x1FBC, 0x1FC8, 0x1FC9, 0x1FCA,
1881 0x1FCB, 0x1FCC, 0x1FD8, 0x1FD9, 0x1FDA, 0x1FDB, 0x1FE8, 0x1FE9, 0x1FEA, 0x1FEB,
1882 0x1FEC, 0x1FF8, 0x1FF9, 0x1FFA, 0x1FFB, 0x1FFC, 0x2126, 0x212A, 0x212B /*0x2160,
1883 0x2161, 0x2162, 0x2163, 0x2164, 0x2165, 0x2166, 0x2167, 0x2168, 0x2169, 0x216A,
1884 0x216B, 0x216C, 0x216D, 0x216E, 0x216F, 0x24B6, 0x24B7, 0x24B8, 0x24B9, 0x24BA,
1885 0x24BB, 0x24BC, 0x24BD, 0x24BE, 0x24BF, 0x24C0, 0x24C1, 0x24C2, 0x24C3, 0x24C4,
1886 0x24C5, 0x24C6, 0x24C7, 0x24C8, 0x24C9, 0x24CA, 0x24CB, 0x24CC, 0x24CD, 0x24CE,
1887 0x24CF, 0xFF21, 0xFF22, 0xFF23, 0xFF24, 0xFF25, 0xFF26, 0xFF27, 0xFF28, 0xFF29,
1888 0xFF2A, 0xFF2B, 0xFF2C, 0xFF2D, 0xFF2E, 0xFF2F, 0xFF30, 0xFF31, 0xFF32, 0xFF33,
1889 0xFF34, 0xFF35, 0xFF36, 0xFF37, 0xFF38, 0xFF39, 0xFF3A*/
1890 };
1891
// Result table for unicharToLower(): entry i is the value returned when the binary search
// finds the input character at upperCaseBase[i]. It must stay index-aligned with, and the same
// size as, upperCaseBase (the size match is asserted in unicharToLower()).
// NOTE(review): the commented-out runs mirror the commented-out runs of upperCaseBase and
// appear to be handled arithmetically by unicharToLower() instead -- confirm before editing.
1892 static const wchar lowerCaseConversion[] =
1893 {
1894 /*0x0101, 0x0103, 0x0105, 0x0107,
1895 0x0109, 0x010B, 0x010D, 0x010F, 0x0111, 0x0113, 0x0115, 0x0117, 0x0119, 0x011B,
1896 0x011D, 0x011F, 0x0121, 0x0123, 0x0125, 0x0127, 0x0129, 0x012B, 0x012D, 0x012F,*/
1897 0x0069, 0x0133, 0x0135, 0x0137, /*0x013A, 0x013C, 0x013E, 0x0140, 0x0142, 0x0144,
1898 0x0146, 0x0148, 0x014B, 0x014D, 0x014F, 0x0151, 0x0153, 0x0155, 0x0157, 0x0159,
1899 0x015B, 0x015D, 0x015F, 0x0161, 0x0163, 0x0165, 0x0167, 0x0169, 0x016B, 0x016D,
1900 0x016F, 0x0171, 0x0173, 0x0175, 0x0177,*/ 0x00FF, 0x017A, 0x017C, 0x017E, 0x0253,
1901 0x0183, 0x0185, 0x0254, 0x0188, 0x0256, 0x0257, 0x018C, 0x01DD, 0x0259, 0x025B,
1902 0x0192, 0x0260, 0x0263, 0x0269, 0x0268, 0x0199, 0x026F, 0x0272, 0x0275, 0x01A1,
1903 0x01A3, 0x01A5, 0x0280, 0x01A8, 0x0283, 0x01AD, 0x0288, 0x01B0, 0x028A, 0x028B,
1904 0x01B4, 0x01B6, 0x0292, 0x01B9, 0x01BD, 0x01C6, 0x01C6, 0x01C9, 0x01C9, 0x01CC,
1905 0x01CC, 0x01CE, 0x01D0, 0x01D2, 0x01D4, 0x01D6, 0x01D8, 0x01DA, 0x01DC, 0x01DF,
1906 0x01E1, 0x01E3, 0x01E5, 0x01E7, 0x01E9, 0x01EB, 0x01ED, 0x01EF, 0x01F3, 0x01F3,
1907 0x01F5, 0x0195, 0x01BF, 0x01F9, 0x01FB, 0x01FD, 0x01FF, /*0x0201, 0x0203, 0x0205,
1908 0x0207, 0x0209, 0x020B, 0x020D, 0x020F, 0x0211, 0x0213, 0x0215, 0x0217, 0x0219,
1909 0x021B, 0x021D, 0x021F, 0x0223, 0x0225, 0x0227, 0x0229, 0x022B, 0x022D, 0x022F,
1910 0x0231, 0x0233,*/ 0x03AC, 0x03AD, 0x03AE, 0x03AF, 0x03CC, 0x03CD, 0x03CE, /*0x03B1,
1911 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7, 0x03B8, 0x03B9, 0x03BA, 0x03BB,
1912 0x03BC, 0x03BD, 0x03BE, 0x03BF, 0x03C0, 0x03C1, 0x03C3, 0x03C4, 0x03C5, 0x03C6,
1913 0x03C7, 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03DB, 0x03DD, 0x03DF, 0x03E1, 0x03E3,
1914 0x03E5, 0x03E7, 0x03E9, 0x03EB, 0x03ED, 0x03EF,*/ 0x03B8, /*0x0450, 0x0451, 0x0452,
1915 0x0453, 0x0454, 0x0455, 0x0456, 0x0457, 0x0458, 0x0459, 0x045A, 0x045B, 0x045C,
1916 0x045D, 0x045E, 0x045F,*/ /*0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436,
1917 0x0437, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F, 0x0440,
1918 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447, 0x0448, 0x0449, 0x044A,
1919 0x044B, 0x044C, 0x044D, 0x044E, 0x044F, 0x0461, 0x0463, 0x0465, 0x0467, 0x0469,
1920 0x046B, 0x046D, 0x046F, 0x0471, 0x0473, 0x0475, 0x0477, 0x0479, 0x047B, 0x047D,
1921 0x047F, 0x0481, 0x048D, 0x048F, 0x0491, 0x0493, 0x0495, 0x0497, 0x0499, 0x049B,
1922 0x049D, 0x049F, 0x04A1, 0x04A3, 0x04A5, 0x04A7, 0x04A9, 0x04AB, 0x04AD, 0x04AF,
1923 0x04B1, 0x04B3, 0x04B5, 0x04B7, 0x04B9, 0x04BB, 0x04BD, 0x04BF,*/ 0x04C2, 0x04C4,
1924 0x04C8, 0x04CC, /*0x04D1, 0x04D3, 0x04D5, 0x04D7, 0x04D9, 0x04DB, 0x04DD, 0x04DF,
1925 0x04E1, 0x04E3, 0x04E5, 0x04E7, 0x04E9, 0x04EB, 0x04ED, 0x04EF, 0x04F1, 0x04F3,
1926 0x04F5, 0x04F9, 0x0561, 0x0562, 0x0563, 0x0564, 0x0565, 0x0566, 0x0567, 0x0568,
1927 0x0569, 0x056A, 0x056B, 0x056C, 0x056D, 0x056E, 0x056F, 0x0570, 0x0571, 0x0572,
1928 0x0573, 0x0574, 0x0575, 0x0576, 0x0577, 0x0578, 0x0579, 0x057A, 0x057B, 0x057C,
1929 0x057D, 0x057E, 0x057F, 0x0580, 0x0581, 0x0582, 0x0583, 0x0584, 0x0585, 0x0586,*/
1930 // cn: added Georgian. Not in the UnicodeData-3.2.0 spreadsheet, but was added later
1931 /*
1932 0x10d0, 0x10d1, 0x10d2, 0x10d3, 0x10d4, 0x10d5, 0x10d6, 0x10d7, 0x10d8, 0x10d9,
1933 0x10da, 0x10db, 0x10dc, 0x10dd, 0x10de, 0x10df, 0x10e0, 0x10e1, 0x10e2, 0x10e3,
1934 0x10e4, 0x10e5, 0x10e6, 0x10e7, 0x10e8, 0x10e9, 0x10ea, 0x10eb, 0x10ec, 0x10ed,
1935 0x10ee, 0x10ef, 0x10f0, 0x10f1, 0x10f2, 0x10f3, 0x10f4, 0x10f5,
1936 // cn: end Georgian.
1937 */
1938 /*0x1E01, 0x1E03, 0x1E05, 0x1E07, 0x1E09, 0x1E0B, 0x1E0D, 0x1E0F, 0x1E11, 0x1E13,
1939 0x1E15, 0x1E17, 0x1E19, 0x1E1B, 0x1E1D, 0x1E1F, 0x1E21, 0x1E23, 0x1E25, 0x1E27,
1940 0x1E29, 0x1E2B, 0x1E2D, 0x1E2F, 0x1E31, 0x1E33, 0x1E35, 0x1E37, 0x1E39, 0x1E3B,
1941 0x1E3D, 0x1E3F, 0x1E41, 0x1E43, 0x1E45, 0x1E47, 0x1E49, 0x1E4B, 0x1E4D, 0x1E4F,
1942 0x1E51, 0x1E53, 0x1E55, 0x1E57, 0x1E59, 0x1E5B, 0x1E5D, 0x1E5F, 0x1E61, 0x1E63,
1943 0x1E65, 0x1E67, 0x1E69, 0x1E6B, 0x1E6D, 0x1E6F, 0x1E71, 0x1E73, 0x1E75, 0x1E77,
1944 0x1E79, 0x1E7B, 0x1E7D, 0x1E7F, 0x1E81, 0x1E83, 0x1E85, 0x1E87, 0x1E89, 0x1E8B,
1945 0x1E8D, 0x1E8F, 0x1E91, 0x1E93, 0x1E95, 0x1EA1, 0x1EA3, 0x1EA5, 0x1EA7, 0x1EA9,
1946 0x1EAB, 0x1EAD, 0x1EAF, 0x1EB1, 0x1EB3, 0x1EB5, 0x1EB7, 0x1EB9, 0x1EBB, 0x1EBD,
1947 0x1EBF, 0x1EC1, 0x1EC3, 0x1EC5, 0x1EC7, 0x1EC9, 0x1ECB, 0x1ECD, 0x1ECF, 0x1ED1,
1948 0x1ED3, 0x1ED5, 0x1ED7, 0x1ED9, 0x1EDB, 0x1EDD, 0x1EDF, 0x1EE1, 0x1EE3, 0x1EE5,
1949 0x1EE7, 0x1EE9, 0x1EEB, 0x1EED, 0x1EEF, 0x1EF1, 0x1EF3, 0x1EF5, 0x1EF7, 0x1EF9,
1950 0x1F00, 0x1F01, 0x1F02, 0x1F03, 0x1F04, 0x1F05, 0x1F06, 0x1F07, 0x1F10, 0x1F11,
1951 0x1F12, 0x1F13, 0x1F14, 0x1F15, 0x1F20, 0x1F21, 0x1F22, 0x1F23, 0x1F24, 0x1F25,
1952 0x1F26, 0x1F27, 0x1F30, 0x1F31, 0x1F32, 0x1F33, 0x1F34, 0x1F35, 0x1F36, 0x1F37,*/
1953 0x1F40, 0x1F41, 0x1F42, 0x1F43, 0x1F44, 0x1F45, 0x1F51, 0x1F53, 0x1F55, 0x1F57,
1954 0x1F60, 0x1F61, 0x1F62, 0x1F63, 0x1F64, 0x1F65, 0x1F66, 0x1F67, 0x1F80, 0x1F81,
1955 0x1F82, 0x1F83, 0x1F84, 0x1F85, 0x1F86, 0x1F87, 0x1F90, 0x1F91, 0x1F92, 0x1F93,
1956 0x1F94, 0x1F95, 0x1F96, 0x1F97, 0x1FA0, 0x1FA1, 0x1FA2, 0x1FA3, 0x1FA4, 0x1FA5,
1957 0x1FA6, 0x1FA7, 0x1FB0, 0x1FB1, 0x1F70, 0x1F71, 0x1FB3, 0x1F72, 0x1F73, 0x1F74,
1958 0x1F75, 0x1FC3, 0x1FD0, 0x1FD1, 0x1F76, 0x1F77, 0x1FE0, 0x1FE1, 0x1F7A, 0x1F7B,
1959 0x1FE5, 0x1F78, 0x1F79, 0x1F7C, 0x1F7D, 0x1FF3, 0x03C9, 0x006B, 0x00E5 /*0x2170,
1960 0x2171, 0x2172, 0x2173, 0x2174, 0x2175, 0x2176, 0x2177, 0x2178, 0x2179, 0x217A,
1961 0x217B, 0x217C, 0x217D, 0x217E, 0x217F, 0x24D0, 0x24D1, 0x24D2, 0x24D3, 0x24D4,
1962 0x24D5, 0x24D6, 0x24D7, 0x24D8, 0x24D9, 0x24DA, 0x24DB, 0x24DC, 0x24DD, 0x24DE,
1963 0x24DF, 0x24E0, 0x24E1, 0x24E2, 0x24E3, 0x24E4, 0x24E5, 0x24E6, 0x24E7, 0x24E8,
1964 0x24E9, 0xFF41, 0xFF42, 0xFF43, 0xFF44, 0xFF45, 0xFF46, 0xFF47, 0xFF48, 0xFF49,
1965 0xFF4A, 0xFF4B, 0xFF4C, 0xFF4D, 0xFF4E, 0xFF4F, 0xFF50, 0xFF51, 0xFF52, 0xFF53,
1966 0xFF54, 0xFF55, 0xFF56, 0xFF57, 0xFF58, 0xFF59, 0xFF5A*/
1967 };
1968
1969
1970 // WARNING: This is used by the core flash code. Any change to this utility, or the tables
1971 // it relies on, will break legacy Flash content.
1972 /*static*/ uint32_t String::unicharToLower(uint32_t ch)
1973 {
1974 if (ch < 0xFF)
1975 return tolower_map[ch] ^ ch;
1976
1977 // offset x1C60
1978 if( ch>=0x10A0 && ch<=0x10C5 ) // Georgian
1979 {
1980 return (ch + 48); // WRONG
1981 //return (ch + 0x1C60); // CORRECT
1982 }
1983
1984 // offset 80
1985 if ( (ch>=0x0400 && ch<=0x040F) )
1986 {
1987 return (ch + 0x50);
1988 }
1989
1990 // offset 48
1991 if ( (ch>=0x0531 && ch<=0x0556) )
1992 {
1993 return (ch + 0x30);
1994 }
1995
1996 // offset 32
1997 if ( (ch>=0x0391 && ch<=0x03AB) ||
1998 (ch>=0x0410 && ch<=0x042F) ||
1999 (ch>=0xFF21 && ch<=0xFF3A) )
2000 {
2001 return (ch + 0x20);
2002 }
2003
2004 // offset 26
2005 if ( (ch>=0x24B6 && ch<=0x24CF) )
2006 {
2007 return (ch + 0x1A);
2008 }
2009
2010 // offset 16
2011 if ( (ch>=0x2160 && ch<=0x216F) )
2012 {
2013 return (ch + 0x10);
2014 }
2015
2016 // offset -8(negative)
2017 if ( (ch>=0x1F08 && ch<=0X1F0F) ||
2018 (ch>=0x1F18 && ch<=0x1F1D) ||
2019 (ch>=0x1F28 && ch<=0x1F2F) ||
2020 (ch>=0x1F38 && ch<=0x1F3F) )
2021 {
2022 return (ch - 0x8);
2023 }
2024
2025 // offset +1
2026 if ( (ch>=0x0100 && ch<=0x0232) )
2027 {
2028 // only even numbers
2029 if ( ((ch<=0x012E) && !(ch&0x1)) ||
2030 ((ch>=0x0139 && ch<=0x0147) && (ch&0x1)) ||
2031 ((ch>=0x014A && ch<=0x0176) && !(ch&1)) ||
2032 ((ch>=0x0200 && ch<=0x0232) && !(ch&1) && (ch!=0x0220)) )
2033 {
2034 return (ch + 1);
2035 }
2036 }
2037
2038 if ( !(ch&1) ) // only looking for even chars
2039 {
2040 if ( (ch>=0x03D8 && ch<=0x03EE) )
2041 return (ch + 1);
2042 if ( ((ch>=0x0460 && ch<=0x04BE) && !(ch==0x0482 || ch==0x0484 || ch==0x486 || ch==0x0488)) ||
2043 (ch>=0x04D0 && ch<=0x04F8) )
2044 {
2045 if (ch!=0x0482 && ch!=0x0484 && ch!=0x486 && ch!=0x0488)
2046 return (ch + 1);
2047 }
2048 if ( (ch>=0x1E00 && ch<=0x1E94) || (ch>=0x1EA0 && ch<=0x1EF8) )
2049 return (ch + 1);
2050 }
2051
2052
2053 AvmAssert(sizeof(upperCaseBase) == sizeof(lowerCaseConversion))do { } while (0);
2054
2055 uint32_t result = ch;
2056 // Do a binary search in upperCaseBase for char
2057 int32_t lo = 0;
2058 int32_t hi = (sizeof(upperCaseBase) / sizeof(upperCaseBase[0])) - 1;
2059
2060 while (lo <= hi)
2061 {
2062 int32_t pivot = (lo+hi)>>1;
2063 uint32_t testChar = upperCaseBase[pivot];
2064
2065 if (ch == testChar)
2066 {
2067 // Use that index into lowerCaseConversion for a return value
2068 result = lowerCaseConversion[pivot];
2069 break;
2070 }
2071 else if (ch < testChar)
2072 {
2073 hi = pivot-1;
2074 }
2075 else
2076 {
2077 lo = pivot+1;
2078 }
2079 }
2080
2081 return result;
2082 }
2083
2084 Stringp String::toUpperCase()
2085 {
2086 return caseChange(unicharToUpper);
2087 }
2088
2089 Stringp String::toLowerCase()
2090 {
2091 return caseChange(unicharToLower);
2092 }
2093
2094 Stringp String::caseChange(uint32_t(*unimapper)(uint32_t))
2095 {
2096 if (!this->length())
2097 return this;
2098
2099 // Flag to detect whether any changes were made
2100 boolbool changed = falsefalse;
2101
2102 // 0xFF is a special case: ToUpper(0xFF) == 0x178, so we need a wider string
2103 // if the string contains 0xFF
2104 Width w = getWidth();
2105 if (w == k8 && VMPI_memchr::memchr(Pointers(this).p8, 0xFF, m_length) != 0)
2106 w = k16;
2107
2108 GC* gc = _gc(this);
2109 const boolbool is7bit = falsefalse;
2110 Stringp newStr = createDynamic(gc, NULL__null, m_length, w, is7bit);
2111
2112 Pointers src(this); // can't re-use the Pointers from VMPI_memchr; createDynamic may have invalidated it
2113 Pointers dst(newStr);
2114
2115 int32_t i;
2116 uint32_t ch1, ch2;
2117 if (getWidth() == k8)
2118 {
2119 for (i = 0; i < m_length; i++)
2120 {
2121 ch1 = uint32_t(*src.p8++);
2122 ch2 = unimapper (ch1);
2123 if (w == k16)
2124 *dst.p16++ = wchar(ch2);
2125 else
2126 *dst.p8++ = uint8_t(ch2);
2127 if (ch1 != ch2)
2128 changed = truetrue;
2129 }
2130 }
2131 else
2132 {
2133 for (i = 0; i < m_length; i++)
2134 {
2135 ch1 = uint32_t(*src.p16++);
2136 ch2 = unimapper(ch1);
2137 *dst.p16++ = wchar(ch2);
2138 if (ch1 != ch2)
2139 changed = truetrue;
2140 }
2141 }
2142 VERIFY_7BIT(this)do { } while (0);
2143 VERIFY_7BIT(newStr)do { } while (0);
2144 return changed ? newStr : this;
2145 }
2146
2147 double String::toNumber()
2148 {
2149 return MathUtils::convertStringToNumber(this);
2150 }
2151
2152 boolbool String::isWhitespace() const
2153 {
2154 StringIndexer self((Stringp) this);
2155 for (int32_t i = 0 ; i < length(); i++)
2156 {
2157 wchar ch = self[i];
2158 if (!isSpace(ch))
2159 return falsefalse;
2160 }
2161 return truetrue;
2162 }
2163
2164 /*static*/ int32_t FASTCALL__attribute__((fastcall)) String::Length(const wchar* str)
2165 {
2166 AvmAssert(str != NULL)do { } while (0);
2167
2168 const wchar* s = str;
2169 while (*s)
2170 s++;
2171 ptrdiff_t len = s - str;
2172 if (len <= 0x7FFFFFFF)
2173 return int32_t(len);
2174 GCHeap::SignalObjectTooLarge();
2175 /*NOTREACHED*/
2176 return 0;
2177 }
2178
2179 /*static*/ int32_t FASTCALL__attribute__((fastcall)) String::Length(const char* str)
2180 {
2181 AvmAssert(str != NULL)do { } while (0);
2182
2183 size_t len = VMPI_strlen::strlen(str);
2184 if (len <= 0x7FFFFFFF)
2185 return int32_t(len);
2186 GCHeap::SignalObjectTooLarge();
2187 /*NOTREACHED*/
2188 return 0;
2189 }
2190
2191 int String::_indexOf(Stringp substr, int startPos)
2192 {
2193 return (int) indexOf(substr, (int32_t) startPos);
2194 }
2195
// Converts a double position into an int32_t index, clamped for the string methods.
// Coded as a macro for best performance with MSVC 2008 32-bit.
//
//   iPos     - int32_t lvalue that receives the result
//   dPos     - position as a double
//   length   - upper clamp (the string length)
//   NaNValue - result when dPos is NaN (usually zero but length for lastIndexOf)
//
//   dPos <= -1 (or negative infinity)      - position equals -1 (out of bounds)
//   dPos >= length (or positive infinity)  - position equals length
//   non-NaN                                - integer cast to determine position
//   NaN                                    - use NaNValue
//
// Every parameter is parenthesized in the expansion so that an argument which is
// itself an expression (for example "cond ? a : b") is compared and cast as a whole.
// Arguments are still evaluated more than once, so they must be free of side effects.
#define INTCLAMP(iPos, dPos, length, NaNValue) \
    { \
        if ((dPos) <= -1) \
            (iPos) = -1; \
        else if ((dPos) >= (length)) \
            (iPos) = (length); \
        else if (!MathUtils::isNaNInline(dPos)) { \
            (iPos) = (int32_t) (dPos); \
        } \
        else { \
            (iPos) = (NaNValue); \
        } \
    }
2214
2215 int32_t String::AS3_indexOf(Stringp substr, double dStartPos)
2216 {
2217 int32_t iPos;
2218 INTCLAMP(iPos, dStartPos, this->length(), 0){ if (dStartPos <= -1) iPos = -1; else if (dStartPos >=
this->length()) iPos = this->length(); else if (!MathUtils
::isNaNInline(dStartPos)) { iPos = (int32_t) dStartPos; } else
{ iPos = 0; } }
;
2219 return indexOf(substr, iPos);
2220 }
2221
2222 int String::_lastIndexOf(Stringp substr, int iStartPos)
2223 {
2224 return (int) lastIndexOf(substr, (int32_t) iStartPos);
2225 }
2226
2227 int32_t String::AS3_lastIndexOf(Stringp substr, double dStartPos)
2228 {
2229 // unlike most other calls, this one has nan->length rather than nan->0
2230 int32_t iStartPos;
2231 INTCLAMP(iStartPos, dStartPos, this->length(), this->length()){ if (dStartPos <= -1) iStartPos = -1; else if (dStartPos >=
this->length()) iStartPos = this->length(); else if (!
MathUtils::isNaNInline(dStartPos)) { iStartPos = (int32_t) dStartPos
; } else { iStartPos = this->length(); } }
;
2232 return lastIndexOf(substr, iStartPos);
2233 }
2234
2235 Stringp String::_substr(int start, int count)
2236 {
2237 int len = this->length();
2238 start = (int)NativeObjectHelpers::ClampIndexInt(start, len);
2239 // ClampIndex takes a double (not int or uint) for first parm...
2240 // we must cast these to double before addition, otherwise we
2241 // can have numeric overflow with the default arg (end=0x7fffffff)
2242 // and wrap to negative, which would be bad...
2243
2244 // Do some sanity checks on our ints to see if they will fall within a valid integer range
2245 // !!@what about negative values?
2246 int end;
2247 if (count == 0x7fffffff) // largest positive int value, also the default arg
2248 {
2249 end = len;
2250 }
2251 else if ((count > 0x3fffffff) || (start > 0x3fffffff)) // might overflow - use doubles
2252 {
2253 end = (int)NativeObjectHelpers::ClampIndex(double(count) + double(start), len);
2254 }
2255 else
2256 {
2257 end = (int)NativeObjectHelpers::ClampIndexInt(count + start, len);
2258 }
2259
2260 if (end < start)
2261 end = start;
2262
2263 return substr(start, end-start);
2264 }
2265
2266 Stringp String::AS3_substr(double d_start, double d_count)
2267 {
2268 int32_t len = this->length();
2269 int32_t start = (int32_t) NativeObjectHelpers::ClampIndex(MathUtils::toInt(d_start), len);
2270 // ClampIndex takes a double (not int or uint) for first parm...
2271 // we must cast these to double before addition, otherwise we
2272 // can have numeric overflow with the default arg (end=0x7fffffff)
2273 // and wrap to negative, which would be bad...
2274 int32_t end = (int32_t)NativeObjectHelpers::ClampIndex(MathUtils::toInt(d_count) + (double)start, len);
2275 if (end < start)
2276 end = start;
2277
2278 return substr(start, end-start);
2279 }
2280
2281 Stringp String::_substring(int start, int end)
2282 {
2283 NativeObjectHelpers::ClampBInt(start, end, this->length());
2284 return substring((int32_t) start, (int32_t) end);
2285 }
2286
2287 Stringp String::AS3_substring(double d_start, double d_end)
2288 {
2289 double start = MathUtils::toInt(d_start);
2290 double end = MathUtils::toInt(d_end);
2291 NativeObjectHelpers::ClampB(start, end, length());
2292 return substring((int32_t) start, (int32_t) end);
2293 }
2294
2295 Stringp String::_slice(int start, int end)
2296 {
2297 return slice((int32_t) start, (int32_t) end);
2298 }
2299
2300 Stringp String::AS3_slice(double d_start, double d_end)
2301 {
2302 int32_t len = this->length();
2303 int32_t start = (int32_t)NativeObjectHelpers::ClampIndex(MathUtils::toInt(d_start), len);
2304 int32_t end = (int32_t)NativeObjectHelpers::ClampIndex(MathUtils::toInt(d_end), len);
2305 if (end < start)
2306 end = start;
2307
2308 return substring(start, end);
2309 }
2310
2311 Stringp String::_charAtI(int32_t iPos)
2312 {
2313 AvmCore* core = _core(this);
2314 // use unsigned compare as faster equivalent to >= 0 && < m_length
2315 if (uint32_t(iPos) < uint32_t(m_length)) {
2316 Pointers ptrs(this);
2317 wchar const ch = (wchar) (getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos];
2318
2319 // newStringUTF16 does the cachedChar optimization internally, but short-circuiting the test
2320 // here is worthwhile because it's a pretty common case
2321 if (ch < 128)
2322 return core->cachedChars[ch];
2323 return core->newStringUTF16(&ch, 1);
2324 }
2325 else {
2326 return core->kEmptyString;
2327 }
2328 }
2329
2330 Stringp String::_charAtU(uint32_t iPos)
2331 {
2332 AvmCore* core = _core(this);
2333 if (iPos < uint32_t(m_length)) {
2334 Pointers ptrs(this);
2335 wchar const ch = (wchar) (getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos];
2336
2337 // newStringUTF16 does the cachedChar optimization internally, but short-circuiting the test
2338 // here is worthwhile because it's a pretty common case
2339 if (ch < 128)
2340 return core->cachedChars[ch];
2341 return core->newStringUTF16(&ch, 1);
2342 }
2343 else {
2344 return core->kEmptyString;
2345 }
2346 }
2347
2348 Stringp String::AS3_charAt(double dPos)
2349 {
2350 int32_t iPos;
2351 INTCLAMP(iPos, dPos, this->length(), 0){ if (dPos <= -1) iPos = -1; else if (dPos >= this->
length()) iPos = this->length(); else if (!MathUtils::isNaNInline
(dPos)) { iPos = (int32_t) dPos; } else { iPos = 0; } }
;
2352
2353 AvmCore* core = _core(this);
2354 // use unsigned compare as faster equivalent to >= 0 && < m_length
2355 if (uint32_t(iPos) < uint32_t(m_length)) {
2356 Pointers ptrs(this);
2357 wchar const ch = (wchar) (getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos];
2358
2359 // newStringUTF16 does the cachedChar optimization internally, but short-circuiting the test
2360 // here is worthwhile because it's a pretty common case
2361 if (ch < 128)
2362 return core->cachedChars[ch];
2363 return core->newStringUTF16(&ch, 1);
2364 }
2365 else {
2366 return core->kEmptyString;
2367 }
2368 }
2369
2370 Stringp String::AS3_toUpperCase()
2371 {
2372 return toUpperCase();
2373 }
2374
2375 Stringp String::AS3_toLowerCase()
2376 {
2377 return toLowerCase();
2378 }
2379
2380 double String::_charCodeAtFI(int32_t iPos)
2381 {
2382 double d;
2383 // use unsigned compare as faster equivalent to >= 0 && < m_length
2384 if (uint32_t(iPos) < uint32_t(m_length)) {
2385 Pointers ptrs(this);
2386 d = (double) ((getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos]);
2387 }
2388 else
2389 d = MathUtils::kNaN;
2390 return d;
2391 }
2392
2393 int32_t String::_charCodeAtII(int32_t iPos)
2394 {
2395 // use unsigned compare as faster equivalent to >= 0 && < m_length
2396 if (uint32_t(iPos) < uint32_t(m_length)) {
2397 Pointers ptrs(this);
2398 return (getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos];
2399 }
2400 else
2401 return 0;
2402 }
2403
2404 double String::_charCodeAtFU(uint32_t iPos)
2405 {
2406 double d;
2407 if (iPos < uint32_t(m_length)) {
2408 Pointers ptrs(this);
2409 d = (double) ((getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos]);
2410 }
2411 else
2412 d = MathUtils::kNaN;
2413 return d;
2414 }
2415
2416 int32_t String::_charCodeAtIU(uint32_t iPos)
2417 {
2418 if (iPos < uint32_t(m_length)) {
2419 Pointers ptrs(this);
2420 return (getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos];
2421 }
2422 else
2423 return 0;
2424 }
2425
2426 double String::AS3_charCodeAt(double dPos)
2427 {
2428 int32_t iPos;
2429 INTCLAMP(iPos, dPos, m_length, 0){ if (dPos <= -1) iPos = -1; else if (dPos >= m_length)
iPos = m_length; else if (!MathUtils::isNaNInline(dPos)) { iPos
= (int32_t) dPos; } else { iPos = 0; } }
;
2430 // use unsigned compare as faster equivalent to >= 0 && < m_length
2431 if (uint32_t(iPos) < uint32_t(m_length)) {
2432 Pointers ptrs(this);
2433 return (double) ((getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos]);
2434 }
2435 else
2436 return MathUtils::kNaN;
2437 }
2438
2439 int32_t String::_charCodeAtIF(double dPos)
2440 {
2441 int32_t iPos;
2442 INTCLAMP(iPos, dPos, m_length, 0){ if (dPos <= -1) iPos = -1; else if (dPos >= m_length)
iPos = m_length; else if (!MathUtils::isNaNInline(dPos)) { iPos
= (int32_t) dPos; } else { iPos = 0; } }
;
2443 // use unsigned compare as faster equivalent to >= 0 && < m_length
2444 if (uint32_t(iPos) < uint32_t(m_length)) {
2445 Pointers ptrs(this);
2446 return (getWidth() == k8) ? ptrs.p8[iPos] : ptrs.p16[iPos];
2447 }
2448 else
2449 return 0;
2450 }
2451
2452 int32_t String::AS3_localeCompare(Atom other)
2453 {
2454 Stringp otherStr;
2455
2456 if (AvmCore::isString(other))
2457 {
2458 otherStr = AvmCore::atomToString(other);
2459 }
2460 else
2461 {
2462 AvmCore* core = _core(this);
2463
2464 // legacy behavior special-cases NULL (including undefined).
2465 if (AvmCore::isNullOrUndefined(other))
2466 {
2467 if (!core->currentBugCompatibility()->bugzilla585791)
2468 return (m_length == 0) ? 1 : 0;
2469 }
2470
2471 // Proper ECMAScript behavior:
2472 // The 'this' object is converted to string, call it a.
2473 // The argument is converted to string, call it b.
2474 // Those strings are compared and a string comparison value is returned.
2475 otherStr = core->string(other);
2476 }
2477
2478 return otherStr->Compare(*this);
2479 }
2480
2481#ifdef DEBUGGER
2482 uint64_t String::bytesUsed() const
2483 {
2484 uint64_t bytesUsed = sizeof(String);
2485
2486 // If getType() == kDependent, the buffer's memory is account for by
2487 // the string upon which this string depends, so we should not include
2488 // it here.
2489 //
2490 // If getType() == kStatic, the buffer's memory is not on the GC heap,
2491 // and freeing the string would not free the buffer, so we should not
2492 // include its size here.
2493 if (getType() == kDynamic)
2494 bytesUsed += GC::Size(m_buffer.pv);
2495
2496 return bytesUsed;
2497 }
2498
2499 Stringp String::getMasterString() const
2500 {
2501 if (isDependent())
2502 return m_extra.master;
2503 else
2504 return NULL__null;
2505 }
2506#endif
2507
2508////////////////////////////// Helpers: Width Analysis /////////////////////////////////
2509
2510 // The character widths structure used by the _analyze() functions below
2511
struct StringWidths
{
    int32_t ascii;  // characters <= 0x7F
    int32_t w8;     // characters that fit into 8 bits (the ASCII ones are counted here too)
    int32_t w16;    // characters that need 16 bits
    int32_t w32;    // characters that need a surrogate pair
};
2516
2517 // Analyze a UTF-8 string buffer and find out about the character widths.
2518 // Return false if the string contains malformed or too big sequences,
2519 // and strict mode is requested.
2520
2521 // RFC 2279
2522
2523 // 0000 0000-0000 007F 0xxxxxxx
2524 // 0000 0080-0000 07FF 110xxxxx 10xxxxxx
2525 // 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
2526
2527 // 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
2528 // 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2529 // 0400 0000-7FFF FFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2530
2531 static boolbool _analyzeUtf8(const utf8_t* in, int32_t inLen, StringWidths& r, boolbool strict)
2532 {
2533 r.ascii = r.w8 = r.w16 = r.w32 = 0;
2534 while (inLen > 0)
2535 {
2536 uint32_t c = uint32_t (*in);
2537
2538 switch (c >> 4)
2539 {
2540 case 0: case 1: case 2: case 3: case 4: case 5: case 6: case 7:
2541 r.w8++;
2542 r.ascii++;
2543 in++;
2544 inLen--;
2545 break;
2546
2547 case 12: case 13:
2548 // 110xxxxx 10xxxxxx
2549 if (inLen < 2) {
2550 // Invalid
2551 goto invalid;
2552 }
2553 if ((in[1]&0xC0) != 0x80) {
2554 // Invalid
2555 goto invalid;
2556 }
2557 c = (((c<<6) & 0x7C0) | (in[1] & 0x3F));
2558 if (c < 0x80) {
2559 // Overlong sequence, reject as invalid.
2560 goto invalid;
2561 }
2562 in += 2;
2563 inLen -= 2;
2564 if (c < 0xFF)
2565 r.w8++;
2566 else
2567 r.w16++;
2568 break;
2569
2570 case 14:
2571 // 1110xxxx 10xxxxxx 10xxxxxx
2572 if (inLen < 3) {
2573 // Invalid
2574 goto invalid;
2575 }
2576 if ((in[1]&0xC0) != 0x80 || (in[2]&0xC0) != 0x80) {
2577 // Invalid
2578 goto invalid;
2579 }
2580 c = (((c<<12) & 0xF000) | ((in[1]<<6) & 0xFC0) | (in[2] & 0x3F));
2581 if (c < 0x800) {
2582 // Overlong sequence, reject as invalid.
2583 goto invalid;
2584 }
2585 in += 3;
2586 inLen -= 3;
2587 r.w16++;
2588 break;
2589
2590 case 15:
2591 // 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
2592 // 111110xx ... is always invalid
2593 // 1111110x ... is always invalid
2594 // note: when 'strict' is false, we need to mimic the behavior of FP9/FP10,
2595 // which did not do the c&8 test. so skip it for bug-compatibility.
2596 if ((strict && (c & 0x08)) || (inLen < 4)) {
2597 // Invalid
2598 goto invalid;
2599 }
2600 if ((in[1]&0xC0) != 0x80 ||
2601 (in[2]&0xC0) != 0x80 ||
2602 (in[3]&0xC0) != 0x80)
2603 {
2604 goto invalid;
2605 }
2606
2607 c = (((c<<18) & 0x1C0000) |
2608 ((in[1]<<12) & 0x3F000) |
2609 ((in[2]<<6) & 0xFC0) |
2610 (in[3] & 0x3F));
2611 if (c < 0x10000) {
2612 // Overlong sequence, reject as invalid.
2613 goto invalid;
2614 }
2615 in += 4;
2616 inLen -= 4;
2617 r.w32++;
2618 break;
2619
2620 default:
2621 invalid:
2622 // if non-strict, simply treat as character
2623 if (strict)
2624 return falsefalse;
2625 else
2626 {
2627 r.w8++;
2628 in++;
2629 inLen--;
2630 }
2631 }
2632 }
2633 return truetrue;
2634 }
2635
2636 // Analyze a UTF-16 string buffer and find out about the character widths.
2637 // chars >= 0x10000 are encoded as 0xD800 + (upper 10 bits) and 0xDC00 + (lower 10 bits)
2638 // Returns false in strict mode if a starting surrogate pair character is not followed
2639 // by a correct 2nd pair character
2640
2641 static boolbool _analyzeUtf16(const wchar* p, int32_t len, StringWidths& r, boolbool strict)
2642 {
2643 r.ascii = r.w8 = r.w16 = r.w32 = 0;
2644 while (len-- > 0)
2645 {
2646 wchar ch = *p++;
2647 if (ch <= 0x7F)
2648 r.ascii++;
2649 if (ch <= 0xFF)
2650 r.w8++;
2651 else if (ch < 0xD800 || ch > 0xDBFF)
2652 r.w16++;
2653 else
2654 {
2655 if (len == 0 || *p < 0xDC00 || *p > 0xDFFF)
2656 {
2657 if (strict)
2658 return falsefalse;
2659 // found one or two characters depending on whether the 2nd surrpgate character was present
2660 r.w16 += len ? 2 : 1;
2661 }
2662 else
2663 r.w32++;
2664 p++; len--;
2665 }
2666 }
2667 return truetrue;
2668 }
2669
2670 // Create a string out of an UTF-8 buffer.
2671 Stringp String::createUTF8
2672 (AvmCore* core, const utf8_t* buffer, int32_t len, Width desiredWidth, boolbool staticBuf, boolbool strict)
2673 {
2674 if (buffer == NULL__null)
2675 {
2676 len = 0;
2677 buffer = &k_zero.u8;
2678 staticBuf = truetrue;
2679 }
2680 if (len < 0)
2681 len = Length((const char*)buffer);
2682
2683 // determine the string width to use
2684 StringWidths widths;
2685 if (!_analyzeUtf8(buffer, len, widths, strict))
2686 {
2687 // TODO: bad character sequence error
2688 AvmAssert(!"bad UTF8 character sequence")do { } while (0);
2689 return NULL__null;
2690 }
2691
2692 if (desiredWidth == kAuto)
2693 {
2694 if (widths.w32 != 0 || widths.w16 != 0)
2695 desiredWidth = String::k16;
2696 else
2697 desiredWidth = String::k8;
2698 }
2699
2700 if (desiredWidth == String::k8 && core->kEmptyString != NULL__null)
2701 {
2702 // core has been initialized, check for cached characters
2703 if (len == 0)
2704 return core->kEmptyString;
2705
2706 if (len == 1 && *buffer < 128)
2707 return core->cachedChars[*buffer];
2708 }
2709
2710 const boolbool is7bit = (widths.ascii == widths.w8) && !widths.w16 && !widths.w32;
2711
2712 Stringp s = NULL__null;
2713 GC* gc = core->GetGC();
2714 if (desiredWidth == k8)
2715 {
2716 if (widths.w16 != 0 || widths.w32 != 0)
2717 // cannot do 8-bit string with this data
2718 // TODO: string-too-wide error
2719 s = NULL__null;
2720 else if (staticBuf && is7bit)
2721 {
2722 // works, because we only have 7-bit ASCII
2723 s = createStatic(gc, buffer, widths.w8, String::k8, is7bit);
2724 }
2725 else
2726 {
2727 s = createDynamic(gc, buffer, widths.w8, String::k8, is7bit);
2728 uint32_t uch;
2729 int32_t bytesRead;
2730 Pointers dst(s); // NB, we assume that Utf8ToUcs4 doesn't allocate memory!
2731 boolbool bugzilla609416 = core->currentBugCompatibility()->bugzilla609416;
2732 while (len > 0)
2733 {
2734 if (*((int8_t*) buffer) > 0)
2735 {
2736 // ASCII
2737 *dst.p8++ = char (*buffer++);
2738 len--;
2739 }
2740 else
2741 {
2742 bytesRead = UnicodeUtils::Utf8ToUcs4(buffer, len, &uch, bugzilla609416);
2743 if (bytesRead == 0)
2744 {
2745 // invalid sequence (only if strict was false)
2746 *dst.p8++ = char (*buffer++);
2747 len--;
2748 }
2749 else
2750 {
2751 buffer += bytesRead;
2752 len -= bytesRead;
2753 *dst.p8++ = char (uch);
2754 }
2755 }
2756 }
2757 }
2758 }
2759 else
2760 {
2761 // surrogate pairs need 2 characters
2762 s = createDynamic(gc, NULL__null, int32AddChecked(int32AddChecked(widths.w8, widths.w16), int32AddChecked(widths.w32, widths.w32)), String::k16, is7bit);
2763 int32_t const convertedlen = UnicodeUtils::Utf8ToUtf16(buffer, len, s->m_buffer.p16, s->m_length, strict);
2764 // if this assert fires, _analyzeUtf8 didn't agree with UnicodeUtils::Utf8ToUtf16, which is bad.
2765 AvmAssert(convertedlen == s->m_length)do { } while (0);
2766 if (convertedlen < 0)
2767 return NULL__null;
2768 }
2769 VERIFY_7BIT(s)do { } while (0);
2770 return s;
2771 }
2772
2773 // Create a string out of an UTF-16 buffer.
2774 Stringp String::createUTF16
2775 (AvmCore* core, const wchar* buffer, int32_t len, Width desiredWidth, boolbool staticBuf, boolbool strict)
2776 {
2777 if (buffer == NULL__null)
2778 {
2779 len = 0;
2780 buffer = &k_zero.u16;
2781 staticBuf = truetrue;
2782 }
2783 if (len < 0)
2784 len = Length(buffer);
2785
2786 boolbool is7bit = falsefalse;
2787 int32_t stringLength = len;
2788 if (desiredWidth != k16)
2789 {
2790 // determine the string width to use
2791 StringWidths widths;
2792 if (!_analyzeUtf16(buffer, len, widths, strict))
2793 {
2794 // TODO: bad character sequence error
2795 return NULL__null;
2796 }
2797
2798 if (desiredWidth == kAuto)
2799 {
2800 if (widths.w32 != 0 || widths.w16 != 0)
2801 desiredWidth = k16;
2802 else
2803 desiredWidth = k8;
2804 }
2805 else if (desiredWidth == k8 && (widths.w16 != 0 || widths.w32 != 0))
2806 // cannot do 8-bit string with this data
2807 // TODO: string-too-wide error
2808 return NULL__null;
2809
2810 is7bit = (widths.ascii == widths.w8) && !widths.w16 && !widths.w32;
2811 }
2812
2813 if (desiredWidth == k8 && core->kEmptyString != NULL__null)
2814 {
2815 // core has been initialized, check for cached characters
2816 if (len == 0)
2817 return core->kEmptyString;
2818
2819 if (len == 1 && *buffer < 128)
2820 return core->cachedChars[*buffer];
2821 }
2822
2823 if (desiredWidth == k16 && staticBuf)
2824 {
2825 return createStatic(core->GetGC(), buffer, len, k16, is7bit);
2826 }
2827
2828 // found the width to use, now create that string
2829 Stringp s = createDynamic(core->GetGC(), NULL__null, stringLength, desiredWidth, is7bit);
2830
2831 String::Pointers ptrs(s);
2832 if (desiredWidth == k8)
2833 {
2834 while (len-- > 0)
2835 *ptrs.p8++ = (char) *buffer++;
2836 }
2837 else
2838 {
2839 VMPI_memcpy::memcpy(ptrs.pv, buffer, len << desiredWidth);
2840 }
2841 VERIFY_7BIT(s)do { } while (0);
2842 return s;
2843 }
2844
2845 PrintWriter& String::print(PrintWriter& prw) const
2846 {
2847 if (getWidth() == k8 && (m_bitsAndFlags & TSTR_7BIT_FLAG))
2848 {
2849 // fast form
2850 Pointers ptrs(this);
2851 prw.writeN( (const char*)ptrs.p8, m_length );
2852 }
2853 else
2854 {
2855 for (int32_t i=0, n=length(); i<n; i++)
2856 {
2857 prw << (wchar)charAt(i);
2858 }
2859 }
2860 return prw;
2861 }
2862
2863 ///////////////////////////////////////////////////////////////////////////////////
2864
2865 // ---------------------------------------------------------------------------
2866
2867 // 0000 0000-0000 007F 0xxxxxxx
2868 // 0000 0080-0000 07FF 110xxxxx 10xxxxxx
2869 // 0000 0800-0000 FFFF 1110xxxx 10xxxxxx 10xxxxxx
2870
2871 // 0001 0000-001F FFFF 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
2872 // 0020 0000-03FF FFFF 111110xx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2873 // 0400 0000-7FFF FFFF 1111110x 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx 10xxxxxx
2874
2875 StUTF8String::StUTF8String(Stringp str)
2876 {
2877 if (!str || !str->length())
2878 {
2879 m_buffer = &k_zero.c8;
2880 m_length = 0;
2881 return;
2882 }
2883
2884 VERIFY_7BIT(str)do { } while (0);
2885
2886 int32_t len = 0;
2887
2888 MMgc::GC* gc = _gc(str);
2889 if (str->getWidth() == String::k8)
2890 {
2891 len = str->m_length;
2892
2893 // if we know it's 7-bit, we don't need to walk it.
2894 if (!(str->m_bitsAndFlags & String::TSTR_7BIT_FLAG))
2895 {
2896 const uint8_t* srcBuf = String::Pointers(str).p8;
2897 int32_t auxchars = 0;
2898 for (int32_t i = str->m_length; i--;)
2899 auxchars += (*srcBuf++ > 127);
2900
2901 len = int32AddChecked(len, auxchars);
2902
2903 // no hi bits? set the bit!
2904 if (len == str->m_length)
2905 str->m_bitsAndFlags |= String::TSTR_7BIT_FLAG;
2906 }
2907
2908 // Deliberately using gc'ed memory here (not mmfx, unmanaged memory)
2909 // so that longjmp's past our dtor won't cause a long-term leak
2910 char* dstBuf = (char*)gc->Alloc(uint32_t(len)+1, 0);
2911 // can't re-use the Pointers from 7BIT above; gc->Alloc may have invalidated it
2912 const uint8_t* srcBuf = String::Pointers(str).p8;
2913 m_buffer = dstBuf;
2914 m_length = len;
2915
2916 if (len == str->m_length)
2917 {
2918 VMPI_memcpy::memcpy(dstBuf, srcBuf, len);
2919 dstBuf[len] = 0;
2920 }
2921 else
2922 {
2923 for (int32_t i = str->m_length; i--;)
2924 {
2925 wchar ch = wchar(*srcBuf++);
2926 if (ch >= 128)
2927 {
2928 *dstBuf++ = char(0xC0 + ((ch >> 6) & 0x3));
2929 ch = 0x80 + (ch & 0x3F);
2930 // fall thru
2931 }
2932 *dstBuf++ = char(ch);
2933 }
2934 AvmAssert(dstBuf - m_buffer == len)do { } while (0);
2935 *dstBuf = 0;
2936 }
2937 }
2938 else
2939 {
2940 len = UnicodeUtils::Utf16ToUtf8(String::Pointers(str).p16, str->length(), NULL__null, 0);
2941 if (len < 0)
2942 len = 0;
2943 char* dstBuf = (char*) gc->Alloc(uint32_t(len)+1, 0);
2944 m_buffer = dstBuf;
2945 m_length = len;
2946 dstBuf[len] = 0;
2947 // can't re-use the Pointers from Utf16ToUtf8 above; gc->Alloc may have invalidated it
2948 UnicodeUtils::Utf16ToUtf8(String::Pointers(str).p16, str->length(), (uint8_t*) dstBuf, len);
2949 }
2950 }
2951
2952 StUTF8String::~StUTF8String()
2953 {
2954 if (m_buffer != &k_zero.c8) {
2955 // StrUTF8String is only ever stack allocated so it's OK just to free m_buffer without NULLing it first.
2956 GC::GetGC(m_buffer)->Free(m_buffer);
2957 }
2958 }
2959
2960 StUTF16String::StUTF16String(Stringp str)
2961 {
2962 if (!str || !str->length())
2963 {
2964 m_length = 0;
2965 m_buffer = &k_zero.u16;
2966 return;
2967 }
2968
2969 VERIFY_7BIT(str)do { } while (0);
2970
2971 MMgc::GC* gc = _gc(str);
2972
2973 m_length = str->m_length;
2974 // Deliberately using gc'ed memory here (not mmfx, unmanaged memory)
2975 // so that longjmp's past our dtor won't cause a long-term leak
2976 wchar* dst = (wchar*) gc->Alloc(uint32ShlChecked(uint32_t(m_length)+1, String::k16), 0);
2977 m_buffer = dst;
2978 dst[m_length] = 0;
2979 String::Pointers ptrs(str);
2980 _copyBuffers(ptrs.pv, dst, m_length, str->getWidth(), String::k16);
2981 }
2982
2983 StUTF16String::~StUTF16String()
2984 {
2985 if (m_buffer != &k_zero.u16) {
2986 // StrUTF16String is only ever stack allocated so it's OK just to free m_buffer without NULLing it first.
2987 GC::GetGC(m_buffer)->Free(m_buffer);
2988 }
2989 }
2990
2991 // The following table is the length of an UTF-8 sequence, indexed by the
2992 // first character masked with 0x3F. It is assumed that the data is good
2993 // UTF-8, which is true, since we just created it :) The table is only
2994 // used for characters >= 0x80.
2995 // 0000 0080-0000 07FF 000xxxxx
2996 // 0000 0800-0000 FFFF 0010xxxx
2997 // 0001 0000-001F FFFF 00110xxx
2998 // 0020 0000-03FF FFFF 001110xx
2999 // 0400 0000-7FFF FFFF 0011110x
3000
static const uint8_t utf8Lengths[64] =
{
    // 0x00-0x0F: two-byte sequences
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
    // 0x10-0x1F: two-byte sequences
    2, 2, 2, 2, 2, 2, 2, 2,  2, 2, 2, 2, 2, 2, 2, 2,
    // 0x20-0x2F: three-byte sequences
    3, 3, 3, 3, 3, 3, 3, 3,  3, 3, 3, 3, 3, 3, 3, 3,
    // 0x30-0x37: four bytes, 0x38-0x3B: five bytes, 0x3C-0x3F: six bytes
    4, 4, 4, 4, 4, 4, 4, 4,  5, 5, 5, 5, 6, 6, 6, 6
};
3008
3009 StIndexableUTF8String::StIndexableUTF8String(Stringp s) :
3010 StUTF8String(s),
3011 m_lastPos(0),
3012 m_lastUtf8Pos(0)
3013 {
3014 m_indexable = (s->length() == this->length()); // can't init in init-list, generates warning :-(
3015 }
3016
3017 int32_t StIndexableUTF8String::toUtf8Index(int32_t pos)
3018 {
3019 if (pos <= 0 || pos >= length() || m_indexable)
3020 return pos;
3021
3022 // optimization: these two members kick in if this method
3023 // is called for positions in ascending order (which pcre usually does)
3024 if (pos < m_lastPos)
3025 m_lastPos = m_lastUtf8Pos = 0;
3026
3027 int32_t utf8Pos = m_lastUtf8Pos;
3028
3029 const utf8_t* p = (const utf8_t*) this->c_str() + m_lastUtf8Pos;
3030 for (int32_t i = m_lastPos; i < pos; i++)
3031 {
3032 utf8_t ch = *p;
3033 if (ch < 128)
3034 p++, utf8Pos++; // ASCII
3035 else
3036 {
3037 int32_t len = utf8Lengths[ch & 0x3F];
3038 p += len;
3039 utf8Pos += len;
3040 }
3041 }
3042 m_lastPos = pos;
3043 m_lastUtf8Pos = utf8Pos;
3044 return utf8Pos;
3045 }
3046
3047 int32_t StIndexableUTF8String::toIndex(int32_t utf8Pos)
3048 {
3049 if (utf8Pos <= 0 || m_indexable)
3050 return utf8Pos;
3051
3052 // optimization: these two members kick in if this method
3053 // is called for positions in ascending order (which pcre usually does)
3054 if (utf8Pos < m_lastUtf8Pos)
3055 m_lastPos = m_lastUtf8Pos = 0;
3056
3057 int32_t i = m_lastUtf8Pos;
3058 int32_t pos = m_lastPos;
3059
3060 const utf8_t* p = (const utf8_t*) this->c_str() + m_lastUtf8Pos;
3061 while (i < utf8Pos)
3062 {
3063 if (i >= length())
3064 break;
3065 utf8_t ch = *p;
3066 if (!(ch & 0x80))
3067 i++, p++; // ASCII
3068 else
3069 {
3070 int32_t len = utf8Lengths[ch & 0x3F];
3071 i += len;
3072 p += len;
3073 }
3074 pos++;
3075 }
3076 m_lastPos = pos;
3077 m_lastUtf8Pos = utf8Pos;
3078 return pos;
3079 }
3080
3081} // end namespace avmplus