#include <XMLParser16.h>
Public Types | |
| enum | { kNoError = 0, kEndOfDocument = -1, kUnterminatedCDataSection = -2, kUnterminatedXMLDeclaration = -3, kUnterminatedDocTypeDeclaration = -4, kUnterminatedComment = -5, kMalformedElement = -6, kOutOfMemory = -7, kUnterminatedAttributeValue = -8, kUnterminatedElement = -9, kElementNeverBegun = -10, kUnterminatedProcessingInstruction = -11 } |
Public Member Functions | |
| XMLParser (AvmCore *core) | |
| ~XMLParser () | |
| void | parse (Stringp source, bool ignoreWhite=false) |
| int | getNext (XMLTag &tag) |
| bool | getCondenseWhite () const |
| void | setCondenseWhite (bool flag) |
Public Attributes | |
| AvmCore * | core |
Private Member Functions | |
| Stringp | unescape (Stringp buffer, const wchar *start, int len, bool bIntern) |
| void | condenseWhitespace (Stringp text) |
Private Attributes | |
| Stringp | m_source |
| const wchar * | m_ptr |
| bool | m_ignoreWhite |
| bool | m_condenseWhite |
This XML parser is used to support E4X in AVM+.
Definition at line 101 of file XMLParser16.h.
|
|
Definition at line 119 of file XMLParser16.h. 00119 { 00120 kNoError = 0, 00121 kEndOfDocument = -1, 00122 kUnterminatedCDataSection = -2, 00123 kUnterminatedXMLDeclaration = -3, 00124 kUnterminatedDocTypeDeclaration = -4, 00125 kUnterminatedComment = -5, 00126 kMalformedElement = -6, 00127 kOutOfMemory = -7, 00128 kUnterminatedAttributeValue = -8, 00129 kUnterminatedElement = -9, 00130 kElementNeverBegun = -10, 00131 kUnterminatedProcessingInstruction = -11 00132 };
|
|
|
Definition at line 468 of file XMLParser16.cpp. References avmplus::Hashtable::add(), avmplus::AvmCore::constant(), core, MMgc::GCRoot::GetGC(), avmplus::AvmCore::intToAtom(), and avmplus::AvmCore::xmlEntities. 00469 { 00470 this->core = core; 00471 00472 if (!core->xmlEntities) 00473 { 00474 // Lazy creation of the XML entities table. 00475 core->xmlEntities = new (core->GetGC()) Hashtable(core->GetGC()); 00476 00477 const char *entities = "&amp\0\"quot\0'apos\0<lt\0>gt\0\xA0nbsp\0"; 00478 00479 while (*entities) 00480 { 00481 core->xmlEntities->add(core->constant(entities+1), 00482 (void*)core->intToAtom(*entities)); 00483 while (*entities++) { 00484 // do nothing 00485 } 00486 } 00487 } 00488 }
|
|
|
Definition at line 105 of file XMLParser16.h. References core, m_condenseWhite, m_ignoreWhite, m_ptr, m_source, and NULL. 00106 { 00107 core = NULL; 00108 m_source = NULL; 00109 m_ptr = NULL; 00110 m_ignoreWhite = false; 00111 m_condenseWhite = false; 00112 }
|
|
|
Definition at line 73 of file XMLParser16.cpp. References AvmAssert, avmplus::String::isInterned(), avmplus::String::isSpace(), avmplus::String::length(), avmplus::String::lockBuffer(), and avmplus::String::unlockBuffer(). Referenced by getNext(). 00074 { 00075 AvmAssert (!text->isInterned()); 00076 wchar *str = text->lockBuffer(); 00077 int len = text->length(); 00078 00079 wchar *dst = str; 00080 wchar *src = str; 00081 bool leadingWhite = true; 00082 wchar *lastChar = 0; 00083 00084 while (len--) { 00085 if (String::isSpace(*src)) { 00086 if (!leadingWhite) { 00087 *dst++ = *src; 00088 } 00089 src++; 00090 } else { 00091 leadingWhite = false; // first non-space char, no more 00092 lastChar = dst; 00093 *dst++ = *src++; 00094 } 00095 } 00096 00097 if (lastChar) 00098 lastChar[1] = 0; 00099 00100 *dst = 0; 00101 00102 text->unlockBuffer((int)((lastChar ? (lastChar + 1) : dst)-str)); 00103 }
|
|
|
Definition at line 136 of file XMLParser16.h. References m_condenseWhite. 00136 { return m_condenseWhite; }
|
|
|
Definition at line 105 of file XMLParser16.cpp. References condenseWhitespace(), core, MMgc::GCRoot::GetGC(), avmplus::String::isSpace(), avmplus::XMLTag::kCDataSection, avmplus::XMLTag::kComment, avmplus::XMLTag::kDocTypeDeclaration, avmplus::XMLTag::kElementType, kEndOfDocument, kMalformedElement, kNoError, avmplus::XMLTag::kProcessingInstruction, avmplus::XMLTag::kTextNodeType, kUnterminatedCDataSection, kUnterminatedComment, kUnterminatedDocTypeDeclaration, kUnterminatedProcessingInstruction, kUnterminatedXMLDeclaration, avmplus::XMLTag::kXMLDeclaration, m_condenseWhite, m_ignoreWhite, m_ptr, m_source, avmplus::XMLTag::nodeType, NULL, avmplus::XMLTag::reset(), avmplus::stripPrefix(), avmplus::XMLTag::text, and unescape(). Referenced by avmplus::XMLObject::XMLObject(). 00106 { 00107 tag.reset(); 00108 00109 // If there's nothing left, exit. 00110 if (!m_ptr || !*m_ptr) { 00111 return XMLParser::kEndOfDocument; 00112 } 00113 00114 // R41 00115 // If the ignore whitespace flag is on, don't produce 00116 // all-whitespace text nodes. 00117 if (m_ignoreWhite) { 00118 const wchar *ptr = m_ptr; 00119 while (String::isSpace(*ptr)) { 00120 ptr++; 00121 } 00122 if (*ptr == '<' || !*ptr) { 00123 // If we reached the end of the document, 00124 // or we reached a tag, skip all the 00125 // whitesapce, because it would turn into 00126 // an empty text node. 00127 m_ptr = ptr; 00128 } 00129 // If there's nothing left, exit. 00130 // But only do it for Flash 6 because we want 00131 // to exactly preserve Flash 5 behavior. 00132 if (!*m_ptr) { 00133 return XMLParser::kEndOfDocument; 00134 } 00135 } 00136 // end R41 00137 00138 // If it starts with <, it's an XML element. 00139 // If it doesn't, it must be a text element. 00140 if (*m_ptr != '<') { 00141 // Treat it as text. Scan up to the next < or until EOF. 
00142 const wchar *start = m_ptr; 00143 while (*m_ptr && *m_ptr != '<') { 00144 m_ptr++; 00145 } 00146 tag.text = unescape(m_source, start, (int)(m_ptr-start), false); 00147 00148 // Condense whitespace if desired 00149 if (m_ignoreWhite && m_condenseWhite) { 00150 condenseWhitespace(tag.text); 00151 } 00152 00153 tag.nodeType = XMLTag::kTextNodeType; 00154 return XMLParser::kNoError; 00155 } 00156 00157 // Is this a <?xml> declaration? 00158 wchar *temp; 00159 if ((temp = stripPrefix(m_ptr, "<?xml ")) != NULL) { 00160 // Scan forward for "?>" 00161 const wchar *start = m_ptr; 00162 m_ptr = temp; 00163 while (*m_ptr) { 00164 if (m_ptr[0] == '?' && m_ptr[1] == '>') 00165 { 00166 // We have the end of the XML declaration 00167 // !!@ changed to not return <?...?> parts 00168 tag.text = new (core->GetGC()) String(start + 2, (int)(m_ptr - start - 2)); 00169 m_ptr += 2; 00170 tag.nodeType = XMLTag::kXMLDeclaration; 00171 return XMLParser::kNoError; 00172 } 00173 else 00174 { 00175 m_ptr++; 00176 } 00177 } 00178 return XMLParser::kUnterminatedXMLDeclaration; 00179 } 00180 00181 // Is this a <!DOCTYPE> declaration? 00182 if ((temp = stripPrefix(m_ptr, "<!DOCTYPE")) != NULL) { 00183 // Scan forward for '>'. 00184 const wchar *start = m_ptr; 00185 m_ptr = temp; 00186 int depth = 0; 00187 while (*m_ptr) { 00188 if (*m_ptr == '<') { 00189 depth++; 00190 } 00191 if (*m_ptr == '>') { 00192 if (!depth) { 00193 // We've reached the end of the DOCTYPE. 00194 m_ptr++; 00195 tag.text = new (core->GetGC()) String(start, (int)(m_ptr-start)); 00196 tag.nodeType = XMLTag::kDocTypeDeclaration; 00197 return XMLParser::kNoError; 00198 } 00199 depth--; 00200 } 00201 m_ptr++; 00202 } 00203 return XMLParser::kUnterminatedDocTypeDeclaration; 00204 } 00205 00206 // Is this a CDATA section? 
00207 wchar *cdata; 00208 if ((cdata = stripPrefix(m_ptr, "<![CDATA[")) != NULL) { 00209 // Scan forward for "]]>" 00210 m_ptr = cdata; 00211 while (*m_ptr) { 00212 if (m_ptr[0] == ']' && m_ptr[1] == ']' && m_ptr[2] == '>') { 00213 // We have the end of the CDATA section. 00214 tag.text = new (core->GetGC()) String(cdata, (int)(m_ptr-cdata)); 00215 tag.nodeType = XMLTag::kCDataSection; 00216 m_ptr += 3; 00217 return XMLParser::kNoError; 00218 } 00219 m_ptr++; 00220 } 00221 return XMLParser::kUnterminatedCDataSection; 00222 } 00223 00224 // Is this a processing instruction? 00225 wchar *pi; 00226 if ((pi = stripPrefix(m_ptr, "<?")) != NULL) { 00227 // Scan forward for "?>" 00228 m_ptr = pi; 00229 while (*m_ptr) { 00230 if (m_ptr[0] == '?' && m_ptr[1] == '>') { 00231 // We have the end of the processing instruction. 00232 tag.text = new (core->GetGC()) String(pi, (int)(m_ptr - pi)); 00233 tag.nodeType = XMLTag::kProcessingInstruction; 00234 m_ptr += 2; 00235 return XMLParser::kNoError; 00236 } 00237 m_ptr++; 00238 } 00239 return XMLParser::kUnterminatedProcessingInstruction; 00240 } 00241 00242 // Advance past the "<" 00243 m_ptr++; 00244 00245 // Is this a comment? Return a comment tag-> 00246 const wchar *comment; 00247 if (m_ptr[0] == '!' && m_ptr[1] == '-' && m_ptr[2] == '-') { 00248 // Skip up to '-->'. 00249 m_ptr += 3; 00250 comment = m_ptr; 00251 while (*m_ptr) { 00252 if (m_ptr[0] == '-' && m_ptr[1] == '-' && m_ptr[2] == '>') 00253 { 00254 tag.text = new (core->GetGC()) String(comment, (int)(m_ptr-comment)); 00255 tag.nodeType = XMLTag::kComment; 00256 m_ptr += 3; 00257 return XMLParser::kNoError; 00258 } 00259 m_ptr++; 00260 } 00261 // Got to the end of the buffer without finding a new tag-> 00262 return XMLParser::kUnterminatedComment; 00263 } 00264 00265 00266 // Extract the tag name. Scan up to ">" or whitespace. 
00267 const wchar *tagStart = m_ptr; 00268 while (!String::isSpace(*m_ptr) && *m_ptr != '>') { 00269 if (*m_ptr == '/' && *(m_ptr+1) == '>') { 00270 // Found close of an empty element. 00271 // Exit! 00272 break; 00273 } 00274 if (!*m_ptr) { 00275 // Premature end! 00276 return XMLParser::kMalformedElement; 00277 } 00278 m_ptr++; 00279 } 00280 00281 // Give up if tag name is empty 00282 if (m_ptr == tagStart) { 00283 return XMLParser::kMalformedElement; 00284 } 00285 00286 tag.text = unescape(m_source, tagStart, (int)(m_ptr-tagStart), true); 00287 00288 tag.nodeType = XMLTag::kElementType; 00289 00290 // Extract attributes. 00291 for (;;) { 00292 if (!*m_ptr) { 00293 // Premature end! 00294 return XMLParser::kMalformedElement; 00295 } 00296 00297 // Skip any whitespace. 00298 while (String::isSpace(*m_ptr)) { 00299 m_ptr++; 00300 } 00301 00302 if (*m_ptr == '>') { 00303 break; 00304 } 00305 00306 if (*m_ptr == '/' && *(m_ptr+1) == '>') { 00307 // Found close of an empty element. 00308 // Exit! 00309 tag.empty = true; 00310 m_ptr++; 00311 break; 00312 } 00313 00314 // Extract the attribute name. 00315 const wchar *nameStart = m_ptr; 00316 while (!String::isSpace(*m_ptr) && *m_ptr != '=' && *m_ptr != '>') { 00317 if (!*m_ptr) { 00318 // Premature end! 00319 return XMLParser::kMalformedElement; 00320 } 00321 m_ptr++; 00322 } 00323 if (m_ptr == nameStart) { 00324 // Empty attribute name? 00325 return XMLParser::kMalformedElement; 00326 } 00327 00328 Stringp attributeName = unescape(m_source, nameStart, (int)(m_ptr-nameStart), true); 00329 00330 while (String::isSpace(*m_ptr)) { 00331 m_ptr++; 00332 } 00333 if (*m_ptr != '=') { 00334 // No '=' sign, no attribute value, error! 00335 return XMLParser::kMalformedElement; 00336 } else { 00337 // Skip over whitespace. 00338 while (String::isSpace(*++m_ptr)) 00339 ; 00340 const wchar *attrStart = m_ptr; 00341 // Extract the attribute value. 
00342 if (*m_ptr != '"' && *m_ptr != '\'') { 00343 // Error; no opening quote for attribute value. 00344 return XMLParser::kMalformedElement; 00345 } 00346 wchar delimiter = *m_ptr; 00347 // Extract up to the next quote. 00348 attrStart++; 00349 while (*++m_ptr != delimiter) { 00350 if (*m_ptr == '<') { 00351 // '<' is not permitted in an attribute value 00352 // Changed this from kMalformedElement to kUnterminatedAttributeValue for bug 117058(105422) 00353 return XMLParser::kUnterminatedAttributeValue; 00354 } 00355 if (!*m_ptr) { 00356 // If at end of file, 00357 // we have an unterminated attribute value on our hands. 00358 return XMLParser::kUnterminatedAttributeValue; 00359 } 00360 } 00361 const wchar *attrEnd = m_ptr; 00362 m_ptr++; 00363 00364 Stringp attributeValue = unescape(m_source, attrStart, (int)(attrEnd-attrStart), false); 00365 00366 AvmAssert (attributeName->isInterned()); 00367 tag.attributes.add(attributeName); 00368 tag.attributes.add(attributeValue); 00369 } 00370 } 00371 00372 // Advance past the end > of this element. 00373 if (*m_ptr == '>') { 00374 m_ptr++; 00375 } 00376 00377 return XMLParser::kNoError; 00378 }
|
|
||||||||||||
|
Definition at line 490 of file XMLParser16.cpp. References avmplus::String::c_str(), m_ignoreWhite, and m_source. Referenced by avmplus::XMLObject::XMLObject(). 00492 { 00493 m_source = source; 00494 m_ptr = m_source->c_str(); 00495 m_ignoreWhite = ignoreWhite; 00496 }
|
|
|
Definition at line 137 of file XMLParser16.h. References m_condenseWhite. Referenced by avmplus::XMLObject::XMLObject(). 00137 { m_condenseWhite = flag; }
|
|
||||||||||||||||||||
|
Definition at line 380 of file XMLParser16.cpp. References util::threadpool::i. Referenced by getNext(). 00381 { 00382 bool bUseSubString = true; 00383 for (int i = 0; i < len; i++) 00384 { 00385 if (startChar[i] == '&') 00386 { 00387 bUseSubString = false; 00388 break; 00389 } 00390 } 00391 00392 if (bUseSubString) 00393 { 00394 if (bIntern) 00395 { 00396 return core->internAlloc (startChar, len); 00397 } 00398 else 00399 { 00400 MMgc::GC* gc = MMgc::GC::GetGC(text); 00401 int start = (int)(startChar - text->c_str()); 00402 AvmAssert (start < text->length()); 00403 return new (gc) String (text, start, len); 00404 } 00405 } 00406 00407 MMgc::GC* gc = MMgc::GC::GetGC(text); 00408 Stringp news = new (gc) String (startChar, len); 00409 wchar *buffer = news->lockBuffer(); 00410 00411 // Remove XML &#xx; escape entities, and < > & ' 00412 wchar *dst = buffer; 00413 wchar *src = buffer; 00414 00415 while (*src) { 00416 if (*src == '&') { 00417 bool success = false; 00418 // Scan forward to the ';' 00419 wchar *endPtr = src; 00420 while (*endPtr && *endPtr != ';') { 00421 endPtr++; 00422 } 00423 if (*endPtr) { 00424 *endPtr = 0; 00425 int len = (int)(endPtr-src-1); 00426 00427 if (*(src+1) == '#') { 00428 // Parse a &#xx; decimal sequence. 
Or a &#xDD; hex sequence 00429 double value = MathUtils::parseInt(src+2, len-1); 00430 if (MathUtils::isNaN(value)) { 00431 if (len > 2 && src[2] == 'x') { 00432 // Handle xFF hex encoded tags, too 00433 value = MathUtils::parseInt(src+3, len-2, 16); 00434 } 00435 } 00436 if (!MathUtils::isNaN(value)) { 00437 *dst++ = (wchar) (int) value; 00438 success = true; 00439 } 00440 } else if (len <= 4) // Our xmlEntities are only 4 characters or less 00441 { 00442 Atom entityAtom = core->internAlloc(src+1, len)->atom(); 00443 Atom result = core->xmlEntities->get(entityAtom); 00444 if (result != undefinedAtom) { 00445 *dst++ = (wchar)(result>>3); 00446 success = true; 00447 } 00448 } 00449 *endPtr = ';'; 00450 } 00451 if (success) { 00452 // If successful, advance past the sequence 00453 src = endPtr+1; 00454 } else { 00455 // Otherwise copy the sequence literally 00456 *dst++ = *src++; 00457 } 00458 } else { 00459 *dst++ = *src++; 00460 } 00461 } 00462 *dst = 0; 00463 00464 news->unlockBuffer((int)(dst-buffer)); 00465 return (bIntern) ? core->internString (news) : news; 00466 }
|
|
|
Definition at line 134 of file XMLParser16.h. Referenced by getNext(), XMLParser(), and ~XMLParser(). |
|
|
Definition at line 146 of file XMLParser16.h. Referenced by getCondenseWhite(), getNext(), setCondenseWhite(), and ~XMLParser(). |
|
|
Definition at line 145 of file XMLParser16.h. Referenced by getNext(), parse(), and ~XMLParser(). |
|
|
Definition at line 143 of file XMLParser16.h. Referenced by getNext(), and ~XMLParser(). |
|
|
Definition at line 142 of file XMLParser16.h. Referenced by getNext(), parse(), and ~XMLParser(). |
1.4.6