avmplus::XMLParser Class Reference

#include <XMLParser16.h>

List of all members.

Public Types

enum  {
  kNoError = 0, kEndOfDocument = -1, kUnterminatedCDataSection = -2, kUnterminatedXMLDeclaration = -3,
  kUnterminatedDocTypeDeclaration = -4, kUnterminatedComment = -5, kMalformedElement = -6, kOutOfMemory = -7,
  kUnterminatedAttributeValue = -8, kUnterminatedElement = -9, kElementNeverBegun = -10, kUnterminatedProcessingInstruction = -11
}

Public Member Functions

 XMLParser (AvmCore *core)
 ~XMLParser ()
void parse (Stringp source, bool ignoreWhite=false)
int getNext (XMLTag &tag)
bool getCondenseWhite () const
void setCondenseWhite (bool flag)

Public Attributes

AvmCore * core

Private Member Functions

Stringp unescape (Stringp buffer, const wchar *start, int len, bool bIntern)
void condenseWhitespace (Stringp text)

Private Attributes

Stringp m_source
const wchar * m_ptr
bool m_ignoreWhite
bool m_condenseWhite


Detailed Description

XMLParser is an XML parser which takes 16-bit wide characters as input. The parser operates in "pull" fashion, returning a single tag or text node on each call to the getNext() method.

This XML parser is used to support E4X in AVM+.

Definition at line 101 of file XMLParser16.h.


Member Enumeration Documentation

anonymous enum
 

Enumerator:
kNoError 
kEndOfDocument 
kUnterminatedCDataSection 
kUnterminatedXMLDeclaration 
kUnterminatedDocTypeDeclaration 
kUnterminatedComment 
kMalformedElement 
kOutOfMemory 
kUnterminatedAttributeValue 
kUnterminatedElement 
kElementNeverBegun 
kUnterminatedProcessingInstruction 

Definition at line 119 of file XMLParser16.h.

00119              {
00120             kNoError                           = 0,
00121             kEndOfDocument                     = -1,
00122             kUnterminatedCDataSection          = -2,
00123             kUnterminatedXMLDeclaration        = -3,
00124             kUnterminatedDocTypeDeclaration    = -4,
00125             kUnterminatedComment               = -5,
00126             kMalformedElement                  = -6,
00127             kOutOfMemory                       = -7,
00128             kUnterminatedAttributeValue        = -8,
00129             kUnterminatedElement               = -9,
00130             kElementNeverBegun                 = -10,
00131             kUnterminatedProcessingInstruction = -11
00132         };


Constructor & Destructor Documentation

avmplus::XMLParser::XMLParser (AvmCore * core)
 

Definition at line 468 of file XMLParser16.cpp.

References avmplus::Hashtable::add(), avmplus::AvmCore::constant(), core, MMgc::GCRoot::GetGC(), avmplus::AvmCore::intToAtom(), and avmplus::AvmCore::xmlEntities.

00469     {
00470         this->core = core;
00471 
00472         if (!core->xmlEntities)
00473         {
00474             // Lazy creation of the XML entities table.
00475             core->xmlEntities = new (core->GetGC()) Hashtable(core->GetGC());
00476 
00477             const char *entities = "&amp\0\"quot\0'apos\0<lt\0>gt\0\xA0nbsp\0";
00478         
00479             while (*entities)
00480             {
00481                 core->xmlEntities->add(core->constant(entities+1),
00482                                (void*)core->intToAtom(*entities));
00483                 while (*entities++) {
00484                     // do nothing
00485                 }
00486             }
00487         }
00488     }

avmplus::XMLParser::~XMLParser ()  [inline]
 

Definition at line 105 of file XMLParser16.h.

References core, m_condenseWhite, m_ignoreWhite, m_ptr, m_source, and NULL.

00106         {
00107             core = NULL;
00108             m_source = NULL;
00109             m_ptr = NULL;
00110             m_ignoreWhite = false;
00111             m_condenseWhite = false;
00112         }


Member Function Documentation

void avmplus::XMLParser::condenseWhitespace (Stringp text)  [private]
 

Definition at line 73 of file XMLParser16.cpp.

References AvmAssert, avmplus::String::isInterned(), avmplus::String::isSpace(), avmplus::String::length(), avmplus::String::lockBuffer(), and avmplus::String::unlockBuffer().

Referenced by getNext().

00074     {
00075         AvmAssert (!text->isInterned());
00076         wchar *str = text->lockBuffer();
00077         int len = text->length();
00078 
00079         wchar *dst = str;
00080         wchar *src = str;
00081         bool leadingWhite = true;
00082         wchar *lastChar = 0;
00083 
00084         while (len--) {
00085             if (String::isSpace(*src)) {
00086                 if (!leadingWhite) {
00087                     *dst++ = *src;
00088                 }
00089                 src++;
00090             } else {
00091                 leadingWhite = false; // first non-space char, no more 
00092                 lastChar = dst;
00093                 *dst++ = *src++;
00094             }
00095         }
00096 
00097         if (lastChar)
00098             lastChar[1] = 0;
00099 
00100         *dst = 0;
00101 
00102         text->unlockBuffer((int)((lastChar ? (lastChar + 1) : dst)-str));
00103     }

bool avmplus::XMLParser::getCondenseWhite () const  [inline]
 

Definition at line 136 of file XMLParser16.h.

References m_condenseWhite.

00136 { return m_condenseWhite; }

int avmplus::XMLParser::getNext (XMLTag & tag)
 

Definition at line 105 of file XMLParser16.cpp.

References condenseWhitespace(), core, MMgc::GCRoot::GetGC(), avmplus::String::isSpace(), avmplus::XMLTag::kCDataSection, avmplus::XMLTag::kComment, avmplus::XMLTag::kDocTypeDeclaration, avmplus::XMLTag::kElementType, kEndOfDocument, kMalformedElement, kNoError, avmplus::XMLTag::kProcessingInstruction, avmplus::XMLTag::kTextNodeType, kUnterminatedCDataSection, kUnterminatedComment, kUnterminatedDocTypeDeclaration, kUnterminatedProcessingInstruction, kUnterminatedXMLDeclaration, avmplus::XMLTag::kXMLDeclaration, m_condenseWhite, m_ignoreWhite, m_ptr, m_source, avmplus::XMLTag::nodeType, NULL, avmplus::XMLTag::reset(), avmplus::stripPrefix(), avmplus::XMLTag::text, and unescape().

Referenced by avmplus::XMLObject::XMLObject().

00106     {
00107         tag.reset();
00108 
00109         // If there's nothing left, exit.
00110         if (!m_ptr || !*m_ptr) {
00111             return XMLParser::kEndOfDocument;
00112         }
00113 
00114         // R41
00115         // If the ignore whitespace flag is on, don't produce
00116         // all-whitespace text nodes.
00117         if (m_ignoreWhite) {
00118             const wchar *ptr = m_ptr;
00119             while (String::isSpace(*ptr)) {
00120                 ptr++;
00121             }
00122             if (*ptr == '<' || !*ptr) {
00123                 // If we reached the end of the document,
00124                 // or we reached a tag, skip all the
00125                 // whitesapce, because it would turn into
00126                 // an empty text node.
00127                 m_ptr = ptr;
00128             }
00129             // If there's nothing left, exit.
00130             // But only do it for Flash 6 because we want
00131             // to exactly preserve Flash 5 behavior.
00132             if (!*m_ptr) {
00133                 return XMLParser::kEndOfDocument;
00134             }
00135         }
00136         // end R41
00137 
00138         // If it starts with <, it's an XML element.
00139         // If it doesn't, it must be a text element.
00140         if (*m_ptr != '<') {
00141             // Treat it as text.  Scan up to the next < or until EOF.
00142             const wchar *start = m_ptr;
00143             while (*m_ptr && *m_ptr != '<') {
00144                 m_ptr++;
00145             }
00146             tag.text = unescape(m_source, start, (int)(m_ptr-start), false);
00147 
00148             // Condense whitespace if desired
00149             if (m_ignoreWhite && m_condenseWhite) {
00150                 condenseWhitespace(tag.text);
00151             }
00152 
00153             tag.nodeType = XMLTag::kTextNodeType;
00154             return XMLParser::kNoError;
00155         }
00156 
00157         // Is this a <?xml> declaration?
00158         wchar *temp;
00159         if ((temp = stripPrefix(m_ptr, "<?xml ")) != NULL) {
00160             // Scan forward for "?>"
00161             const wchar *start = m_ptr;
00162             m_ptr = temp;
00163             while (*m_ptr) {
00164                 if (m_ptr[0] == '?' && m_ptr[1] == '>') 
00165                 {
00166                     // We have the end of the XML declaration
00167                     // !!@ changed to not return <?...?> parts
00168                     tag.text = new (core->GetGC()) String(start + 2, (int)(m_ptr - start - 2));
00169                     m_ptr += 2;
00170                     tag.nodeType = XMLTag::kXMLDeclaration;
00171                     return XMLParser::kNoError;
00172                 }
00173                 else
00174                 {
00175                     m_ptr++;
00176                 }
00177             }
00178             return XMLParser::kUnterminatedXMLDeclaration;
00179         }
00180 
00181         // Is this a <!DOCTYPE> declaration?
00182         if ((temp = stripPrefix(m_ptr, "<!DOCTYPE")) != NULL) {
00183             // Scan forward for '>'.
00184             const wchar *start = m_ptr;
00185             m_ptr = temp;
00186             int depth = 0;
00187             while (*m_ptr) {
00188                 if (*m_ptr == '<') {
00189                     depth++;
00190                 }
00191                 if (*m_ptr == '>') {
00192                     if (!depth) {
00193                         // We've reached the end of the DOCTYPE.
00194                         m_ptr++;
00195                         tag.text = new (core->GetGC()) String(start, (int)(m_ptr-start));
00196                         tag.nodeType = XMLTag::kDocTypeDeclaration;
00197                         return XMLParser::kNoError;
00198                     }
00199                     depth--;
00200                 }
00201                 m_ptr++;
00202             }
00203             return XMLParser::kUnterminatedDocTypeDeclaration;
00204         }
00205 
00206         // Is this a CDATA section?
00207         wchar *cdata;
00208         if ((cdata = stripPrefix(m_ptr, "<![CDATA[")) != NULL) {
00209             // Scan forward for "]]>"
00210             m_ptr = cdata;
00211             while (*m_ptr) {
00212                 if (m_ptr[0] == ']' && m_ptr[1] == ']' && m_ptr[2] == '>') {
00213                     // We have the end of the CDATA section.
00214                     tag.text = new (core->GetGC()) String(cdata, (int)(m_ptr-cdata));
00215                     tag.nodeType = XMLTag::kCDataSection;
00216                     m_ptr += 3;
00217                     return XMLParser::kNoError;
00218                 }
00219                 m_ptr++;
00220             }
00221             return XMLParser::kUnterminatedCDataSection;
00222         }
00223 
00224         // Is this a processing instruction?
00225         wchar *pi;
00226         if ((pi = stripPrefix(m_ptr, "<?")) != NULL) {
00227             // Scan forward for "?>"
00228             m_ptr = pi;
00229             while (*m_ptr) {
00230                 if (m_ptr[0] == '?' && m_ptr[1] == '>') {
00231                     // We have the end of the processing instruction.
00232                     tag.text = new (core->GetGC()) String(pi, (int)(m_ptr - pi));
00233                     tag.nodeType = XMLTag::kProcessingInstruction;
00234                     m_ptr += 2;
00235                     return XMLParser::kNoError;
00236                 }
00237                 m_ptr++;
00238             }
00239             return XMLParser::kUnterminatedProcessingInstruction;
00240         }
00241 
00242         // Advance past the "<"
00243         m_ptr++;
00244 
00245         // Is this a comment?  Return a comment tag->
00246         const wchar *comment;
00247         if (m_ptr[0] == '!' && m_ptr[1] == '-' && m_ptr[2] == '-') {
00248             // Skip up to '-->'.
00249             m_ptr += 3;
00250             comment = m_ptr;
00251             while (*m_ptr) {
00252                 if (m_ptr[0] == '-' && m_ptr[1] == '-' && m_ptr[2] == '>') 
00253                 {
00254                     tag.text = new (core->GetGC()) String(comment, (int)(m_ptr-comment));
00255                     tag.nodeType = XMLTag::kComment;
00256                     m_ptr += 3;
00257                     return XMLParser::kNoError;
00258                 }
00259                 m_ptr++;
00260             }
00261             // Got to the end of the buffer without finding a new tag->
00262             return XMLParser::kUnterminatedComment;
00263         }
00264 
00265 
00266         // Extract the tag name.  Scan up to ">" or whitespace.
00267         const wchar *tagStart = m_ptr;
00268         while (!String::isSpace(*m_ptr) && *m_ptr != '>') {
00269             if (*m_ptr == '/' && *(m_ptr+1) == '>') {
00270                 // Found close of an empty element.
00271                 // Exit!
00272                 break;
00273             }
00274             if (!*m_ptr) {
00275                 // Premature end!
00276                 return XMLParser::kMalformedElement;
00277             }
00278             m_ptr++;
00279         }
00280 
00281         // Give up if tag name is empty
00282         if (m_ptr == tagStart) {
00283             return XMLParser::kMalformedElement;
00284         }
00285 
00286         tag.text = unescape(m_source, tagStart, (int)(m_ptr-tagStart), true);
00287 
00288         tag.nodeType = XMLTag::kElementType;
00289 
00290         // Extract attributes.
00291         for (;;) {
00292             if (!*m_ptr) {
00293                 // Premature end!
00294                 return XMLParser::kMalformedElement;
00295             }
00296 
00297             // Skip any whitespace.
00298             while (String::isSpace(*m_ptr)) {
00299                 m_ptr++;
00300             }
00301 
00302             if (*m_ptr == '>') {
00303                 break;
00304             }
00305 
00306             if (*m_ptr == '/' && *(m_ptr+1) == '>') {
00307                 // Found close of an empty element.
00308                 // Exit!
00309                 tag.empty = true;
00310                 m_ptr++;
00311                 break;
00312             }
00313 
00314             // Extract the attribute name.
00315             const wchar *nameStart = m_ptr;
00316             while (!String::isSpace(*m_ptr) && *m_ptr != '=' && *m_ptr != '>') {
00317                 if (!*m_ptr) {
00318                     // Premature end!
00319                     return XMLParser::kMalformedElement;
00320                 }
00321                 m_ptr++;
00322             }
00323             if (m_ptr == nameStart) {
00324                 // Empty attribute name?
00325                 return XMLParser::kMalformedElement;
00326             }
00327 
00328             Stringp attributeName = unescape(m_source, nameStart, (int)(m_ptr-nameStart), true);
00329 
00330             while (String::isSpace(*m_ptr)) {
00331                 m_ptr++;
00332             }
00333             if (*m_ptr != '=') {
00334                 // No '=' sign, no attribute value, error!
00335                 return XMLParser::kMalformedElement;
00336             } else {
00337                 // Skip over whitespace.
00338                 while (String::isSpace(*++m_ptr))
00339                     ;
00340                 const wchar *attrStart = m_ptr;
00341                 // Extract the attribute value.
00342                 if (*m_ptr != '"' && *m_ptr != '\'') {
00343                     // Error; no opening quote for attribute value.
00344                     return XMLParser::kMalformedElement;
00345                 }
00346                 wchar delimiter = *m_ptr;
00347                 // Extract up to the next quote.
00348                 attrStart++;
00349                 while (*++m_ptr != delimiter) {
00350                     if (*m_ptr == '<') {
00351                         // '<' is not permitted in an attribute value
00352                         // Changed this from kMalformedElement to kUnterminatedAttributeValue for bug 117058(105422)
00353                         return XMLParser::kUnterminatedAttributeValue;
00354                     }
00355                     if (!*m_ptr) {
00356                         // If at end of file, 
00357                         // we have an unterminated attribute value on our hands.
00358                         return XMLParser::kUnterminatedAttributeValue;
00359                     }
00360                 }
00361                 const wchar *attrEnd = m_ptr;
00362                 m_ptr++;
00363 
00364                 Stringp attributeValue = unescape(m_source, attrStart, (int)(attrEnd-attrStart), false);
00365 
00366                 AvmAssert (attributeName->isInterned());
00367                 tag.attributes.add(attributeName);
00368                 tag.attributes.add(attributeValue);
00369             }
00370         }
00371 
00372         // Advance past the end > of this element.
00373         if (*m_ptr == '>') {
00374             m_ptr++;
00375         }
00376 
00377         return XMLParser::kNoError;
00378     }

void avmplus::XMLParser::parse (Stringp source, bool ignoreWhite = false)
 

Definition at line 490 of file XMLParser16.cpp.

References avmplus::String::c_str(), m_ignoreWhite, and m_source.

Referenced by avmplus::XMLObject::XMLObject().

00492     {
00493         m_source = source;
00494         m_ptr = m_source->c_str();
00495         m_ignoreWhite = ignoreWhite;
00496     }

void avmplus::XMLParser::setCondenseWhite (bool flag)  [inline]
 

Definition at line 137 of file XMLParser16.h.

References m_condenseWhite.

Referenced by avmplus::XMLObject::XMLObject().

00137 { m_condenseWhite = flag; }

Stringp avmplus::XMLParser::unescape (Stringp buffer, const wchar * start, int len, bool bIntern)  [private]
 

Definition at line 380 of file XMLParser16.cpp.

References util::threadpool::i.

Referenced by getNext().

00381     {
00382         bool bUseSubString = true;
00383         for (int i = 0; i < len; i++)
00384         {
00385             if (startChar[i] == '&')
00386             {
00387                 bUseSubString = false;
00388                 break;
00389             }
00390         }
00391 
00392         if (bUseSubString)
00393         {
00394             if (bIntern)
00395             {
00396                 return core->internAlloc (startChar, len);
00397             }
00398             else
00399             {
00400                 MMgc::GC* gc = MMgc::GC::GetGC(text);
00401                 int start = (int)(startChar - text->c_str());
00402                 AvmAssert (start < text->length());
00403                 return new (gc) String (text, start, len);
00404             }
00405         }
00406 
00407         MMgc::GC* gc = MMgc::GC::GetGC(text);
00408         Stringp news = new (gc) String (startChar, len);
00409         wchar *buffer = news->lockBuffer();
00410     
00411         // Remove XML &#xx; escape entities, and &lt; &gt; &amp; &apos;
00412         wchar *dst = buffer;
00413         wchar *src = buffer;
00414 
00415         while (*src) {
00416             if (*src == '&') {
00417                 bool success = false;
00418                 // Scan forward to the ';'
00419                 wchar *endPtr = src;
00420                 while (*endPtr && *endPtr != ';') {
00421                     endPtr++;
00422                 }
00423                 if (*endPtr) {
00424                     *endPtr = 0;
00425                     int len = (int)(endPtr-src-1);
00426 
00427                     if (*(src+1) == '#') {
00428                         // Parse a &#xx; decimal sequence.  Or a &#xDD hex sequence
00429                         double value = MathUtils::parseInt(src+2, len-1);
00430                         if (MathUtils::isNaN(value)) {
00431                             if (len > 2 && src[2] == 'x') {
00432                                 // Handle xFF hex encoded tags, too             
00433                                 value = MathUtils::parseInt(src+3, len-2, 16);
00434                             }
00435                         }
00436                         if (!MathUtils::isNaN(value)) {
00437                             *dst++ = (wchar) (int) value;
00438                             success = true;
00439                         }
00440                     } else if (len <= 4) // Our xmlEntities are only 4 characters or less
00441                     {
00442                         Atom entityAtom = core->internAlloc(src+1, len)->atom();
00443                         Atom result = core->xmlEntities->get(entityAtom);
00444                         if (result != undefinedAtom) {
00445                             *dst++ = (wchar)(result>>3);
00446                             success = true;
00447                         }
00448                     }
00449                     *endPtr = ';';
00450                 }
00451                 if (success) {
00452                     // If successful, advance past the sequence
00453                     src = endPtr+1;
00454                 } else {
00455                     // Otherwise copy the sequence literally
00456                     *dst++ = *src++;
00457                 }
00458             } else {
00459                 *dst++ = *src++;
00460             }
00461         }
00462         *dst = 0;
00463 
00464         news->unlockBuffer((int)(dst-buffer));
00465         return (bIntern) ? core->internString (news) : news;
00466     }


Member Data Documentation

AvmCore* avmplus::XMLParser::core
 

Definition at line 134 of file XMLParser16.h.

Referenced by getNext(), XMLParser(), and ~XMLParser().

bool avmplus::XMLParser::m_condenseWhite [private]
 

Definition at line 146 of file XMLParser16.h.

Referenced by getCondenseWhite(), getNext(), setCondenseWhite(), and ~XMLParser().

bool avmplus::XMLParser::m_ignoreWhite [private]
 

Definition at line 145 of file XMLParser16.h.

Referenced by getNext(), parse(), and ~XMLParser().

const wchar* avmplus::XMLParser::m_ptr [private]
 

Definition at line 143 of file XMLParser16.h.

Referenced by getNext(), and ~XMLParser().

Stringp avmplus::XMLParser::m_source [private]
 

Definition at line 142 of file XMLParser16.h.

Referenced by getNext(), parse(), and ~XMLParser().


The documentation for this class was generated from the following files:
  * XMLParser16.h
  * XMLParser16.cpp
Generated on Sun Oct 12 18:51:26 2008 for Tamarin by  doxygen 1.4.6